Annotation of src/sys/kern/sys_aio.c, Revision 1.31
1.31 ! rmind 1: /* $NetBSD: sys_aio.c,v 1.30 2009/11/22 19:09:16 mbalmer Exp $ */
1.1 rmind 2:
3: /*
1.31 ! rmind 4: * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
1.10 rmind 5: * All rights reserved.
1.1 rmind 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
1.19 rmind 16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
1.1 rmind 27: */
28:
29: /*
1.19 rmind 30: * Implementation of POSIX asynchronous I/O.
31: * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
1.1 rmind 32: */
33:
34: #include <sys/cdefs.h>
1.31 ! rmind 35: __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.30 2009/11/22 19:09:16 mbalmer Exp $");
1.4 rmind 36:
1.20 ad 37: #ifdef _KERNEL_OPT
1.4 rmind 38: #include "opt_ddb.h"
1.20 ad 39: #endif
1.1 rmind 40:
41: #include <sys/param.h>
42: #include <sys/condvar.h>
43: #include <sys/file.h>
44: #include <sys/filedesc.h>
45: #include <sys/kernel.h>
46: #include <sys/kmem.h>
47: #include <sys/lwp.h>
48: #include <sys/mutex.h>
49: #include <sys/pool.h>
50: #include <sys/proc.h>
51: #include <sys/queue.h>
52: #include <sys/signal.h>
53: #include <sys/signalvar.h>
1.20 ad 54: #include <sys/syscall.h>
1.1 rmind 55: #include <sys/syscallargs.h>
1.20 ad 56: #include <sys/syscallvar.h>
1.1 rmind 57: #include <sys/sysctl.h>
58: #include <sys/systm.h>
59: #include <sys/types.h>
60: #include <sys/vnode.h>
1.11 ad 61: #include <sys/atomic.h>
1.20 ad 62: #include <sys/module.h>
1.21 pooka 63: #include <sys/buf.h>
1.1 rmind 64:
65: #include <uvm/uvm_extern.h>
66:
1.20 ad 67: MODULE(MODULE_CLASS_MISC, aio, NULL);
68:
1.1 rmind 69: /*
70: * System-wide limits and counter of AIO operations.
71: */
1.31 ! rmind 72: u_int aio_listio_max = AIO_LISTIO_MAX;
! 73: static u_int aio_max = AIO_MAX;
! 74: static u_int aio_jobs_count;
! 75:
! 76: static struct pool aio_job_pool;
! 77: static struct pool aio_lio_pool;
! 78: static void * aio_ehook;
! 79:
! 80: static void aio_worker(void *);
! 81: static void aio_process(struct aio_job *);
! 82: static void aio_sendsig(struct proc *, struct sigevent *);
! 83: static int aio_enqueue_job(int, void *, struct lio_req *);
! 84: static void aio_exit(proc_t *, void *);
1.20 ad 85:
86: static const struct syscall_package aio_syscalls[] = {
87: { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
88: { SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
89: { SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
90: { SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
91: { SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
1.22 christos 92: { SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
1.20 ad 93: { SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
94: { SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
95: { 0, 0, NULL },
96: };
1.1 rmind 97:
98: /*
1.20 ad 99: * Tear down all AIO state.
1.4 rmind 100: */
1.20 ad 101: static int
102: aio_fini(bool interface)
103: {
104: int error;
105: proc_t *p;
106:
107: if (interface) {
108: /* Stop syscall activity. */
109: error = syscall_disestablish(NULL, aio_syscalls);
110: if (error != 0)
111: return error;
112: /* Abort if any processes are using AIO. */
113: mutex_enter(proc_lock);
114: PROCLIST_FOREACH(p, &allproc) {
115: if (p->p_aio != NULL)
116: break;
117: }
118: mutex_exit(proc_lock);
119: if (p != NULL) {
120: error = syscall_establish(NULL, aio_syscalls);
121: KASSERT(error == 0);
122: return EBUSY;
123: }
124: }
125: KASSERT(aio_jobs_count == 0);
126: exithook_disestablish(aio_ehook);
127: pool_destroy(&aio_job_pool);
128: pool_destroy(&aio_lio_pool);
129: return 0;
130: }
131:
132: /*
133: * Initialize global AIO state.
134: */
135: static int
136: aio_init(void)
1.4 rmind 137: {
1.20 ad 138: int error;
1.4 rmind 139:
140: pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
141: "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
142: pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
143: "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
1.20 ad 144: aio_ehook = exithook_establish(aio_exit, NULL);
145: error = syscall_establish(NULL, aio_syscalls);
146: if (error != 0)
147: aio_fini(false);
148: return error;
149: }
150:
151: /*
152: * Module interface.
153: */
154: static int
155: aio_modcmd(modcmd_t cmd, void *arg)
156: {
157:
158: switch (cmd) {
159: case MODULE_CMD_INIT:
160: return aio_init();
161: case MODULE_CMD_FINI:
162: return aio_fini(true);
163: default:
164: return ENOTTY;
165: }
1.4 rmind 166: }
167:
168: /*
1.1 rmind 169: * Initialize Asynchronous I/O data structures for the process.
170: */
1.20 ad 171: static int
172: aio_procinit(struct proc *p)
1.1 rmind 173: {
174: struct aioproc *aio;
175: struct lwp *l;
1.8 ad 176: int error;
1.1 rmind 177: vaddr_t uaddr;
178:
179: /* Allocate and initialize AIO structure */
1.15 ad 180: aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);
1.1 rmind 181: if (aio == NULL)
182: return EAGAIN;
183:
1.4 rmind 184: /* Initialize queue and their synchronization structures */
1.1 rmind 185: mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
186: cv_init(&aio->aio_worker_cv, "aiowork");
187: cv_init(&aio->done_cv, "aiodone");
188: TAILQ_INIT(&aio->jobs_queue);
189:
190: /*
191: * Create an AIO worker thread.
192: * XXX: Currently, AIO thread is not protected against user's actions.
193: */
1.29 rmind 194: uaddr = uvm_uarea_alloc();
1.1 rmind 195: if (uaddr == 0) {
1.5 rmind 196: aio_exit(p, aio);
1.1 rmind 197: return EAGAIN;
198: }
1.29 rmind 199: error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker,
1.8 ad 200: NULL, &l, curlwp->l_class);
201: if (error != 0) {
1.29 rmind 202: uvm_uarea_free(uaddr);
1.5 rmind 203: aio_exit(p, aio);
1.8 ad 204: return error;
1.1 rmind 205: }
206:
1.5 rmind 207: /* Recheck if we are really first */
1.18 ad 208: mutex_enter(p->p_lock);
1.5 rmind 209: if (p->p_aio) {
1.18 ad 210: mutex_exit(p->p_lock);
1.5 rmind 211: aio_exit(p, aio);
212: lwp_exit(l);
213: return 0;
214: }
215: p->p_aio = aio;
216:
1.1 rmind 217: /* Complete the initialization of thread, and run it */
218: aio->aio_worker = l;
219: p->p_nrlwps++;
220: lwp_lock(l);
221: l->l_stat = LSRUN;
1.12 rmind 222: l->l_priority = MAXPRI_USER;
1.2 yamt 223: sched_enqueue(l, false);
1.1 rmind 224: lwp_unlock(l);
1.18 ad 225: mutex_exit(p->p_lock);
1.1 rmind 226:
227: return 0;
228: }
229:
230: /*
231: * Exit of Asynchronous I/O subsystem of process.
232: */
1.20 ad 233: static void
234: aio_exit(struct proc *p, void *cookie)
1.1 rmind 235: {
236: struct aio_job *a_job;
1.20 ad 237: struct aioproc *aio;
1.1 rmind 238:
1.20 ad 239: if (cookie != NULL)
240: aio = cookie;
241: else if ((aio = p->p_aio) == NULL)
1.1 rmind 242: return;
243:
244: /* Free AIO queue */
245: while (!TAILQ_EMPTY(&aio->jobs_queue)) {
246: a_job = TAILQ_FIRST(&aio->jobs_queue);
247: TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
1.4 rmind 248: pool_put(&aio_job_pool, a_job);
1.11 ad 249: atomic_dec_uint(&aio_jobs_count);
1.1 rmind 250: }
251:
252: /* Destroy and free the entire AIO data structure */
253: cv_destroy(&aio->aio_worker_cv);
254: cv_destroy(&aio->done_cv);
255: mutex_destroy(&aio->aio_mtx);
256: kmem_free(aio, sizeof(struct aioproc));
257: }
258:
259: /*
260: * AIO worker thread and processor.
261: */
1.26 yamt 262: static void
1.1 rmind 263: aio_worker(void *arg)
264: {
265: struct proc *p = curlwp->l_proc;
266: struct aioproc *aio = p->p_aio;
267: struct aio_job *a_job;
268: struct lio_req *lio;
269: sigset_t oss, nss;
1.4 rmind 270: int error, refcnt;
1.1 rmind 271:
272: /*
273: * Make an empty signal mask, so it
274: * handles only SIGKILL and SIGSTOP.
275: */
276: sigfillset(&nss);
1.18 ad 277: mutex_enter(p->p_lock);
1.1 rmind 278: error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
1.18 ad 279: mutex_exit(p->p_lock);
1.1 rmind 280: KASSERT(error == 0);
281:
282: for (;;) {
283: /*
284: * Loop for each job in the queue. If there
1.4 rmind 285: * are no jobs then sleep.
1.1 rmind 286: */
287: mutex_enter(&aio->aio_mtx);
288: while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
289: if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
290: /*
1.4 rmind 291: * Thread was interrupted - check for
292: * pending exit or suspend.
1.1 rmind 293: */
1.4 rmind 294: mutex_exit(&aio->aio_mtx);
295: lwp_userret(curlwp);
296: mutex_enter(&aio->aio_mtx);
1.1 rmind 297: }
298: }
299:
300: /* Take the job from the queue */
301: aio->curjob = a_job;
302: TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
303:
1.11 ad 304: atomic_dec_uint(&aio_jobs_count);
1.1 rmind 305: aio->jobs_count--;
306:
307: mutex_exit(&aio->aio_mtx);
308:
309: /* Process an AIO operation */
310: aio_process(a_job);
311:
312: /* Copy data structure back to the user-space */
313: (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
314: sizeof(struct aiocb));
315:
316: mutex_enter(&aio->aio_mtx);
317: aio->curjob = NULL;
1.4 rmind 318:
1.1 rmind 319: /* Decrease a reference counter, if there is a LIO structure */
320: lio = a_job->lio;
1.4 rmind 321: refcnt = (lio != NULL ? --lio->refcnt : -1);
322:
1.1 rmind 323: /* Notify all suspenders */
324: cv_broadcast(&aio->done_cv);
325: mutex_exit(&aio->aio_mtx);
326:
327: /* Send a signal, if any */
328: aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
329:
330: /* Destroy the LIO structure */
1.4 rmind 331: if (refcnt == 0) {
1.1 rmind 332: aio_sendsig(p, &lio->sig);
1.4 rmind 333: pool_put(&aio_lio_pool, lio);
1.1 rmind 334: }
335:
1.30 mbalmer 336: /* Destroy the job */
1.4 rmind 337: pool_put(&aio_job_pool, a_job);
1.1 rmind 338: }
339:
1.4 rmind 340: /* NOTREACHED */
1.1 rmind 341: }
342:
343: static void
344: aio_process(struct aio_job *a_job)
345: {
346: struct proc *p = curlwp->l_proc;
347: struct aiocb *aiocbp = &a_job->aiocbp;
348: struct file *fp;
349: int fd = aiocbp->aio_fildes;
350: int error = 0;
351:
352: KASSERT(a_job->aio_op != 0);
353:
1.4 rmind 354: if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
1.1 rmind 355: struct iovec aiov;
356: struct uio auio;
357:
358: if (aiocbp->aio_nbytes > SSIZE_MAX) {
359: error = EINVAL;
360: goto done;
361: }
362:
1.16 ad 363: fp = fd_getfile(fd);
1.1 rmind 364: if (fp == NULL) {
365: error = EBADF;
366: goto done;
367: }
368:
369: aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
370: aiov.iov_len = aiocbp->aio_nbytes;
371: auio.uio_iov = &aiov;
372: auio.uio_iovcnt = 1;
373: auio.uio_resid = aiocbp->aio_nbytes;
374: auio.uio_vmspace = p->p_vmspace;
375:
376: if (a_job->aio_op & AIO_READ) {
377: /*
378: * Perform a Read operation
379: */
380: KASSERT((a_job->aio_op & AIO_WRITE) == 0);
381:
382: if ((fp->f_flag & FREAD) == 0) {
1.16 ad 383: fd_putfile(fd);
1.1 rmind 384: error = EBADF;
385: goto done;
386: }
387: auio.uio_rw = UIO_READ;
388: error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
389: &auio, fp->f_cred, FOF_UPDATE_OFFSET);
390: } else {
391: /*
392: * Perform a Write operation
393: */
394: KASSERT(a_job->aio_op & AIO_WRITE);
395:
396: if ((fp->f_flag & FWRITE) == 0) {
1.16 ad 397: fd_putfile(fd);
1.1 rmind 398: error = EBADF;
399: goto done;
400: }
401: auio.uio_rw = UIO_WRITE;
402: error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
403: &auio, fp->f_cred, FOF_UPDATE_OFFSET);
404: }
1.16 ad 405: fd_putfile(fd);
1.1 rmind 406:
407: /* Store the result value */
408: a_job->aiocbp.aio_nbytes -= auio.uio_resid;
409: a_job->aiocbp._retval = (error == 0) ?
410: a_job->aiocbp.aio_nbytes : -1;
411:
1.4 rmind 412: } else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
1.1 rmind 413: /*
414: * Perform a file Sync operation
415: */
416: struct vnode *vp;
417:
1.16 ad 418: if ((error = fd_getvnode(fd, &fp)) != 0)
1.1 rmind 419: goto done;
420:
421: if ((fp->f_flag & FWRITE) == 0) {
1.16 ad 422: fd_putfile(fd);
1.1 rmind 423: error = EBADF;
424: goto done;
425: }
426:
427: vp = (struct vnode *)fp->f_data;
428: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
429: if (a_job->aio_op & AIO_DSYNC) {
430: error = VOP_FSYNC(vp, fp->f_cred,
1.9 pooka 431: FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
1.1 rmind 432: } else if (a_job->aio_op & AIO_SYNC) {
433: error = VOP_FSYNC(vp, fp->f_cred,
1.9 pooka 434: FSYNC_WAIT, 0, 0);
1.1 rmind 435: }
436: VOP_UNLOCK(vp, 0);
1.16 ad 437: fd_putfile(fd);
1.1 rmind 438:
439: /* Store the result value */
440: a_job->aiocbp._retval = (error == 0) ? 0 : -1;
441:
442: } else
443: panic("aio_process: invalid operation code\n");
444:
445: done:
446: /* Job is done, set the error, if any */
447: a_job->aiocbp._errno = error;
448: a_job->aiocbp._state = JOB_DONE;
449: }
450:
451: /*
452: * Send AIO signal.
453: */
454: static void
455: aio_sendsig(struct proc *p, struct sigevent *sig)
456: {
457: ksiginfo_t ksi;
458:
459: if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
460: return;
461:
462: KSI_INIT(&ksi);
463: ksi.ksi_signo = sig->sigev_signo;
464: ksi.ksi_code = SI_ASYNCIO;
1.3 christos 465: ksi.ksi_value = sig->sigev_value;
1.17 ad 466: mutex_enter(proc_lock);
1.1 rmind 467: kpsignal(p, &ksi, NULL);
1.17 ad 468: mutex_exit(proc_lock);
1.1 rmind 469: }
470:
471: /*
472: * Enqueue the job.
473: */
474: static int
475: aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
476: {
477: struct proc *p = curlwp->l_proc;
478: struct aioproc *aio;
479: struct aio_job *a_job;
480: struct aiocb aiocbp;
481: struct sigevent *sig;
482: int error;
483:
1.12 rmind 484: /* Non-accurate check for the limit */
485: if (aio_jobs_count + 1 > aio_max)
1.1 rmind 486: return EAGAIN;
487:
488: /* Get the data structure from user-space */
489: error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
490: if (error)
491: return error;
492:
493: /* Check if signal is set, and validate it */
494: sig = &aiocbp.aio_sigevent;
495: if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
496: sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
497: return EINVAL;
498:
499: /* Buffer and byte count */
500: if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
501: if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
502: return EINVAL;
503:
504: /* Check the opcode, if LIO_NOP - simply ignore */
505: if (op == AIO_LIO) {
506: KASSERT(lio != NULL);
507: if (aiocbp.aio_lio_opcode == LIO_WRITE)
508: op = AIO_WRITE;
509: else if (aiocbp.aio_lio_opcode == LIO_READ)
510: op = AIO_READ;
511: else
512: return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
513: } else {
514: KASSERT(lio == NULL);
515: }
516:
517: /*
518: * Look for already existing job. If found - the job is in-progress.
519: * According to POSIX this is invalid, so return the error.
520: */
521: aio = p->p_aio;
522: if (aio) {
523: mutex_enter(&aio->aio_mtx);
524: if (aio->curjob) {
525: a_job = aio->curjob;
526: if (a_job->aiocb_uptr == aiocb_uptr) {
527: mutex_exit(&aio->aio_mtx);
528: return EINVAL;
529: }
530: }
531: TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
532: if (a_job->aiocb_uptr != aiocb_uptr)
533: continue;
534: mutex_exit(&aio->aio_mtx);
535: return EINVAL;
536: }
537: mutex_exit(&aio->aio_mtx);
538: }
539:
540: /*
541: * Check if AIO structure is initialized, if not - initialize it.
542: * In LIO case, we did that already. We will recheck this with
1.20 ad 543: * the lock in aio_procinit().
1.1 rmind 544: */
545: if (lio == NULL && p->p_aio == NULL)
1.20 ad 546: if (aio_procinit(p))
1.1 rmind 547: return EAGAIN;
548: aio = p->p_aio;
549:
550: /*
551: * Set the state with errno, and copy data
552: * structure back to the user-space.
553: */
554: aiocbp._state = JOB_WIP;
555: aiocbp._errno = EINPROGRESS;
556: aiocbp._retval = -1;
557: error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
558: if (error)
559: return error;
560:
561: /* Allocate and initialize a new AIO job */
1.4 rmind 562: a_job = pool_get(&aio_job_pool, PR_WAITOK);
1.1 rmind 563: memset(a_job, 0, sizeof(struct aio_job));
564:
565: /*
566: * Set the data.
567: * Store the user-space pointer for searching. Since we
568: * are storing only per proc pointers - it is safe.
569: */
570: memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
571: a_job->aiocb_uptr = aiocb_uptr;
572: a_job->aio_op |= op;
573: a_job->lio = lio;
574:
575: /*
576: * Add the job to the queue, update the counters, and
577: * notify the AIO worker thread to handle the job.
578: */
579: mutex_enter(&aio->aio_mtx);
580:
581: /* Fail, if the limit was reached */
1.13 rmind 582: if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
583: aio->jobs_count >= aio_listio_max) {
1.12 rmind 584: atomic_dec_uint(&aio_jobs_count);
1.1 rmind 585: mutex_exit(&aio->aio_mtx);
1.4 rmind 586: pool_put(&aio_job_pool, a_job);
1.1 rmind 587: return EAGAIN;
588: }
589:
590: TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
591: aio->jobs_count++;
592: if (lio)
593: lio->refcnt++;
594: cv_signal(&aio->aio_worker_cv);
595:
596: mutex_exit(&aio->aio_mtx);
597:
598: /*
599: * One would handle the errors only with aio_error() function.
600: * This way is appropriate according to POSIX.
601: */
602: return 0;
603: }
604:
605: /*
606: * Syscall functions.
607: */
608:
609: int
1.27 yamt 610: sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
611: register_t *retval)
1.1 rmind 612: {
1.14 dsl 613: /* {
1.1 rmind 614: syscallarg(int) fildes;
615: syscallarg(struct aiocb *) aiocbp;
1.14 dsl 616: } */
1.1 rmind 617: struct proc *p = l->l_proc;
618: struct aioproc *aio;
619: struct aio_job *a_job;
620: struct aiocb *aiocbp_ptr;
621: struct lio_req *lio;
622: struct filedesc *fdp = p->p_fd;
623: unsigned int cn, errcnt, fildes;
1.24 ad 624: fdtab_t *dt;
1.1 rmind 625:
626: TAILQ_HEAD(, aio_job) tmp_jobs_list;
627:
628: /* Check for invalid file descriptor */
629: fildes = (unsigned int)SCARG(uap, fildes);
1.24 ad 630: dt = fdp->fd_dt;
631: if (fildes >= dt->dt_nfiles)
1.16 ad 632: return EBADF;
1.24 ad 633: if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
1.1 rmind 634: return EBADF;
635:
636: /* Check if AIO structure is initialized */
637: if (p->p_aio == NULL) {
638: *retval = AIO_NOTCANCELED;
639: return 0;
640: }
641:
642: aio = p->p_aio;
643: aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);
644:
645: mutex_enter(&aio->aio_mtx);
646:
647: /* Cancel the jobs, and remove them from the queue */
648: cn = 0;
649: TAILQ_INIT(&tmp_jobs_list);
650: TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
651: if (aiocbp_ptr) {
652: if (aiocbp_ptr != a_job->aiocb_uptr)
653: continue;
654: if (fildes != a_job->aiocbp.aio_fildes) {
655: mutex_exit(&aio->aio_mtx);
656: return EBADF;
657: }
658: } else if (a_job->aiocbp.aio_fildes != fildes)
659: continue;
660:
661: TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
662: TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);
663:
664: /* Decrease the counters */
1.11 ad 665: atomic_dec_uint(&aio_jobs_count);
1.1 rmind 666: aio->jobs_count--;
667: lio = a_job->lio;
1.4 rmind 668: if (lio != NULL && --lio->refcnt != 0)
669: a_job->lio = NULL;
1.1 rmind 670:
671: cn++;
672: if (aiocbp_ptr)
673: break;
674: }
675:
676: /* There are canceled jobs */
677: if (cn)
678: *retval = AIO_CANCELED;
679:
680: /* We cannot cancel current job */
681: a_job = aio->curjob;
682: if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
683: (a_job->aiocb_uptr == aiocbp_ptr)))
684: *retval = AIO_NOTCANCELED;
685:
686: mutex_exit(&aio->aio_mtx);
687:
688: /* Free the jobs after the lock */
689: errcnt = 0;
690: while (!TAILQ_EMPTY(&tmp_jobs_list)) {
691: a_job = TAILQ_FIRST(&tmp_jobs_list);
692: TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
693: /* Set the errno and copy structures back to the user-space */
694: a_job->aiocbp._errno = ECANCELED;
695: a_job->aiocbp._state = JOB_DONE;
696: if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
697: sizeof(struct aiocb)))
698: errcnt++;
699: /* Send a signal if any */
700: aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
1.6 rmind 701: if (a_job->lio) {
702: lio = a_job->lio;
703: aio_sendsig(p, &lio->sig);
704: pool_put(&aio_lio_pool, lio);
705: }
1.4 rmind 706: pool_put(&aio_job_pool, a_job);
1.1 rmind 707: }
708:
709: if (errcnt)
710: return EFAULT;
711:
712: /* Set a correct return value */
713: if (*retval == 0)
714: *retval = AIO_ALLDONE;
715:
716: return 0;
717: }
718:
719: int
1.27 yamt 720: sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
721: register_t *retval)
1.1 rmind 722: {
1.14 dsl 723: /* {
1.1 rmind 724: syscallarg(const struct aiocb *) aiocbp;
1.14 dsl 725: } */
1.1 rmind 726: struct proc *p = l->l_proc;
727: struct aioproc *aio = p->p_aio;
728: struct aiocb aiocbp;
729: int error;
730:
731: if (aio == NULL)
732: return EINVAL;
733:
734: error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
735: if (error)
736: return error;
737:
738: if (aiocbp._state == JOB_NONE)
739: return EINVAL;
740:
741: *retval = aiocbp._errno;
742:
743: return 0;
744: }
745:
746: int
1.27 yamt 747: sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
748: register_t *retval)
1.1 rmind 749: {
1.14 dsl 750: /* {
1.1 rmind 751: syscallarg(int) op;
752: syscallarg(struct aiocb *) aiocbp;
1.14 dsl 753: } */
1.1 rmind 754: int op = SCARG(uap, op);
755:
756: if ((op != O_DSYNC) && (op != O_SYNC))
757: return EINVAL;
758:
759: op = O_DSYNC ? AIO_DSYNC : AIO_SYNC;
760:
761: return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
762: }
763:
764: int
1.27 yamt 765: sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
766: register_t *retval)
1.1 rmind 767: {
1.14 dsl 768: /* {
1.1 rmind 769: syscallarg(struct aiocb *) aiocbp;
1.14 dsl 770: } */
1.1 rmind 771:
772: return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
773: }
774:
775: int
1.27 yamt 776: sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
777: register_t *retval)
1.1 rmind 778: {
1.14 dsl 779: /* {
1.1 rmind 780: syscallarg(struct aiocb *) aiocbp;
1.14 dsl 781: } */
1.1 rmind 782: struct proc *p = l->l_proc;
783: struct aioproc *aio = p->p_aio;
784: struct aiocb aiocbp;
785: int error;
786:
787: if (aio == NULL)
788: return EINVAL;
789:
790: error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
791: if (error)
792: return error;
793:
794: if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
795: return EINVAL;
796:
797: *retval = aiocbp._retval;
798:
799: /* Reset the internal variables */
800: aiocbp._errno = 0;
801: aiocbp._retval = -1;
802: aiocbp._state = JOB_NONE;
803: error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));
804:
805: return error;
806: }
807:
808: int
1.22 christos 809: sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
810: register_t *retval)
1.1 rmind 811: {
1.14 dsl 812: /* {
1.1 rmind 813: syscallarg(const struct aiocb *const[]) list;
814: syscallarg(int) nent;
815: syscallarg(const struct timespec *) timeout;
1.14 dsl 816: } */
1.22 christos 817: struct aiocb **list;
1.1 rmind 818: struct timespec ts;
1.22 christos 819: int error, nent;
1.1 rmind 820:
821: nent = SCARG(uap, nent);
822: if (nent <= 0 || nent > aio_listio_max)
823: return EAGAIN;
824:
825: if (SCARG(uap, timeout)) {
826: /* Convert timespec to ticks */
827: error = copyin(SCARG(uap, timeout), &ts,
828: sizeof(struct timespec));
829: if (error)
830: return error;
1.22 christos 831: }
1.31 ! rmind 832:
1.28 yamt 833: list = kmem_alloc(nent * sizeof(*list), KM_SLEEP);
834: error = copyin(SCARG(uap, list), list, nent * sizeof(*list));
1.22 christos 835: if (error)
836: goto out;
837: error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
838: out:
1.28 yamt 839: kmem_free(list, nent * sizeof(*list));
1.22 christos 840: return error;
841: }
842:
843: int
844: aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
845: struct timespec *ts)
846: {
847: struct proc *p = l->l_proc;
848: struct aioproc *aio;
849: struct aio_job *a_job;
850: int i, error, timo;
851:
852: if (p->p_aio == NULL)
853: return EAGAIN;
854: aio = p->p_aio;
855:
856: if (ts) {
857: timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
858: if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
1.1 rmind 859: timo = 1;
860: if (timo <= 0)
861: return EAGAIN;
862: } else
863: timo = 0;
864:
865: mutex_enter(&aio->aio_mtx);
866: for (;;) {
867: for (i = 0; i < nent; i++) {
868:
869: /* Skip NULL entries */
870: if (aiocbp_list[i] == NULL)
871: continue;
872:
873: /* Skip current job */
874: if (aio->curjob) {
875: a_job = aio->curjob;
876: if (a_job->aiocb_uptr == aiocbp_list[i])
877: continue;
878: }
879:
880: /* Look for a job in the queue */
881: TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
882: if (a_job->aiocb_uptr == aiocbp_list[i])
883: break;
884:
885: if (a_job == NULL) {
886: struct aiocb aiocbp;
887:
888: mutex_exit(&aio->aio_mtx);
889:
1.31 ! rmind 890: /* Check if the job is done. */
1.1 rmind 891: error = copyin(aiocbp_list[i], &aiocbp,
892: sizeof(struct aiocb));
893: if (error == 0 && aiocbp._state != JOB_DONE) {
894: mutex_enter(&aio->aio_mtx);
895: continue;
896: }
897: return error;
898: }
899: }
900:
901: /* Wait for a signal or when timeout occurs */
902: error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
903: if (error) {
904: if (error == EWOULDBLOCK)
905: error = EAGAIN;
906: break;
907: }
908: }
909: mutex_exit(&aio->aio_mtx);
910: return error;
911: }
912:
913: int
1.27 yamt 914: sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
915: register_t *retval)
1.1 rmind 916: {
1.14 dsl 917: /* {
1.1 rmind 918: syscallarg(struct aiocb *) aiocbp;
1.14 dsl 919: } */
1.1 rmind 920:
921: return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
922: }
923:
924: int
1.27 yamt 925: sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
926: register_t *retval)
1.1 rmind 927: {
1.14 dsl 928: /* {
1.1 rmind 929: syscallarg(int) mode;
930: syscallarg(struct aiocb *const[]) list;
931: syscallarg(int) nent;
932: syscallarg(struct sigevent *) sig;
1.14 dsl 933: } */
1.1 rmind 934: struct proc *p = l->l_proc;
935: struct aioproc *aio;
936: struct aiocb **aiocbp_list;
937: struct lio_req *lio;
938: int i, error, errcnt, mode, nent;
939:
940: mode = SCARG(uap, mode);
941: nent = SCARG(uap, nent);
942:
1.12 rmind 943: /* Non-accurate checks for the limit and invalid values */
1.1 rmind 944: if (nent < 1 || nent > aio_listio_max)
945: return EINVAL;
1.12 rmind 946: if (aio_jobs_count + nent > aio_max)
1.1 rmind 947: return EAGAIN;
948:
949: /* Check if AIO structure is initialized, if not - initialize it */
950: if (p->p_aio == NULL)
1.20 ad 951: if (aio_procinit(p))
1.1 rmind 952: return EAGAIN;
953: aio = p->p_aio;
954:
955: /* Create a LIO structure */
1.4 rmind 956: lio = pool_get(&aio_lio_pool, PR_WAITOK);
957: lio->refcnt = 1;
958: error = 0;
959:
960: switch (mode) {
961: case LIO_WAIT:
1.1 rmind 962: memset(&lio->sig, 0, sizeof(struct sigevent));
1.4 rmind 963: break;
964: case LIO_NOWAIT:
965: /* Check for signal, validate it */
966: if (SCARG(uap, sig)) {
967: struct sigevent *sig = &lio->sig;
968:
969: error = copyin(SCARG(uap, sig), &lio->sig,
970: sizeof(struct sigevent));
971: if (error == 0 &&
972: (sig->sigev_signo < 0 ||
973: sig->sigev_signo >= NSIG ||
974: sig->sigev_notify < SIGEV_NONE ||
975: sig->sigev_notify > SIGEV_SA))
976: error = EINVAL;
977: } else
978: memset(&lio->sig, 0, sizeof(struct sigevent));
979: break;
980: default:
981: error = EINVAL;
982: break;
983: }
984:
985: if (error != 0) {
986: pool_put(&aio_lio_pool, lio);
987: return error;
988: }
1.1 rmind 989:
990: /* Get the list from user-space */
1.28 yamt 991: aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP);
1.1 rmind 992: error = copyin(SCARG(uap, list), aiocbp_list,
1.28 yamt 993: nent * sizeof(*aiocbp_list));
1.4 rmind 994: if (error) {
995: mutex_enter(&aio->aio_mtx);
1.1 rmind 996: goto err;
1.4 rmind 997: }
1.1 rmind 998:
999: /* Enqueue all jobs */
1000: errcnt = 0;
1001: for (i = 0; i < nent; i++) {
1002: error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
1003: /*
1004: * According to POSIX, in such error case it may
1005: * fail with other I/O operations initiated.
1006: */
1007: if (error)
1008: errcnt++;
1009: }
1010:
1.4 rmind 1011: mutex_enter(&aio->aio_mtx);
1012:
1.1 rmind 1013: /* Return an error, if any */
1014: if (errcnt) {
1015: error = EIO;
1016: goto err;
1017: }
1018:
1019: if (mode == LIO_WAIT) {
1020: /*
1021: * Wait for AIO completion. In such case,
1022: * the LIO structure will be freed here.
1023: */
1.4 rmind 1024: while (lio->refcnt > 1 && error == 0)
1.1 rmind 1025: error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
1026: if (error)
1027: error = EINTR;
1028: }
1029:
1030: err:
1.4 rmind 1031: if (--lio->refcnt != 0)
1032: lio = NULL;
1033: mutex_exit(&aio->aio_mtx);
1034: if (lio != NULL) {
1035: aio_sendsig(p, &lio->sig);
1036: pool_put(&aio_lio_pool, lio);
1037: }
1.28 yamt 1038: kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list));
1.1 rmind 1039: return error;
1040: }
1041:
1042: /*
1043: * SysCtl
1044: */
1045:
1046: static int
1047: sysctl_aio_listio_max(SYSCTLFN_ARGS)
1048: {
1049: struct sysctlnode node;
1050: int error, newsize;
1051:
1052: node = *rnode;
1053: node.sysctl_data = &newsize;
1054:
1055: newsize = aio_listio_max;
1056: error = sysctl_lookup(SYSCTLFN_CALL(&node));
1057: if (error || newp == NULL)
1058: return error;
1059:
1060: if (newsize < 1 || newsize > aio_max)
1061: return EINVAL;
1062: aio_listio_max = newsize;
1063:
1064: return 0;
1065: }
1066:
1067: static int
1068: sysctl_aio_max(SYSCTLFN_ARGS)
1069: {
1070: struct sysctlnode node;
1071: int error, newsize;
1072:
1073: node = *rnode;
1074: node.sysctl_data = &newsize;
1075:
1076: newsize = aio_max;
1077: error = sysctl_lookup(SYSCTLFN_CALL(&node));
1078: if (error || newp == NULL)
1079: return error;
1080:
1081: if (newsize < 1 || newsize < aio_listio_max)
1082: return EINVAL;
1083: aio_max = newsize;
1084:
1085: return 0;
1086: }
1087:
1088: SYSCTL_SETUP(sysctl_aio_setup, "sysctl aio setup")
1089: {
1090:
1091: sysctl_createv(clog, 0, NULL, NULL,
1092: CTLFLAG_PERMANENT,
1093: CTLTYPE_NODE, "kern", NULL,
1094: NULL, 0, NULL, 0,
1095: CTL_KERN, CTL_EOL);
1096: sysctl_createv(clog, 0, NULL, NULL,
1097: CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
1098: CTLTYPE_INT, "posix_aio",
1099: SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
1100: "Asynchronous I/O option to which the "
1101: "system attempts to conform"),
1102: NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
1103: CTL_KERN, CTL_CREATE, CTL_EOL);
1104: sysctl_createv(clog, 0, NULL, NULL,
1105: CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1106: CTLTYPE_INT, "aio_listio_max",
1107: SYSCTL_DESCR("Maximum number of asynchronous I/O "
1108: "operations in a single list I/O call"),
1109: sysctl_aio_listio_max, 0, &aio_listio_max, 0,
1110: CTL_KERN, CTL_CREATE, CTL_EOL);
1111: sysctl_createv(clog, 0, NULL, NULL,
1112: CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1113: CTLTYPE_INT, "aio_max",
1114: SYSCTL_DESCR("Maximum number of asynchronous I/O "
1115: "operations"),
1116: sysctl_aio_max, 0, &aio_max, 0,
1117: CTL_KERN, CTL_CREATE, CTL_EOL);
1118: }
1119:
1120: /*
1121: * Debugging
1122: */
1123: #if defined(DDB)
1124: void
1125: aio_print_jobs(void (*pr)(const char *, ...))
1126: {
1127: struct proc *p = (curlwp == NULL ? NULL : curlwp->l_proc);
1128: struct aioproc *aio;
1129: struct aio_job *a_job;
1130: struct aiocb *aiocbp;
1131:
1132: if (p == NULL) {
1133: (*pr)("AIO: We are not in the processes right now.\n");
1134: return;
1135: }
1136:
1137: aio = p->p_aio;
1138: if (aio == NULL) {
1139: (*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
1140: return;
1141: }
1142:
1143: (*pr)("AIO: PID = %d\n", p->p_pid);
1144: (*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
1145: (*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);
1146:
1147: if (aio->curjob) {
1148: a_job = aio->curjob;
1149: (*pr)("\nAIO current job:\n");
1150: (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
1151: a_job->aio_op, a_job->aiocbp._errno,
1152: a_job->aiocbp._state, a_job->aiocb_uptr);
1153: aiocbp = &a_job->aiocbp;
1154: (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n",
1155: aiocbp->aio_fildes, aiocbp->aio_offset,
1156: aiocbp->aio_buf, aiocbp->aio_nbytes);
1157: }
1158:
1159: (*pr)("\nAIO queue:\n");
1160: TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
1161: (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
1162: a_job->aio_op, a_job->aiocbp._errno,
1163: a_job->aiocbp._state, a_job->aiocb_uptr);
1164: aiocbp = &a_job->aiocbp;
1165: (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n",
1166: aiocbp->aio_fildes, aiocbp->aio_offset,
1167: aiocbp->aio_buf, aiocbp->aio_nbytes);
1168: }
1169: }
1170: #endif /* defined(DDB) */
CVSweb <webmaster@jp.NetBSD.org>