[BACK]Return to sys_pipe.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/sys_pipe.c, Revision 1.86

1.86    ! ad          1: /*     $NetBSD: sys_pipe.c,v 1.85 2007/07/09 21:10:56 ad Exp $ */
1.35      pk          2:
                      3: /*-
1.80      ad          4:  * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc.
1.35      pk          5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
1.80      ad          8:  * by Paul Kranenburg, and by Andrew Doran.
1.35      pk          9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. All advertising materials mentioning features or use of this software
                     19:  *    must display the following acknowledgement:
                     20:  *        This product includes software developed by the NetBSD
                     21:  *        Foundation, Inc. and its contributors.
                     22:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     23:  *    contributors may be used to endorse or promote products derived
                     24:  *    from this software without specific prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     27:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     28:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     29:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     30:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36:  * POSSIBILITY OF SUCH DAMAGE.
                     37:  */
1.2       jdolecek   38:
1.1       jdolecek   39: /*
                     40:  * Copyright (c) 1996 John S. Dyson
                     41:  * All rights reserved.
                     42:  *
                     43:  * Redistribution and use in source and binary forms, with or without
                     44:  * modification, are permitted provided that the following conditions
                     45:  * are met:
                     46:  * 1. Redistributions of source code must retain the above copyright
                     47:  *    notice immediately at the beginning of the file, without modification,
                     48:  *    this list of conditions, and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
                     52:  * 3. Absolutely no warranty of function or purpose is made by the author
                     53:  *    John S. Dyson.
                     54:  * 4. Modifications may be freely made to this file if the above conditions
                     55:  *    are met.
                     56:  *
1.24      jdolecek   57:  * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.95 2002/03/09 22:06:31 alfred Exp $
1.1       jdolecek   58:  */
                     59:
                     60: /*
                     61:  * This file contains a high-performance replacement for the socket-based
                     62:  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
                     63:  * all features of sockets, but does do everything that pipes normally
                     64:  * do.
1.2       jdolecek   65:  *
                     66:  * Adaption for NetBSD UVM, including uvm_loan() based direct write, was
                     67:  * written by Jaromir Dolecek.
1.1       jdolecek   68:  */
                     69:
                     70: /*
                     71:  * This code has two modes of operation, a small write mode and a large
                     72:  * write mode.  The small write mode acts like conventional pipes with
                     73:  * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
                     74:  * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
1.35      pk         75:  * and PIPE_SIZE in size it is mapped read-only into the kernel address space
                     76:  * using the UVM page loan facility from where the receiving process can copy
                     77:  * the data directly from the pages in the sending process.
1.1       jdolecek   78:  *
                     79:  * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
                     80:  * happen for small transfers so that the system will not spend all of
                     81:  * its time context switching.  PIPE_SIZE is constrained by the
                     82:  * amount of kernel virtual memory.
                     83:  */
1.19      lukem      84:
                     85: #include <sys/cdefs.h>
1.86    ! ad         86: __KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.85 2007/07/09 21:10:56 ad Exp $");
1.2       jdolecek   87:
1.1       jdolecek   88: #include <sys/param.h>
                     89: #include <sys/systm.h>
1.2       jdolecek   90: #include <sys/proc.h>
1.1       jdolecek   91: #include <sys/fcntl.h>
                     92: #include <sys/file.h>
                     93: #include <sys/filedesc.h>
                     94: #include <sys/filio.h>
1.24      jdolecek   95: #include <sys/kernel.h>
1.1       jdolecek   96: #include <sys/ttycom.h>
                     97: #include <sys/stat.h>
1.24      jdolecek   98: #include <sys/malloc.h>
1.1       jdolecek   99: #include <sys/poll.h>
1.2       jdolecek  100: #include <sys/signalvar.h>
                    101: #include <sys/vnode.h>
                    102: #include <sys/uio.h>
                    103: #include <sys/lock.h>
                    104: #include <sys/select.h>
                    105: #include <sys/mount.h>
                    106: #include <sys/syscallargs.h>
                    107: #include <uvm/uvm.h>
                    108: #include <sys/sysctl.h>
1.72      elad      109: #include <sys/kauth.h>
1.2       jdolecek  110:
1.1       jdolecek  111: #include <sys/pipe.h>
                    112:
1.17      jdolecek  113: /*
1.1       jdolecek  114:  * Use this define if you want to disable *fancy* VM things.  Expect an
1.35      pk        115:  * approx 30% decrease in transfer rate.
1.1       jdolecek  116:  */
                    117: /* #define PIPE_NODIRECT */
                    118:
                    119: /*
                    120:  * interfaces to the outside world
                          *
                          * The entry points declared below are static; they are exported only
                          * through the pipeops table that follows.
                    121:  */
1.63      perry     122: static int pipe_read(struct file *fp, off_t *offset, struct uio *uio,
1.72      elad      123:                kauth_cred_t cred, int flags);
1.63      perry     124: static int pipe_write(struct file *fp, off_t *offset, struct uio *uio,
1.72      elad      125:                kauth_cred_t cred, int flags);
1.69      christos  126: static int pipe_close(struct file *fp, struct lwp *l);
                    127: static int pipe_poll(struct file *fp, int events, struct lwp *l);
1.27      jdolecek  128: static int pipe_kqfilter(struct file *fp, struct knote *kn);
1.69      christos  129: static int pipe_stat(struct file *fp, struct stat *sb, struct lwp *l);
1.38      dsl       130: static int pipe_ioctl(struct file *fp, u_long cmd, void *data,
1.69      christos  131:                struct lwp *l);
1.1       jdolecek  132:
                         /*
                          * File operations vector for pipes.  sys_pipe() stores its address in
                          * f_ops of both file structures it sets up.  The initializer is
                          * positional, so the entries must stay in the member order of
                          * struct fileops (declared outside this file).  The fourth entry is
                          * fnullop_fcntl; no pipe-specific fcntl routine is declared here.
                          */
1.62      christos  133: static const struct fileops pipeops = {
                    134:        pipe_read, pipe_write, pipe_ioctl, fnullop_fcntl, pipe_poll,
1.35      pk        135:        pipe_stat, pipe_close, pipe_kqfilter
                    136: };
1.1       jdolecek  137:
                    138: /*
                    139:  * Default pipe buffer size(s), this can be kind-of large now because pipe
                    140:  * space is pageable.  The pipe code will try to maintain locality of
                    141:  * reference for performance reasons, so small amounts of outstanding I/O
                    142:  * will not wipe the cache.
                          *
                          * MINPIPESIZE is the low-water mark tested at the end of pipe_read():
                          * when fewer bytes than this remain buffered, a writer that set
                          * PIPE_WANTW is woken.  MAXPIPESIZE is not referenced in the part of
                          * the file reviewed here.
                    143:  */
                    144: #define MINPIPESIZE (PIPE_SIZE/3)
                    145: #define MAXPIPESIZE (2*PIPE_SIZE/3)
                    146:
                    147: /*
                    148:  * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
                    149:  * is there so that on large systems, we don't exhaust it.
                          *
                          * The default is 8 MiB.  NOTE(review): no use of maxpipekva is visible
                          * in the part of the file reviewed here.
                    150:  */
                    151: #define MAXPIPEKVA (8*1024*1024)
1.2       jdolecek  152: static int maxpipekva = MAXPIPEKVA;
1.1       jdolecek  153:
                    154: /*
                    155:  * Limit for direct transfers, we cannot, of course limit
                    156:  * the amount of kva for pipes in general though.
                          *
                          * The default is 16 MiB.  NOTE(review): no use of limitpipekva is
                          * visible in the part of the file reviewed here.
                    157:  */
                    158: #define LIMITPIPEKVA (16*1024*1024)
1.2       jdolecek  159: static int limitpipekva = LIMITPIPEKVA;
1.1       jdolecek  160:
                    161: /*
                    162:  * Limit the number of "big" pipes
                          *
                          * maxbigpipes defaults to 32.  nbigpipe is presumably the number of
                          * big pipes currently in existence; neither variable is referenced in
                          * the part of the file reviewed here -- TODO confirm.
                    163:  */
1.2       jdolecek  164: #define LIMITBIGPIPES  32
                    165: static int maxbigpipes = LIMITBIGPIPES;
                    166: static int nbigpipe = 0;
1.1       jdolecek  167:
1.2       jdolecek  168: /*
                    169:  * Amount of KVA consumed by pipe buffers.
                          *
                          * pipespace() adds the size of every buffer it installs.
                          * NOTE(review): that update is a plain += and pipespace() itself takes
                          * no lock around it; confirm what serializes concurrent updates.
                    170:  */
                    171: static int amountpipekva = 0;
1.34      thorpej   172:
                         /*
                          * Defines the malloc type M_PIPE.  No allocation using it is visible
                          * in the part of the file reviewed here.
                          */
                    173: MALLOC_DEFINE(M_PIPE, "pipe", "Pipe structures");
1.1       jdolecek  174:
                         /*
                          * Forward declarations of file-local helpers.  Note that the
                          * pipeselwakeup() prototype names its first parameter "pipe" while the
                          * definition calls it "selp"; the types agree.
                          */
1.42      christos  175: static void pipeclose(struct file *fp, struct pipe *pipe);
1.35      pk        176: static void pipe_free_kmem(struct pipe *pipe);
                    177: static int pipe_create(struct pipe **pipep, int allockva);
                    178: static int pipelock(struct pipe *pipe, int catch);
1.70      perry     179: static inline void pipeunlock(struct pipe *pipe);
1.66      christos  180: static void pipeselwakeup(struct pipe *pipe, struct pipe *sigp, int code);
                         /* The direct-write path is declared only when PIPE_NODIRECT is not defined. */
1.1       jdolecek  181: #ifndef PIPE_NODIRECT
1.42      christos  182: static int pipe_direct_write(struct file *fp, struct pipe *wpipe,
                    183:     struct uio *uio);
1.1       jdolecek  184: #endif
1.35      pk        185: static int pipespace(struct pipe *pipe, int size);
1.2       jdolecek  186:
                    187: #ifndef PIPE_NODIRECT
1.24      jdolecek  188: static int pipe_loan_alloc(struct pipe *, int);
                    189: static void pipe_loan_free(struct pipe *);
1.2       jdolecek  190: #endif /* PIPE_NODIRECT */
                    191:
                         /*
                          * Pool of struct pipe objects; pipe_create() allocates from it with
                          * pool_get().
                          */
1.57      simonb    192: static POOL_INIT(pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl",
1.81      ad        193:     &pool_allocator_nointr, IPL_NONE);
1.24      jdolecek  194:
1.82      ad        195: static krwlock_t pipe_peer_lock;
                    196:
                         /*
                          * pipe_init: initialize the file-scope reader/writer lock
                          * pipe_peer_lock declared just above.  Takes no arguments and returns
                          * nothing.
                          *
                          * NOTE(review): presumably called once during startup before any pipe
                          * exists, and the lock presumably protects the pipe_peer links; neither
                          * the caller nor any user of the lock is visible in the part of the
                          * file reviewed here -- confirm.
                          */
                    197: void
                    198: pipe_init(void)
                    199: {
                    200:
                    201:        rw_init(&pipe_peer_lock);
                    202: }
                    203:
1.1       jdolecek  204: /*
                    205:  * The pipe system call for the DTYPE_PIPE type of pipes
                          *
                          * Creates two struct pipe objects and two file descriptors that refer
                          * to them, and links the pipes to each other through pipe_peer.
                          *
                          *   l       calling LWP; passed to falloc() and FILE_UNUSE(), and its
                          *           process's descriptor table is used on the error path
                          *   v       unused (hence ARGSUSED)
                          *   retval  retval[0] receives the read descriptor (FREAD),
                          *           retval[1] the write descriptor (FWRITE)
                          *
                          * Returns 0 on success, ENFILE if either pipe_create() call fails
                          * (whatever error pipe_create() itself reported), or the error from
                          * falloc().  On failure everything allocated so far is released
                          * again, but retval[0] may already have been written.
                    206:  */
                    207:
                    208: /* ARGSUSED */
1.2       jdolecek  209: int
1.77      yamt      210: sys_pipe(struct lwp *l, void *v, register_t *retval)
1.1       jdolecek  211: {
                    212:        struct file *rf, *wf;
1.53      dsl       213:        struct pipe *rpipe, *wpipe;
1.1       jdolecek  214:        int fd, error;
1.2       jdolecek  215:
                                /*
                                 * Only the read-side pipe gets buffer KVA up front (allockva 1
                                 * versus 0).  If the first pipe_create() fails, the second is
                                 * skipped by ||, so pipeclose() is then called with a NULL
                                 * wpipe.  NOTE(review): pipeclose() is defined outside the part
                                 * of the file reviewed here -- confirm it accepts NULL.
                                 */
1.6       jdolecek  216:        rpipe = wpipe = NULL;
                    217:        if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
1.42      christos  218:                pipeclose(NULL, rpipe);
                    219:                pipeclose(NULL, wpipe);
1.6       jdolecek  220:                return (ENFILE);
                    221:        }
                    222:
1.2       jdolecek  223:        /*
                    224:         * Note: the file structure returned from falloc() is marked
                    225:         * as 'larval' initially. Unless we mark it as 'mature' by
                    226:         * FILE_SET_MATURE(), any attempt to do anything with it would
                    227:         * return EBADF, including e.g. dup(2) or close(2). This avoids
                    228:         * file descriptor races if we block in the second falloc().
                    229:         */
                    230:
                                /* Read end: descriptor goes to retval[0], backed by rpipe. */
1.74      ad        231:        error = falloc(l, &rf, &fd);
1.2       jdolecek  232:        if (error)
                    233:                goto free2;
                    234:        retval[0] = fd;
                    235:        rf->f_flag = FREAD;
                    236:        rf->f_type = DTYPE_PIPE;
1.79      christos  237:        rf->f_data = (void *)rpipe;
1.2       jdolecek  238:        rf->f_ops = &pipeops;
                    239:
                                /* Write end: descriptor goes to retval[1], backed by wpipe. */
1.74      ad        240:        error = falloc(l, &wf, &fd);
1.2       jdolecek  241:        if (error)
                    242:                goto free3;
                    243:        retval[1] = fd;
                    244:        wf->f_flag = FWRITE;
                    245:        wf->f_type = DTYPE_PIPE;
1.79      christos  246:        wf->f_data = (void *)wpipe;
1.2       jdolecek  247:        wf->f_ops = &pipeops;
                    248:
                                /* Cross-link the two ends. */
                    249:        rpipe->pipe_peer = wpipe;
                    250:        wpipe->pipe_peer = rpipe;
1.1       jdolecek  251:
                                /* Both files are fully set up: make them usable and drop our use. */
1.2       jdolecek  252:        FILE_SET_MATURE(rf);
                    253:        FILE_SET_MATURE(wf);
1.69      christos  254:        FILE_UNUSE(rf, l);
                    255:        FILE_UNUSE(wf, l);
1.1       jdolecek  256:        return (0);
                                /*
                                 * Second falloc() failed: release the read file and its
                                 * descriptor slot, then fall through to free both pipes.
                                 */
1.2       jdolecek  257: free3:
1.69      christos  258:        FILE_UNUSE(rf, l);
1.2       jdolecek  259:        ffree(rf);
1.74      ad        260:        fdremove(l->l_proc->p_fd, retval[0]);
1.2       jdolecek  261: free2:
1.42      christos  262:        pipeclose(NULL, wpipe);
                    263:        pipeclose(NULL, rpipe);
1.2       jdolecek  264:
                    265:        return (error);
1.1       jdolecek  266: }
                    267:
                    268: /*
                    269:  * Allocate kva for the pipe's circular buffer; the space is pageable.
                    270:  * This routine will 'realloc' the size of a pipe safely: if it fails
                    271:  * it will retain the old buffer.
                    272:  * If it fails it will return ENOMEM.
                          *
                          *   pipe  pipe whose pipe_buffer is (re)allocated
                          *   size  requested buffer size; the allocation is rounded up with
                          *         round_page(), but pipe_buffer.size and amountpipekva
                          *         record the unrounded value
                          *
                          * Returns 0 on success.  Nothing is copied from an old buffer; the
                          * in/out/cnt indices are reset to zero.
                    273:  */
                    274: static int
1.68      thorpej   275: pipespace(struct pipe *pipe, int size)
1.1       jdolecek  276: {
1.79      christos  277:        void *buffer;
1.2       jdolecek  278:        /*
1.35      pk        279:         * Allocate pageable virtual address space. Physical memory is
                    280:         * allocated on demand.
1.2       jdolecek  281:         */
1.79      christos  282:        buffer = (void *) uvm_km_alloc(kernel_map, round_page(size), 0,
1.65      yamt      283:            UVM_KMF_PAGEABLE);
                                /* A failed allocation is detected as NULL after the cast. */
1.2       jdolecek  284:        if (buffer == NULL)
                    285:                return (ENOMEM);
1.1       jdolecek  286:
                    287:        /* free old resources if we're resizing */
                                /*
                                 * This happens only after the new allocation has succeeded,
                                 * which is what keeps the old buffer intact on ENOMEM.
                                 */
1.35      pk        288:        pipe_free_kmem(pipe);
                    289:        pipe->pipe_buffer.buffer = buffer;
                    290:        pipe->pipe_buffer.size = size;
                    291:        pipe->pipe_buffer.in = 0;
                    292:        pipe->pipe_buffer.out = 0;
                    293:        pipe->pipe_buffer.cnt = 0;
                                /*
                                 * NOTE(review): plain += on a file-scope counter with no lock
                                 * taken in this function; confirm what serializes callers.
                                 */
                    294:        amountpipekva += pipe->pipe_buffer.size;
1.1       jdolecek  295:        return (0);
                    296: }
                    297:
                    298: /*
1.35      pk        299:  * Initialize and allocate VM and memory for pipe.
                          *
                          *   pipep     out: receives the new pipe; it is stored before any
                          *             step that can fail, so it is set even when an error
                          *             is returned
                          *   allockva  non-zero to also allocate a PIPE_SIZE buffer through
                          *             pipespace()
                          *
                          * Returns 0, or the error from pipespace().  On error the pipe is
                          * left allocated and initialized; the caller has to dispose of it
                          * (sys_pipe() does so with pipeclose()).
1.1       jdolecek  300:  */
                    301: static int
1.68      thorpej   302: pipe_create(struct pipe **pipep, int allockva)
1.1       jdolecek  303: {
1.35      pk        304:        struct pipe *pipe;
1.1       jdolecek  305:        int error;
                    306:
                                /* PR_WAITOK is passed; the result is used without a NULL check. */
1.55      pooka     307:        pipe = *pipep = pool_get(&pipe_pool, PR_WAITOK);
1.1       jdolecek  308:
1.63      perry     309:        /* Initialize */
1.35      pk        310:        memset(pipe, 0, sizeof(struct pipe));
                                /*
                                 * Start with PIPE_SIGNALR set; pipe_read() tests this flag when
                                 * deciding whether to notify the writer and clears it afterwards.
                                 */
                    311:        pipe->pipe_state = PIPE_SIGNALR;
1.1       jdolecek  312:
                                /* All three timestamps start out equal to the creation time. */
1.73      kardel    313:        getmicrotime(&pipe->pipe_ctime);
1.35      pk        314:        pipe->pipe_atime = pipe->pipe_ctime;
                    315:        pipe->pipe_mtime = pipe->pipe_ctime;
                                /*
                                 * pipe_cv is the data/space wait channel used by pipe_read();
                                 * pipe_lkcv is the one pipelock()/pipeunlock() use for the
                                 * PIPE_LOCKFL I/O lock.
                                 */
1.80      ad        316:        mutex_init(&pipe->pipe_lock, MUTEX_DEFAULT, IPL_NONE);
                    317:        cv_init(&pipe->pipe_cv, "pipe");
                    318:        cv_init(&pipe->pipe_lkcv, "pipelk");
1.86    ! ad        319:        selinit(&pipe->pipe_sel);
1.1       jdolecek  320:
1.53      dsl       321:        if (allockva && (error = pipespace(pipe, PIPE_SIZE)))
                    322:                return (error);
                    323:
1.1       jdolecek  324:        return (0);
                    325: }
                    326:
                    327:
                    328: /*
1.35      pk        329:  * Lock a pipe for I/O, blocking other access.
                    330:  * Called with the pipe mutex (pipe_lock) held.
                    331:  * Returns with the mutex released on success.
                          *
                          * The "I/O lock" is the PIPE_LOCKFL bit in pipe_state, not the mutex.
                          *
                          *   pipe   pipe to lock
                          *   catch  non-zero to sleep with cv_wait_sig(), so the wait can fail
                          *          with an error; zero to sleep with cv_wait()
                          *
                          * Returns 0 with PIPE_LOCKFL set and the mutex dropped, or the error
                          * from cv_wait_sig() with PIPE_LOCKFL not taken.  In the error case
                          * mutex_exit() is never reached, so the mutex is still held (the
                          * condvar wait is expected to return with it re-acquired -- see
                          * condvar(9)).
1.1       jdolecek  332:  */
1.35      pk        333: static int
1.68      thorpej   334: pipelock(struct pipe *pipe, int catch)
1.1       jdolecek  335: {
1.80      ad        336:        int error;
1.1       jdolecek  337:
1.80      ad        338:        KASSERT(mutex_owned(&pipe->pipe_lock));
1.35      pk        339:
1.67      yamt      340:        while (pipe->pipe_state & PIPE_LOCKFL) {
                                        /* Ask the current holder to wake us from pipeunlock(). */
                    341:                pipe->pipe_state |= PIPE_LWANT;
1.80      ad        342:                if (catch) {
                    343:                        error = cv_wait_sig(&pipe->pipe_lkcv,
                    344:                            &pipe->pipe_lock);
                                                /*
                                                 * PIPE_LWANT is left set on this path; the next
                                                 * pipeunlock() clears it and broadcasts.
                                                 */
                    345:                        if (error != 0)
                    346:                                return error;
                    347:                } else
                    348:                        cv_wait(&pipe->pipe_lkcv, &pipe->pipe_lock);
1.1       jdolecek  349:        }
1.67      yamt      350:
                                /* Take the I/O lock, then drop the mutex for the caller. */
                    351:        pipe->pipe_state |= PIPE_LOCKFL;
1.80      ad        352:        mutex_exit(&pipe->pipe_lock);
1.67      yamt      353:
                    354:        return 0;
1.1       jdolecek  355: }
                    356:
                    357: /*
                    358:  * unlock a pipe I/O lock
                          *
                          * Clears PIPE_LOCKFL (asserting that it was set) and, if a waiter
                          * recorded itself with PIPE_LWANT in pipelock(), clears that flag and
                          * broadcasts on pipe_lkcv.  The pipe mutex is neither taken nor
                          * released here; both calls in pipe_read() are made with it held.
                    359:  */
1.70      perry     360: static inline void
1.68      thorpej   361: pipeunlock(struct pipe *pipe)
1.1       jdolecek  362: {
1.24      jdolecek  363:
1.67      yamt      364:        KASSERT(pipe->pipe_state & PIPE_LOCKFL);
                    365:
                    366:        pipe->pipe_state &= ~PIPE_LOCKFL;
                    367:        if (pipe->pipe_state & PIPE_LWANT) {
                    368:                pipe->pipe_state &= ~PIPE_LWANT;
1.80      ad        369:                cv_broadcast(&pipe->pipe_lkcv);
1.67      yamt      370:        }
1.1       jdolecek  371: }
                    372:
1.2       jdolecek  373: /*
                    374:  * Select/poll wakeup. This also sends SIGIO, via fownsignal() and
                    375:  * sigp->pipe_pgid, when the 'sigp' side of the pipe is in async mode.
                          *
                          *   selp  pipe whose pipe_sel is notified (always)
                          *   sigp  pipe whose pipe_pgid is handed to fownsignal(); may be NULL,
                          *         and nothing is sent unless its PIPE_ASYNC flag is set
                          *   code  POLL_IN, POLL_OUT, POLL_HUP or POLL_ERR; passed through to
                          *         fownsignal() and translated to a poll event band
                    376:  */
1.35      pk        377: static void
1.68      thorpej   378: pipeselwakeup(struct pipe *selp, struct pipe *sigp, int code)
1.1       jdolecek  379: {
1.43      jdolecek  380:        int band;
1.27      jdolecek  381:
1.48      jdolecek  382:        selnotify(&selp->pipe_sel, NOTE_SUBMIT);
1.43      jdolecek  383:
                                /* No signal target, or async notification not requested. */
1.35      pk        384:        if (sigp == NULL || (sigp->pipe_state & PIPE_ASYNC) == 0)
                    385:                return;
                    386:
1.43      jdolecek  387:        switch (code) {
1.42      christos  388:        case POLL_IN:
1.43      jdolecek  389:                band = POLLIN|POLLRDNORM;
1.42      christos  390:                break;
                    391:        case POLL_OUT:
1.43      jdolecek  392:                band = POLLOUT|POLLWRNORM;
1.42      christos  393:                break;
                    394:        case POLL_HUP:
1.43      jdolecek  395:                band = POLLHUP;
1.42      christos  396:                break;
                                /*
                                 * Guarded so that the switch has no duplicate case label where
                                 * the two constants are equal.
                                 */
                    397: #if POLL_HUP != POLL_ERR
                    398:        case POLL_ERR:
1.43      jdolecek  399:                band = POLLERR;
1.42      christos  400:                break;
                    401: #endif
                                /* Unknown code: SIGIO is still sent below, with an empty band. */
                    402:        default:
1.45      christos  403:                band = 0;
1.42      christos  404: #ifdef DIAGNOSTIC
                    405:                printf("bad siginfo code %d in pipe notification.\n", code);
                    406: #endif
                    407:                break;
                    408:        }
1.43      jdolecek  409:
1.44      christos  410:        fownsignal(sigp->pipe_pgid, SIGIO, code, band, selp);
1.1       jdolecek  411: }
                    412:
                    413: /* ARGSUSED */
1.2       jdolecek  414: static int
1.77      yamt      415: pipe_read(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
                    416:     int flags)
1.1       jdolecek  417: {
                    418:        struct pipe *rpipe = (struct pipe *) fp->f_data;
1.35      pk        419:        struct pipebuf *bp = &rpipe->pipe_buffer;
1.1       jdolecek  420:        int error;
1.2       jdolecek  421:        size_t nread = 0;
                    422:        size_t size;
                    423:        size_t ocnt;
1.1       jdolecek  424:
1.80      ad        425:        mutex_enter(&rpipe->pipe_lock);
1.1       jdolecek  426:        ++rpipe->pipe_busy;
1.35      pk        427:        ocnt = bp->cnt;
1.28      jdolecek  428:
1.35      pk        429: again:
1.1       jdolecek  430:        error = pipelock(rpipe, 1);
                    431:        if (error)
                    432:                goto unlocked_error;
1.2       jdolecek  433:
1.1       jdolecek  434:        while (uio->uio_resid) {
                    435:                /*
                    436:                 * normal pipe buffer receive
                    437:                 */
1.35      pk        438:                if (bp->cnt > 0) {
                    439:                        size = bp->size - bp->out;
                    440:                        if (size > bp->cnt)
                    441:                                size = bp->cnt;
1.2       jdolecek  442:                        if (size > uio->uio_resid)
                    443:                                size = uio->uio_resid;
1.1       jdolecek  444:
1.79      christos  445:                        error = uiomove((char *)bp->buffer + bp->out, size, uio);
1.1       jdolecek  446:                        if (error)
                    447:                                break;
                    448:
1.35      pk        449:                        bp->out += size;
                    450:                        if (bp->out >= bp->size)
                    451:                                bp->out = 0;
1.1       jdolecek  452:
1.35      pk        453:                        bp->cnt -= size;
1.1       jdolecek  454:
                    455:                        /*
                    456:                         * If there is no more to read in the pipe, reset
                    457:                         * its pointers to the beginning.  This improves
                    458:                         * cache hit stats.
                    459:                         */
1.35      pk        460:                        if (bp->cnt == 0) {
                    461:                                bp->in = 0;
                    462:                                bp->out = 0;
1.1       jdolecek  463:                        }
                    464:                        nread += size;
1.85      ad        465:                        continue;
                    466:                }
                    467:
                    468:                /* Lock to see up-to-date value of pipe_state. */
                    469:                mutex_enter(&rpipe->pipe_lock);
                    470:
1.1       jdolecek  471: #ifndef PIPE_NODIRECT
1.85      ad        472:                if ((rpipe->pipe_state & PIPE_DIRECTR) != 0) {
1.35      pk        473:                        /*
                    474:                         * Direct copy, bypassing a kernel buffer.
                    475:                         */
1.79      christos  476:                        void *  va;
1.35      pk        477:
                    478:                        KASSERT(rpipe->pipe_state & PIPE_DIRECTW);
1.85      ad        479:                        mutex_exit(&rpipe->pipe_lock);
1.35      pk        480:
                    481:                        size = rpipe->pipe_map.cnt;
1.2       jdolecek  482:                        if (size > uio->uio_resid)
                    483:                                size = uio->uio_resid;
1.1       jdolecek  484:
1.79      christos  485:                        va = (char *)rpipe->pipe_map.kva + rpipe->pipe_map.pos;
1.1       jdolecek  486:                        error = uiomove(va, size, uio);
                    487:                        if (error)
                    488:                                break;
                    489:                        nread += size;
                    490:                        rpipe->pipe_map.pos += size;
                    491:                        rpipe->pipe_map.cnt -= size;
                    492:                        if (rpipe->pipe_map.cnt == 0) {
1.80      ad        493:                                mutex_enter(&rpipe->pipe_lock);
1.35      pk        494:                                rpipe->pipe_state &= ~PIPE_DIRECTR;
1.80      ad        495:                                cv_broadcast(&rpipe->pipe_cv);
                    496:                                mutex_exit(&rpipe->pipe_lock);
1.1       jdolecek  497:                        }
1.85      ad        498:                        continue;
                    499:                }
1.1       jdolecek  500: #endif
1.85      ad        501:                /*
                    502:                 * Break if some data was read.
                    503:                 */
                    504:                if (nread > 0) {
                    505:                        mutex_exit(&rpipe->pipe_lock);
                    506:                        break;
                    507:                }
1.1       jdolecek  508:
1.85      ad        509:                /*
                    510:                 * detect EOF condition
                    511:                 * read returns 0 on EOF, no need to set error
                    512:                 */
                    513:                if (rpipe->pipe_state & PIPE_EOF) {
                    514:                        mutex_exit(&rpipe->pipe_lock);
                    515:                        break;
                    516:                }
1.36      pk        517:
1.85      ad        518:                /*
                    519:                 * don't block on non-blocking I/O
                    520:                 */
                    521:                if (fp->f_flag & FNONBLOCK) {
                    522:                        mutex_exit(&rpipe->pipe_lock);
                    523:                        error = EAGAIN;
                    524:                        break;
                    525:                }
1.1       jdolecek  526:
1.85      ad        527:                /*
                    528:                 * Unlock the pipe buffer for our remaining processing.
                    529:                 * We will either break out with an error or we will
                    530:                 * sleep and relock to loop.
                    531:                 */
                    532:                pipeunlock(rpipe);
1.2       jdolecek  533:
1.85      ad        534:                /*
                    535:                 * Re-check to see if more direct writes are pending.
                    536:                 */
                    537:                if ((rpipe->pipe_state & PIPE_DIRECTR) != 0)
                    538:                        goto again;
1.1       jdolecek  539:
1.85      ad        540:                /*
                    541:                 * We want to read more, wake up select/poll.
                    542:                 */
                    543:                pipeselwakeup(rpipe, rpipe->pipe_peer, POLL_IN);
1.35      pk        544:
1.85      ad        545:                /*
                    546:                 * If the "write-side" is blocked, wake it up now.
                    547:                 */
                    548:                if (rpipe->pipe_state & PIPE_WANTW) {
                    549:                        rpipe->pipe_state &= ~PIPE_WANTW;
                    550:                        cv_broadcast(&rpipe->pipe_cv);
                    551:                }
1.2       jdolecek  552:
1.85      ad        553:                /* Now wait until the pipe is filled */
                    554:                rpipe->pipe_state |= PIPE_WANTR;
                    555:                error = cv_wait_sig(&rpipe->pipe_cv, &rpipe->pipe_lock);
                    556:                if (error != 0)
                    557:                        goto unlocked_error;
                    558:                goto again;
1.1       jdolecek  559:        }
1.35      pk        560:
                    561:        if (error == 0)
1.73      kardel    562:                getmicrotime(&rpipe->pipe_atime);
1.35      pk        563:
1.80      ad        564:        mutex_enter(&rpipe->pipe_lock);
1.1       jdolecek  565:        pipeunlock(rpipe);
                    566:
                    567: unlocked_error:
                    568:        --rpipe->pipe_busy;
                    569:
                    570:        /*
1.2       jdolecek  571:         * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
1.1       jdolecek  572:         */
1.2       jdolecek  573:        if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
                    574:                rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
1.80      ad        575:                cv_broadcast(&rpipe->pipe_cv);
1.35      pk        576:        } else if (bp->cnt < MINPIPESIZE) {
1.1       jdolecek  577:                /*
                    578:                 * Handle write blocking hysteresis.
                    579:                 */
                    580:                if (rpipe->pipe_state & PIPE_WANTW) {
                    581:                        rpipe->pipe_state &= ~PIPE_WANTW;
1.80      ad        582:                        cv_broadcast(&rpipe->pipe_cv);
1.1       jdolecek  583:                }
                    584:        }
                    585:
1.2       jdolecek  586:        /*
                    587:         * If anything was read off the buffer, signal to the writer it's
                    588:         * possible to write more data. Also send signal if we are here for the
                    589:         * first time after last write.
                    590:         */
1.35      pk        591:        if ((bp->size - bp->cnt) >= PIPE_BUF
                    592:            && (ocnt != bp->cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
1.66      christos  593:                pipeselwakeup(rpipe, rpipe->pipe_peer, POLL_OUT);
1.2       jdolecek  594:                rpipe->pipe_state &= ~PIPE_SIGNALR;
                    595:        }
1.1       jdolecek  596:
1.80      ad        597:        mutex_exit(&rpipe->pipe_lock);
1.1       jdolecek  598:        return (error);
                    599: }
                    600:
1.2       jdolecek  601: #ifndef PIPE_NODIRECT
                    602: /*
                    603:  * Allocate structure for loan transfer.
                          *
                          * Requests a kernel virtual address range of npages pages from
                          * kernel_map (flags UVM_KMF_VAONLY | UVM_KMF_WAITVA) and an array of
                          * npages vm_page pointers, and records both, together with npages,
                          * in wpipe->pipe_map.  The size of the range is added to the global
                          * amountpipekva counter.
                          *
                          * Returns 0 on success, or ENOMEM if uvm_km_alloc() returned 0.
                    604:  */
1.18      chs       605: static int
1.68      thorpej   606: pipe_loan_alloc(struct pipe *wpipe, int npages)
1.2       jdolecek  607: {
1.18      chs       608:        vsize_t len;
                    609:
                    610:        len = (vsize_t)npages << PAGE_SHIFT;
1.65      yamt      611:        wpipe->pipe_map.kva = uvm_km_alloc(kernel_map, len, 0,
                    612:            UVM_KMF_VAONLY | UVM_KMF_WAITVA);
1.22      thorpej   613:        if (wpipe->pipe_map.kva == 0)
1.2       jdolecek  614:                return (ENOMEM);
                    615:
1.18      chs       616:        amountpipekva += len;
1.2       jdolecek  617:        wpipe->pipe_map.npages = npages;
                                /*
                                 * NOTE(review): the malloc() result is not checked; presumably
                                 * M_WAITOK means it cannot return NULL -- confirm.
                                 */
1.18      chs       618:        wpipe->pipe_map.pgs = malloc(npages * sizeof(struct vm_page *), M_PIPE,
                    619:            M_WAITOK);
1.2       jdolecek  620:        return (0);
                    621: }
                    622:
                    623: /*
                    624:  * Free resources allocated for loan transfer.
                          *
                          * Releases the kernel virtual address range recorded in
                          * wpipe->pipe_map (pipe_map.npages pages starting at pipe_map.kva),
                          * subtracts its size from amountpipekva and frees the pipe_map.pgs
                          * array.  pipe_map.kva is reset to 0 and pipe_map.pgs to NULL;
                          * pipe_map.npages is left unchanged.
                    625:  */
                    626: static void
1.68      thorpej   627: pipe_loan_free(struct pipe *wpipe)
1.2       jdolecek  628: {
1.18      chs       629:        vsize_t len;
                    630:
                    631:        len = (vsize_t)wpipe->pipe_map.npages << PAGE_SHIFT;
1.65      yamt      632:        uvm_km_free(kernel_map, wpipe->pipe_map.kva, len, UVM_KMF_VAONLY);
1.22      thorpej   633:        wpipe->pipe_map.kva = 0;
1.18      chs       634:        amountpipekva -= len;
                    635:        free(wpipe->pipe_map.pgs, M_PIPE);
                    636:        wpipe->pipe_map.pgs = NULL;
1.2       jdolecek  637: }
                    638:
                    639: /*
                    640:  * NetBSD direct write, using uvm_loan() mechanism.
                    641:  * This implements the pipe buffer write mechanism.  Note that only
                    642:  * a direct write OR a normal pipe write can be pending at any given time.
                    643:  * If there are any characters in the pipe buffer, the direct write will
                    644:  * be deferred until the receiving process grabs all of the bytes from
                    645:  * the pipe buffer.  Then the direct mapping write is set up.
1.35      pk        646:  *
                    647:  * Called with the long-term pipe lock held.
                          *
                          * Only the first iovec of uio is considered, and at most
                          * PIPE_DIRECT_CHUNK bytes, counted from the start of the page that
                          * contains iov_base, are offered per call.  The writer's pages are
                          * loaned with uvm_loan() and entered at wpipe->pipe_map.kva with
                          * VM_PROT_READ.  The function then sleeps until pipe_buffer.cnt
                          * reaches 0 and PIPE_DIRECTR has been cleared again, or until
                          * cv_wait_sig() fails or PIPE_EOF is seen.
                          *
                          * On success the offered byte count is removed from uio.  On error
                          * uio is left untouched if pipe_map.cnt still equals the offered
                          * count; otherwise only the difference is removed.
                          *
                          * The fp argument is not referenced here.
                          *
                          * Returns 0 on success; ENOMEM if uvm_loan() fails (the caller uses
                          * this to fall back to an ordinary write); EPIPE if PIPE_EOF is set
                          * after a wakeup; otherwise the error returned by pipe_loan_alloc()
                          * or cv_wait_sig().
1.2       jdolecek  648:  */
1.18      chs       649: static int
1.77      yamt      650: pipe_direct_write(struct file *fp, struct pipe *wpipe, struct uio *uio)
1.2       jdolecek  651: {
1.5       jdolecek  652:        int error, npages, j;
1.18      chs       653:        struct vm_page **pgs;
1.2       jdolecek  654:        vaddr_t bbase, kva, base, bend;
                    655:        vsize_t blen, bcnt;
1.5       jdolecek  656:        voff_t bpos;
                    657:
1.35      pk        658:        KASSERT(wpipe->pipe_map.cnt == 0);
1.2       jdolecek  659:
                    660:        /*
1.14      jdolecek  661:         * Handle first PIPE_DIRECT_CHUNK bytes of buffer. Deal with buffers
                    662:         * not aligned to PAGE_SIZE.
1.5       jdolecek  663:         */
1.14      jdolecek  664:        bbase = (vaddr_t)uio->uio_iov->iov_base;
1.5       jdolecek  665:        base = trunc_page(bbase);
1.14      jdolecek  666:        bend = round_page(bbase + uio->uio_iov->iov_len);
1.5       jdolecek  667:        blen = bend - base;
                    668:        bpos = bbase - base;
                    669:
                    670:        if (blen > PIPE_DIRECT_CHUNK) {
                    671:                blen = PIPE_DIRECT_CHUNK;
                    672:                bend = base + blen;
                    673:                bcnt = PIPE_DIRECT_CHUNK - bpos;
1.18      chs       674:        } else {
1.14      jdolecek  675:                bcnt = uio->uio_iov->iov_len;
1.18      chs       676:        }
                    677:        npages = blen >> PAGE_SHIFT;
1.5       jdolecek  678:
                    679:        /*
                    680:         * Free the old kva if we need more pages than we have
                    681:         * allocated.
1.2       jdolecek  682:         */
1.35      pk        683:        if (wpipe->pipe_map.kva != 0 && npages > wpipe->pipe_map.npages)
1.5       jdolecek  684:                pipe_loan_free(wpipe);
1.2       jdolecek  685:
1.5       jdolecek  686:        /* Allocate new kva. */
1.22      thorpej   687:        if (wpipe->pipe_map.kva == 0) {
1.18      chs       688:                error = pipe_loan_alloc(wpipe, npages);
1.35      pk        689:                if (error)
                    690:                        return (error);
1.18      chs       691:        }
                    692:
1.5       jdolecek  693:        /* Loan the write buffer memory from writer process */
1.18      chs       694:        pgs = wpipe->pipe_map.pgs;
1.71      yamt      695:        error = uvm_loan(&uio->uio_vmspace->vm_map, base, blen,
1.35      pk        696:                         pgs, UVM_LOAN_TOPAGE);
1.18      chs       697:        if (error) {
1.35      pk        698:                pipe_loan_free(wpipe);
1.61      yamt      699:                return (ENOMEM); /* so that the caller falls back to an ordinary write */
1.18      chs       700:        }
                    701:
1.5       jdolecek  702:        /* Enter the loaned pages to kva */
                    703:        kva = wpipe->pipe_map.kva;
1.18      chs       704:        for (j = 0; j < npages; j++, kva += PAGE_SIZE) {
                    705:                pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ);
                    706:        }
1.12      jdolecek  707:        pmap_update(pmap_kernel());
1.2       jdolecek  708:
1.35      pk        709:        /* Now we can put the pipe in direct write mode */
                    710:        wpipe->pipe_map.pos = bpos;
                    711:        wpipe->pipe_map.cnt = bcnt;
                    712:
                    713:        /*
1.85      ad        714:         * But before we can let someone do a direct read, we
                    715:         * have to wait until the pipe is drained.  Release the
                    716:         * pipe lock while we wait.
1.35      pk        717:         */
1.80      ad        718:        mutex_enter(&wpipe->pipe_lock);
1.85      ad        719:        wpipe->pipe_state |= PIPE_DIRECTW;
1.35      pk        720:        pipeunlock(wpipe);
                    721:
                    722:        while (error == 0 && wpipe->pipe_buffer.cnt > 0) {
                    723:                if (wpipe->pipe_state & PIPE_WANTR) {
                    724:                        wpipe->pipe_state &= ~PIPE_WANTR;
1.80      ad        725:                        cv_broadcast(&wpipe->pipe_cv);
1.35      pk        726:                }
                    727:
                    728:                wpipe->pipe_state |= PIPE_WANTW;
1.80      ad        729:                error = cv_wait_sig(&wpipe->pipe_cv, &wpipe->pipe_lock);
1.35      pk        730:                if (error == 0 && wpipe->pipe_state & PIPE_EOF)
1.5       jdolecek  731:                        error = EPIPE;
1.35      pk        732:        }
                    733:
                    734:        /* Pipe is drained; next read will come off the direct buffer */
                    735:        wpipe->pipe_state |= PIPE_DIRECTR;
                    736:
                    737:        /* Wait until the reader is done */
                    738:        while (error == 0 && (wpipe->pipe_state & PIPE_DIRECTR)) {
1.5       jdolecek  739:                if (wpipe->pipe_state & PIPE_WANTR) {
                    740:                        wpipe->pipe_state &= ~PIPE_WANTR;
1.80      ad        741:                        cv_broadcast(&wpipe->pipe_cv);
1.2       jdolecek  742:                }
1.66      christos  743:                pipeselwakeup(wpipe, wpipe, POLL_IN);
1.80      ad        744:                error = cv_wait_sig(&wpipe->pipe_cv, &wpipe->pipe_lock);
1.35      pk        745:                if (error == 0 && wpipe->pipe_state & PIPE_EOF)
                    746:                        error = EPIPE;
1.5       jdolecek  747:        }
                    748:
1.35      pk        749:        /* Take pipe out of direct write mode */
                    750:        wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTR);
1.2       jdolecek  751:
1.35      pk        752:        /* Acquire the pipe lock and cleanup */
                    753:        (void)pipelock(wpipe, 0);
1.85      ad        754:
                                /*
                                 * Undo the kernel mapping and return the loaned pages.
                                 * NOTE(review): pgs was set from wpipe->pipe_map.pgs above, so
                                 * this NULL check presumably always succeeds here -- confirm.
                                 */
1.21      chs       755:        if (pgs != NULL) {
                    756:                pmap_kremove(wpipe->pipe_map.kva, blen);
1.18      chs       757:                uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
1.21      chs       758:        }
                                /* Drop the kva on error, or when total pipe kva exceeds maxpipekva */
1.5       jdolecek  759:        if (error || amountpipekva > maxpipekva)
                    760:                pipe_loan_free(wpipe);
                    761:
1.15      jdolecek  762:        if (error) {
1.66      christos  763:                pipeselwakeup(wpipe, wpipe, POLL_ERR);
1.2       jdolecek  764:
1.5       jdolecek  765:                /*
1.15      jdolecek  766:                 * If nothing was read from what we offered, return error
1.18      chs       767:                 * straight on. Otherwise update uio resid first. Caller
1.15      jdolecek  768:                 * will deal with the error condition, returning short
                    769:                 * write, error, or restarting the write(2) as appropriate.
1.5       jdolecek  770:                 */
1.15      jdolecek  771:                if (wpipe->pipe_map.cnt == bcnt) {
1.35      pk        772:                        wpipe->pipe_map.cnt = 0;
1.80      ad        773:                        cv_broadcast(&wpipe->pipe_cv);
1.15      jdolecek  774:                        return (error);
1.2       jdolecek  775:                }
                    776:
                                        /* Only the part that was actually consumed counts */
1.15      jdolecek  777:                bcnt -= wpipe->pipe_map.cnt;
1.5       jdolecek  778:        }
1.2       jdolecek  779:
1.18      chs       780:        uio->uio_resid -= bcnt;
1.8       jdolecek  781:        /* uio_offset not updated, not set/used for write(2) */
1.18      chs       782:        uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
1.14      jdolecek  783:        uio->uio_iov->iov_len -= bcnt;
                    784:        if (uio->uio_iov->iov_len == 0) {
                    785:                uio->uio_iov++;
                    786:                uio->uio_iovcnt--;
                    787:        }
1.2       jdolecek  788:
1.35      pk        789:        wpipe->pipe_map.cnt = 0;
1.15      jdolecek  790:        return (error);
1.2       jdolecek  791: }
                    792: #endif /* !PIPE_NODIRECT */
                    793:
                    794: static int
1.77      yamt      795: pipe_write(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
                    796:     int flags)
1.1       jdolecek  797: {
                                /*
                                 * Write the data described by uio into a pipe.  fp->f_data is
                                 * taken as this end's struct pipe (rpipe); the data is placed in
                                 * the buffer of rpipe->pipe_peer (wpipe), or handed over through
                                 * pipe_direct_write() for large blocking writes.  The offset,
                                 * cred and flags arguments are not referenced here.
                                 *
                                 * Returns 0 on success.  Returns EPIPE if there is no peer or
                                 * PIPE_EOF is set on it, EAGAIN if FNONBLOCK is set in fp->f_flag
                                 * and there is no usable buffer space, or the error reported by
                                 * pipelock(), cv_wait_sig(), uiomove() or pipe_direct_write().
                                 * A final EPIPE is turned into 0 when the uio was fully consumed
                                 * and the buffer is empty.
                                 */
                    798:        struct pipe *wpipe, *rpipe;
1.35      pk        799:        struct pipebuf *bp;
                    800:        int error;
1.1       jdolecek  801:
1.35      pk        802:        /* We want to write to our peer */
1.1       jdolecek  803:        rpipe = (struct pipe *) fp->f_data;
1.35      pk        804:
                    805: retry:
                    806:        error = 0;
1.80      ad        807:        mutex_enter(&rpipe->pipe_lock);
1.1       jdolecek  808:        wpipe = rpipe->pipe_peer;
                    809:
                    810:        /*
1.35      pk        811:         * Detect loss of pipe read side, issue SIGPIPE if lost.
1.1       jdolecek  812:         */
1.35      pk        813:        if (wpipe == NULL)
                    814:                error = EPIPE;
1.80      ad        815:        else if (mutex_tryenter(&wpipe->pipe_lock) == 0) {
1.35      pk        816:                /* Deal with race for peer */
1.80      ad        817:                mutex_exit(&rpipe->pipe_lock);
1.85      ad        818:                /* XXX Might be about to deadlock w/kernel_lock. */
                    819:                yield();
1.35      pk        820:                goto retry;
                    821:        } else if ((wpipe->pipe_state & PIPE_EOF) != 0) {
1.80      ad        822:                mutex_exit(&wpipe->pipe_lock);
1.35      pk        823:                error = EPIPE;
1.24      jdolecek  824:        }
1.2       jdolecek  825:
1.80      ad        826:        mutex_exit(&rpipe->pipe_lock);
1.35      pk        827:        if (error != 0)
                    828:                return (error);
                    829:
1.1       jdolecek  830:        ++wpipe->pipe_busy;
                    831:
1.35      pk        832:        /* Acquire the long-term pipe lock */
                    833:        if ((error = pipelock(wpipe,1)) != 0) {
                    834:                --wpipe->pipe_busy;
                    835:                if (wpipe->pipe_busy == 0
                    836:                    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
                    837:                        wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
1.80      ad        838:                        cv_broadcast(&wpipe->pipe_cv);
1.35      pk        839:                }
1.80      ad        840:                mutex_exit(&wpipe->pipe_lock);
1.35      pk        841:                return (error);
                    842:        }
                    843:
                    844:        bp = &wpipe->pipe_buffer;
                    845:
1.1       jdolecek  846:        /*
1.35      pk        847:         * If it is advantageous to resize the pipe buffer, do so.
1.1       jdolecek  848:         */
                    849:        if ((uio->uio_resid > PIPE_SIZE) &&
1.35      pk        850:            (nbigpipe < maxbigpipes) &&
1.2       jdolecek  851: #ifndef PIPE_NODIRECT
1.35      pk        852:            (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
1.2       jdolecek  853: #endif
1.35      pk        854:            (bp->size <= PIPE_SIZE) && (bp->cnt == 0)) {
1.1       jdolecek  855:
1.35      pk        856:                if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
                    857:                        nbigpipe++;
1.24      jdolecek  858:        }
1.1       jdolecek  859:
                    860:        while (uio->uio_resid) {
1.26      thorpej   861:                size_t space;
1.1       jdolecek  862:
                    863: #ifndef PIPE_NODIRECT
                    864:                /*
1.35      pk        865:                 * Pipe buffered writes cannot be coincidental with
                    866:                 * direct writes.  Also, only one direct write can be
                    867:                 * in progress at any one time.  We wait until the currently
                    868:                 * executing direct write is completed before continuing.
                    869:                 *
                    870:                 * We break out if a signal occurs or the reader goes away.
                    871:                 */
                    872:                while (error == 0 && wpipe->pipe_state & PIPE_DIRECTW) {
1.80      ad        873:                        mutex_enter(&wpipe->pipe_lock);
1.35      pk        874:                        if (wpipe->pipe_state & PIPE_WANTR) {
                    875:                                wpipe->pipe_state &= ~PIPE_WANTR;
1.80      ad        876:                                cv_broadcast(&wpipe->pipe_cv);
1.35      pk        877:                        }
                    878:                        pipeunlock(wpipe);
1.80      ad        879:                        error = cv_wait_sig(&wpipe->pipe_cv,
                    880:                            &wpipe->pipe_lock);
1.35      pk        881:
                    882:                        (void)pipelock(wpipe, 0);
                    883:                        if (wpipe->pipe_state & PIPE_EOF)
                    884:                                error = EPIPE;
                    885:                }
                    886:                if (error)
                    887:                        break;
                    888:
                    889:                /*
1.1       jdolecek  890:                 * If the transfer is large, we can gain performance if
                    891:                 * we do process-to-process copies directly.
                    892:                 * If the write is non-blocking, we don't use the
                    893:                 * direct write mechanism.
                    894:                 *
                    895:                 * The direct write mechanism will detect the reader going
                    896:                 * away on us.
                    897:                 */
1.14      jdolecek  898:                if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
1.1       jdolecek  899:                    (fp->f_flag & FNONBLOCK) == 0 &&
1.2       jdolecek  900:                    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
1.42      christos  901:                        error = pipe_direct_write(fp, wpipe, uio);
1.5       jdolecek  902:
                    903:                        /*
1.49      wiz       904:                         * Break out if error occurred, unless it's ENOMEM.
1.14      jdolecek  905:                         * ENOMEM means we failed to allocate some resources
                    906:                         * for direct write, so we just fallback to ordinary
                    907:                         * write. If the direct write was successful,
                    908:                         * process rest of data via ordinary write.
1.5       jdolecek  909:                         */
1.35      pk        910:                        if (error == 0)
1.14      jdolecek  911:                                continue;
                    912:
1.5       jdolecek  913:                        if (error != ENOMEM)
1.1       jdolecek  914:                                break;
                    915:                }
1.2       jdolecek  916: #endif /* !PIPE_NODIRECT */
1.1       jdolecek  917:
1.35      pk        918:                space = bp->size - bp->cnt;
1.1       jdolecek  919:
                    920:                /* Writes of size <= PIPE_BUF must be atomic. */
1.14      jdolecek  921:                if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
1.1       jdolecek  922:                        space = 0;
                    923:
1.16      mycroft   924:                if (space > 0) {
1.2       jdolecek  925:                        int size;       /* Transfer size */
                    926:                        int segsize;    /* first segment to transfer */
                    927:
                    928:                        /*
                    929:                         * Transfer size is minimum of uio transfer
                    930:                         * and free space in pipe buffer.
                    931:                         */
                    932:                        if (space > uio->uio_resid)
                    933:                                size = uio->uio_resid;
                    934:                        else
                    935:                                size = space;
                    936:                        /*
1.63      perry     937:                         * First segment to transfer is minimum of
1.2       jdolecek  938:                         * transfer size and contiguous space in
                    939:                         * pipe buffer.  If first segment to transfer
                    940:                         * is less than the transfer size, we've got
                    941:                         * a wraparound in the buffer.
                    942:                         */
1.35      pk        943:                        segsize = bp->size - bp->in;
1.2       jdolecek  944:                        if (segsize > size)
                    945:                                segsize = size;
1.18      chs       946:
1.2       jdolecek  947:                        /* Transfer first segment */
1.79      christos  948:                        error = uiomove((char *)bp->buffer + bp->in, segsize,
                    949:                            uio);
1.18      chs       950:
1.2       jdolecek  951:                        if (error == 0 && segsize < size) {
1.63      perry     952:                                /*
1.2       jdolecek  953:                                 * Transfer remaining part now, to
                    954:                                 * support atomic writes.  Wraparound
                    955:                                 * happened.
                    956:                                 */
                    957: #ifdef DEBUG
1.35      pk        958:                                if (bp->in + segsize != bp->size)
1.2       jdolecek  959:                                        panic("Expected pipe buffer wraparound disappeared");
                    960: #endif
1.18      chs       961:
1.79      christos  962:                                error = uiomove(bp->buffer,
                    963:                                    size - segsize, uio);
1.2       jdolecek  964:                        }
1.35      pk        965:                        if (error)
                    966:                                break;
                    967:
                                                /*
                                                 * Advance the input index; once it reaches the
                                                 * end of the buffer it restarts at the number of
                                                 * bytes copied to the front (size - segsize).
                                                 */
                    968:                        bp->in += size;
                    969:                        if (bp->in >= bp->size) {
1.2       jdolecek  970: #ifdef DEBUG
1.35      pk        971:                                if (bp->in != size - segsize + bp->size)
                    972:                                        panic("Expected wraparound bad");
1.2       jdolecek  973: #endif
1.35      pk        974:                                bp->in = size - segsize;
                    975:                        }
1.18      chs       976:
1.35      pk        977:                        bp->cnt += size;
1.2       jdolecek  978: #ifdef DEBUG
1.35      pk        979:                        if (bp->cnt > bp->size)
                    980:                                panic("Pipe buffer overflow");
1.2       jdolecek  981: #endif
1.1       jdolecek  982:                } else {
                    983:                        /*
                    984:                         * If the "read-side" has been blocked, wake it up now.
                    985:                         */
1.80      ad        986:                        mutex_enter(&wpipe->pipe_lock);
1.1       jdolecek  987:                        if (wpipe->pipe_state & PIPE_WANTR) {
                    988:                                wpipe->pipe_state &= ~PIPE_WANTR;
1.80      ad        989:                                cv_broadcast(&wpipe->pipe_cv);
1.1       jdolecek  990:                        }
1.80      ad        991:                        mutex_exit(&wpipe->pipe_lock);
1.1       jdolecek  992:
                    993:                        /*
                    994:                         * don't block on non-blocking I/O
                    995:                         */
                    996:                        if (fp->f_flag & FNONBLOCK) {
                    997:                                error = EAGAIN;
                    998:                                break;
                    999:                        }
                   1000:
                   1001:                        /*
                   1002:                         * We have no more space and have something to offer,
                   1003:                         * wake up select/poll.
                   1004:                         */
1.35      pk       1005:                        if (bp->cnt)
1.66      christos 1006:                                pipeselwakeup(wpipe, wpipe, POLL_OUT);
1.1       jdolecek 1007:
1.80      ad       1008:                        mutex_enter(&wpipe->pipe_lock);
1.35      pk       1009:                        pipeunlock(wpipe);
1.1       jdolecek 1010:                        wpipe->pipe_state |= PIPE_WANTW;
1.80      ad       1011:                        error = cv_wait_sig(&wpipe->pipe_cv,
                   1012:                            &wpipe->pipe_lock);
1.35      pk       1013:                        (void)pipelock(wpipe, 0);
1.1       jdolecek 1014:                        if (error != 0)
                   1015:                                break;
                   1016:                        /*
                   1017:                         * If read side wants to go away, we just issue a signal
                   1018:                         * to ourselves.
                   1019:                         */
                   1020:                        if (wpipe->pipe_state & PIPE_EOF) {
                   1021:                                error = EPIPE;
                   1022:                                break;
1.18      chs      1023:                        }
1.1       jdolecek 1024:                }
                   1025:        }
                   1026:
1.80      ad       1027:        mutex_enter(&wpipe->pipe_lock);
1.1       jdolecek 1028:        --wpipe->pipe_busy;
1.2       jdolecek 1029:        if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
                   1030:                wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
1.80      ad       1031:                cv_broadcast(&wpipe->pipe_cv);
1.35      pk       1032:        } else if (bp->cnt > 0) {
1.1       jdolecek 1033:                /*
                   1034:                 * If we have put any characters in the buffer, we wake up
                   1035:                 * the reader.
                   1036:                 */
                   1037:                if (wpipe->pipe_state & PIPE_WANTR) {
                   1038:                        wpipe->pipe_state &= ~PIPE_WANTR;
1.80      ad       1039:                        cv_broadcast(&wpipe->pipe_cv);
1.1       jdolecek 1040:                }
                   1041:        }
                   1042:
                   1043:        /*
                   1044:         * Don't return EPIPE if I/O was successful
                   1045:         */
1.35      pk       1046:        if (error == EPIPE && bp->cnt == 0 && uio->uio_resid == 0)
1.1       jdolecek 1047:                error = 0;
                   1048:
                   1049:        if (error == 0)
1.73      kardel   1050:                getmicrotime(&wpipe->pipe_mtime);
1.1       jdolecek 1051:
                   1052:        /*
1.2       jdolecek 1053:         * We have something to offer, wake up select/poll.
                   1054:         * wpipe->pipe_map.cnt is always 0 in this point (direct write
1.14      jdolecek 1055:         * is only done synchronously), so check only wpipe->pipe_buffer.cnt
1.1       jdolecek 1056:         */
1.35      pk       1057:        if (bp->cnt)
1.66      christos 1058:                pipeselwakeup(wpipe, wpipe, POLL_OUT);
1.1       jdolecek 1059:
1.2       jdolecek 1060:        /*
                   1061:         * Arrange for next read(2) to do a signal.
                   1062:         */
                   1063:        wpipe->pipe_state |= PIPE_SIGNALR;
                   1064:
1.35      pk       1065:        pipeunlock(wpipe);
1.80      ad       1066:        mutex_exit(&wpipe->pipe_lock);
1.1       jdolecek 1067:        return (error);
                   1068: }
                   1069:
                   1070: /*
                   1071:  * we implement a very minimal set of ioctls for compatibility with sockets.
                   1072:  */
                   1073: int
1.69      christos 1074: pipe_ioctl(struct file *fp, u_long cmd, void *data, struct lwp *l)
1.1       jdolecek 1075: {
1.35      pk       1076:        struct pipe *pipe = (struct pipe *)fp->f_data;
1.69      christos 1077:        struct proc *p = l->l_proc;
1.1       jdolecek 1078:
                   1079:        switch (cmd) {
                   1080:
                   1081:        case FIONBIO:
                   1082:                return (0);
                   1083:
                   1084:        case FIOASYNC:
1.80      ad       1085:                mutex_enter(&pipe->pipe_lock);
1.1       jdolecek 1086:                if (*(int *)data) {
1.35      pk       1087:                        pipe->pipe_state |= PIPE_ASYNC;
1.1       jdolecek 1088:                } else {
1.35      pk       1089:                        pipe->pipe_state &= ~PIPE_ASYNC;
1.1       jdolecek 1090:                }
1.80      ad       1091:                mutex_exit(&pipe->pipe_lock);
1.1       jdolecek 1092:                return (0);
                   1093:
                   1094:        case FIONREAD:
1.80      ad       1095:                mutex_enter(&pipe->pipe_lock);
1.2       jdolecek 1096: #ifndef PIPE_NODIRECT
1.35      pk       1097:                if (pipe->pipe_state & PIPE_DIRECTW)
                   1098:                        *(int *)data = pipe->pipe_map.cnt;
1.1       jdolecek 1099:                else
1.2       jdolecek 1100: #endif
1.35      pk       1101:                        *(int *)data = pipe->pipe_buffer.cnt;
1.80      ad       1102:                mutex_exit(&pipe->pipe_lock);
1.1       jdolecek 1103:                return (0);
                   1104:
1.59      wrstuden 1105:        case FIONWRITE:
                   1106:                /* Look at other side */
1.83      ad       1107:                rw_enter(&pipe_peer_lock, RW_READER);
1.59      wrstuden 1108:                pipe = pipe->pipe_peer;
1.80      ad       1109:                mutex_enter(&pipe->pipe_lock);
1.59      wrstuden 1110: #ifndef PIPE_NODIRECT
                   1111:                if (pipe->pipe_state & PIPE_DIRECTW)
                   1112:                        *(int *)data = pipe->pipe_map.cnt;
                   1113:                else
                   1114: #endif
                   1115:                        *(int *)data = pipe->pipe_buffer.cnt;
1.80      ad       1116:                mutex_exit(&pipe->pipe_lock);
1.82      ad       1117:                rw_exit(&pipe_peer_lock);
1.59      wrstuden 1118:                return (0);
                   1119:
                   1120:        case FIONSPACE:
                   1121:                /* Look at other side */
1.83      ad       1122:                rw_enter(&pipe_peer_lock, RW_READER);
1.59      wrstuden 1123:                pipe = pipe->pipe_peer;
1.80      ad       1124:                mutex_enter(&pipe->pipe_lock);
1.59      wrstuden 1125: #ifndef PIPE_NODIRECT
                   1126:                /*
                   1127:                 * If we're in direct-mode, we don't really have a
                   1128:                 * send queue, and any other write will block. Thus
                   1129:                 * zero seems like the best answer.
                   1130:                 */
                   1131:                if (pipe->pipe_state & PIPE_DIRECTW)
                   1132:                        *(int *)data = 0;
                   1133:                else
                   1134: #endif
                   1135:                        *(int *)data = pipe->pipe_buffer.size -
1.82      ad       1136:                            pipe->pipe_buffer.cnt;
1.80      ad       1137:                mutex_exit(&pipe->pipe_lock);
1.82      ad       1138:                rw_exit(&pipe_peer_lock);
1.59      wrstuden 1139:                return (0);
                   1140:
1.2       jdolecek 1141:        case TIOCSPGRP:
1.43      jdolecek 1142:        case FIOSETOWN:
                   1143:                return fsetown(p, &pipe->pipe_pgid, cmd, data);
1.2       jdolecek 1144:
                   1145:        case TIOCGPGRP:
1.43      jdolecek 1146:        case FIOGETOWN:
                   1147:                return fgetown(p, pipe->pipe_pgid, cmd, data);
1.1       jdolecek 1148:
                   1149:        }
1.25      atatat   1150:        return (EPASSTHROUGH);
1.1       jdolecek 1151: }
                   1152:
                   1153: int
1.69      christos 1154: pipe_poll(struct file *fp, int events, struct lwp *l)
1.1       jdolecek 1155: {
                   1156:        struct pipe *rpipe = (struct pipe *)fp->f_data;
                   1157:        struct pipe *wpipe;
1.35      pk       1158:        int eof = 0;
1.1       jdolecek 1159:        int revents = 0;
                   1160:
1.35      pk       1161: retry:
1.80      ad       1162:        mutex_enter(&rpipe->pipe_lock);
1.1       jdolecek 1163:        wpipe = rpipe->pipe_peer;
1.80      ad       1164:        if (wpipe != NULL && mutex_tryenter(&wpipe->pipe_lock) == 0) {
1.35      pk       1165:                /* Deal with race for peer */
1.80      ad       1166:                mutex_exit(&rpipe->pipe_lock);
1.85      ad       1167:                /* XXX Might be about to deadlock w/kernel_lock. */
                   1168:                yield();
1.35      pk       1169:                goto retry;
                   1170:        }
                   1171:
1.1       jdolecek 1172:        if (events & (POLLIN | POLLRDNORM))
1.2       jdolecek 1173:                if ((rpipe->pipe_buffer.cnt > 0) ||
                   1174: #ifndef PIPE_NODIRECT
1.35      pk       1175:                    (rpipe->pipe_state & PIPE_DIRECTR) ||
1.2       jdolecek 1176: #endif
1.1       jdolecek 1177:                    (rpipe->pipe_state & PIPE_EOF))
                   1178:                        revents |= events & (POLLIN | POLLRDNORM);
                   1179:
1.35      pk       1180:        eof |= (rpipe->pipe_state & PIPE_EOF);
1.80      ad       1181:        mutex_exit(&rpipe->pipe_lock);
1.35      pk       1182:
                   1183:        if (wpipe == NULL)
                   1184:                revents |= events & (POLLOUT | POLLWRNORM);
                   1185:        else {
                   1186:                if (events & (POLLOUT | POLLWRNORM))
                   1187:                        if ((wpipe->pipe_state & PIPE_EOF) || (
1.2       jdolecek 1188: #ifndef PIPE_NODIRECT
1.35      pk       1189:                             (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
1.2       jdolecek 1190: #endif
1.35      pk       1191:                             (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
                   1192:                                revents |= events & (POLLOUT | POLLWRNORM);
1.1       jdolecek 1193:
1.35      pk       1194:                eof |= (wpipe->pipe_state & PIPE_EOF);
1.80      ad       1195:                mutex_exit(&wpipe->pipe_lock);
1.35      pk       1196:        }
                   1197:
                   1198:        if (wpipe == NULL || eof)
1.1       jdolecek 1199:                revents |= POLLHUP;
                   1200:
                   1201:        if (revents == 0) {
1.35      pk       1202:                if (events & (POLLIN | POLLRDNORM))
1.69      christos 1203:                        selrecord(l, &rpipe->pipe_sel);
1.1       jdolecek 1204:
1.35      pk       1205:                if (events & (POLLOUT | POLLWRNORM))
1.69      christos 1206:                        selrecord(l, &wpipe->pipe_sel);
1.1       jdolecek 1207:        }
                   1208:
                   1209:        return (revents);
                   1210: }
                   1211:
                   1212: static int
1.77      yamt     1213: pipe_stat(struct file *fp, struct stat *ub, struct lwp *l)
1.1       jdolecek 1214: {
                   1215:        struct pipe *pipe = (struct pipe *)fp->f_data;
                   1216:
1.83      ad       1217:        rw_enter(&pipe_peer_lock, RW_READER);
1.82      ad       1218:
1.79      christos 1219:        memset((void *)ub, 0, sizeof(*ub));
1.32      jdolecek 1220:        ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
1.1       jdolecek 1221:        ub->st_blksize = pipe->pipe_buffer.size;
1.64      christos 1222:        if (ub->st_blksize == 0 && pipe->pipe_peer)
                   1223:                ub->st_blksize = pipe->pipe_peer->pipe_buffer.size;
1.1       jdolecek 1224:        ub->st_size = pipe->pipe_buffer.cnt;
1.2       jdolecek 1225:        ub->st_blocks = (ub->st_size) ? 1 : 0;
1.60      atatat   1226:        TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
1.2       jdolecek 1227:        TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
                   1228:        TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
1.72      elad     1229:        ub->st_uid = kauth_cred_geteuid(fp->f_cred);
                   1230:        ub->st_gid = kauth_cred_getegid(fp->f_cred);
1.82      ad       1231:
                   1232:        rw_exit(&pipe_peer_lock);
                   1233:
1.1       jdolecek 1234:        /*
                   1235:         * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
                   1236:         * XXX (st_dev, st_ino) should be unique.
                   1237:         */
                   1238:        return (0);
                   1239: }
                   1240:
                   1241: /* ARGSUSED */
                   1242: static int
1.77      yamt     1243: pipe_close(struct file *fp, struct lwp *l)
1.1       jdolecek 1244: {
1.35      pk       1245:        struct pipe *pipe = (struct pipe *)fp->f_data;
1.1       jdolecek 1246:
                   1247:        fp->f_data = NULL;
1.42      christos 1248:        pipeclose(fp, pipe);
1.1       jdolecek 1249:        return (0);
                   1250: }
                   1251:
                   1252: static void
1.68      thorpej  1253: pipe_free_kmem(struct pipe *pipe)
1.1       jdolecek 1254: {
                   1255:
1.35      pk       1256:        if (pipe->pipe_buffer.buffer != NULL) {
                   1257:                if (pipe->pipe_buffer.size > PIPE_SIZE)
1.1       jdolecek 1258:                        --nbigpipe;
1.35      pk       1259:                amountpipekva -= pipe->pipe_buffer.size;
1.2       jdolecek 1260:                uvm_km_free(kernel_map,
1.35      pk       1261:                        (vaddr_t)pipe->pipe_buffer.buffer,
1.65      yamt     1262:                        pipe->pipe_buffer.size, UVM_KMF_PAGEABLE);
1.35      pk       1263:                pipe->pipe_buffer.buffer = NULL;
1.1       jdolecek 1264:        }
                   1265: #ifndef PIPE_NODIRECT
1.35      pk       1266:        if (pipe->pipe_map.kva != 0) {
                   1267:                pipe_loan_free(pipe);
                   1268:                pipe->pipe_map.cnt = 0;
                   1269:                pipe->pipe_map.kva = 0;
                   1270:                pipe->pipe_map.pos = 0;
                   1271:                pipe->pipe_map.npages = 0;
1.1       jdolecek 1272:        }
1.2       jdolecek 1273: #endif /* !PIPE_NODIRECT */
1.1       jdolecek 1274: }
                   1275:
                   1276: /*
                   1277:  * shutdown the pipe
                   1278:  */
                   1279: static void
1.77      yamt     1280: pipeclose(struct file *fp, struct pipe *pipe)
1.1       jdolecek 1281: {
                   1282:        struct pipe *ppipe;
                   1283:
1.35      pk       1284:        if (pipe == NULL)
1.2       jdolecek 1285:                return;
                   1286:
1.82      ad       1287:  retry:
1.83      ad       1288:        rw_enter(&pipe_peer_lock, RW_WRITER);
1.80      ad       1289:        mutex_enter(&pipe->pipe_lock);
1.35      pk       1290:
1.66      christos 1291:        pipeselwakeup(pipe, pipe, POLL_HUP);
1.1       jdolecek 1292:
1.2       jdolecek 1293:        /*
                   1294:         * If the other side is blocked, wake it up saying that
                   1295:         * we want to close it down.
                   1296:         */
1.66      christos 1297:        pipe->pipe_state |= PIPE_EOF;
1.82      ad       1298:        if (pipe->pipe_busy) {
                   1299:                rw_exit(&pipe_peer_lock);
                   1300:                while (pipe->pipe_busy) {
                   1301:                        cv_broadcast(&pipe->pipe_cv);
                   1302:                        pipe->pipe_state |= PIPE_WANTCLOSE;
                   1303:                        cv_wait_sig(&pipe->pipe_cv, &pipe->pipe_lock);
                   1304:                }
                   1305:                if (!rw_tryenter(&pipe_peer_lock, RW_READER)) {
                   1306:                        mutex_exit(&pipe->pipe_lock);
1.85      ad       1307:                        /* XXX Might be about to deadlock w/kernel_lock. */
                   1308:                        yield();
1.82      ad       1309:                        goto retry;
                   1310:                }
1.2       jdolecek 1311:        }
1.1       jdolecek 1312:
1.2       jdolecek 1313:        /*
                   1314:         * Disconnect from peer
                   1315:         */
1.35      pk       1316:        if ((ppipe = pipe->pipe_peer) != NULL) {
                   1317:                /* Deal with race for peer */
1.80      ad       1318:                if (mutex_tryenter(&ppipe->pipe_lock) == 0) {
                   1319:                        mutex_exit(&pipe->pipe_lock);
1.82      ad       1320:                        rw_exit(&pipe_peer_lock);
1.85      ad       1321:                        /* XXX Might be about to deadlock w/kernel_lock. */
                   1322:                        yield();
1.35      pk       1323:                        goto retry;
                   1324:                }
1.66      christos 1325:                pipeselwakeup(ppipe, ppipe, POLL_HUP);
1.1       jdolecek 1326:
1.2       jdolecek 1327:                ppipe->pipe_state |= PIPE_EOF;
1.80      ad       1328:                cv_broadcast(&ppipe->pipe_cv);
1.2       jdolecek 1329:                ppipe->pipe_peer = NULL;
1.80      ad       1330:                mutex_exit(&ppipe->pipe_lock);
1.1       jdolecek 1331:        }
1.35      pk       1332:
1.67      yamt     1333:        KASSERT((pipe->pipe_state & PIPE_LOCKFL) == 0);
                   1334:
1.80      ad       1335:        mutex_exit(&pipe->pipe_lock);
1.82      ad       1336:        rw_exit(&pipe_peer_lock);
1.35      pk       1337:
1.2       jdolecek 1338:        /*
                   1339:         * free resources
                   1340:         */
1.35      pk       1341:        pipe_free_kmem(pipe);
1.80      ad       1342:        mutex_destroy(&pipe->pipe_lock);
                   1343:        cv_destroy(&pipe->pipe_cv);
                   1344:        cv_destroy(&pipe->pipe_lkcv);
1.86    ! ad       1345:        seldestroy(&pipe->pipe_sel);
1.35      pk       1346:        pool_put(&pipe_pool, pipe);
1.1       jdolecek 1347: }
                   1348:
1.27      jdolecek 1349: static void
                   1350: filt_pipedetach(struct knote *kn)
1.1       jdolecek 1351: {
1.35      pk       1352:        struct pipe *pipe = (struct pipe *)kn->kn_fp->f_data;
1.1       jdolecek 1353:
1.83      ad       1354:        rw_enter(&pipe_peer_lock, RW_READER);
1.82      ad       1355:
1.27      jdolecek 1356:        switch(kn->kn_filter) {
1.1       jdolecek 1357:        case EVFILT_WRITE:
1.27      jdolecek 1358:                /* need the peer structure, not our own */
1.35      pk       1359:                pipe = pipe->pipe_peer;
1.27      jdolecek 1360:
                   1361:                /* if reader end already closed, just return */
1.82      ad       1362:                if (pipe == NULL) {
                   1363:                        rw_exit(&pipe_peer_lock);
1.27      jdolecek 1364:                        return;
1.82      ad       1365:                }
1.27      jdolecek 1366:
1.1       jdolecek 1367:                break;
                   1368:        default:
1.27      jdolecek 1369:                /* nothing to do */
1.29      kristerw 1370:                break;
1.1       jdolecek 1371:        }
1.24      jdolecek 1372:
1.27      jdolecek 1373: #ifdef DIAGNOSTIC
1.35      pk       1374:        if (kn->kn_hook != pipe)
1.27      jdolecek 1375:                panic("filt_pipedetach: inconsistent knote");
                   1376: #endif
1.1       jdolecek 1377:
1.80      ad       1378:        mutex_enter(&pipe->pipe_lock);
1.35      pk       1379:        SLIST_REMOVE(&pipe->pipe_sel.sel_klist, kn, knote, kn_selnext);
1.80      ad       1380:        mutex_exit(&pipe->pipe_lock);
1.82      ad       1381:        rw_exit(&pipe_peer_lock);
1.1       jdolecek 1382: }
                   1383:
                   1384: /*ARGSUSED*/
                   1385: static int
                   1386: filt_piperead(struct knote *kn, long hint)
                   1387: {
                   1388:        struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1.82      ad       1389:        struct pipe *wpipe;
                   1390:
1.83      ad       1391:        if ((hint & NOTE_SUBMIT) == 0) {
                   1392:                rw_enter(&pipe_peer_lock, RW_READER);
                   1393:                mutex_enter(&rpipe->pipe_lock);
                   1394:        }
1.82      ad       1395:        wpipe = rpipe->pipe_peer;
1.83      ad       1396:        kn->kn_data = rpipe->pipe_buffer.cnt;
1.1       jdolecek 1397:
                   1398:        if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
                   1399:                kn->kn_data = rpipe->pipe_map.cnt;
                   1400:
                   1401:        if ((rpipe->pipe_state & PIPE_EOF) ||
                   1402:            (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1.24      jdolecek 1403:                kn->kn_flags |= EV_EOF;
1.83      ad       1404:                if ((hint & NOTE_SUBMIT) == 0) {
1.80      ad       1405:                        mutex_exit(&rpipe->pipe_lock);
1.83      ad       1406:                        rw_exit(&pipe_peer_lock);
                   1407:                }
1.1       jdolecek 1408:                return (1);
                   1409:        }
1.83      ad       1410:
                   1411:        if ((hint & NOTE_SUBMIT) == 0) {
1.80      ad       1412:                mutex_exit(&rpipe->pipe_lock);
1.83      ad       1413:                rw_exit(&pipe_peer_lock);
                   1414:        }
1.1       jdolecek 1415:        return (kn->kn_data > 0);
                   1416: }
                   1417:
                   1418: /*ARGSUSED*/
                   1419: static int
                   1420: filt_pipewrite(struct knote *kn, long hint)
                   1421: {
                   1422:        struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1.82      ad       1423:        struct pipe *wpipe;
                   1424:
1.83      ad       1425:        if ((hint & NOTE_SUBMIT) == 0) {
                   1426:                rw_enter(&pipe_peer_lock, RW_READER);
                   1427:                mutex_enter(&rpipe->pipe_lock);
                   1428:        }
1.82      ad       1429:        wpipe = rpipe->pipe_peer;
1.1       jdolecek 1430:
                   1431:        if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
                   1432:                kn->kn_data = 0;
1.63      perry    1433:                kn->kn_flags |= EV_EOF;
1.83      ad       1434:                if ((hint & NOTE_SUBMIT) == 0) {
1.80      ad       1435:                        mutex_exit(&rpipe->pipe_lock);
1.83      ad       1436:                        rw_exit(&pipe_peer_lock);
                   1437:                }
1.1       jdolecek 1438:                return (1);
                   1439:        }
                   1440:        kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
                   1441:        if (wpipe->pipe_state & PIPE_DIRECTW)
                   1442:                kn->kn_data = 0;
                   1443:
1.83      ad       1444:        if ((hint & NOTE_SUBMIT) == 0) {
1.80      ad       1445:                mutex_exit(&rpipe->pipe_lock);
1.83      ad       1446:                rw_exit(&pipe_peer_lock);
                   1447:        }
1.1       jdolecek 1448:        return (kn->kn_data >= PIPE_BUF);
                   1449: }
1.27      jdolecek 1450:
                   1451: static const struct filterops pipe_rfiltops =
                   1452:        { 1, NULL, filt_pipedetach, filt_piperead };
                   1453: static const struct filterops pipe_wfiltops =
                   1454:        { 1, NULL, filt_pipedetach, filt_pipewrite };
                   1455:
                   1456: /*ARGSUSED*/
                   1457: static int
1.77      yamt     1458: pipe_kqfilter(struct file *fp, struct knote *kn)
1.27      jdolecek 1459: {
1.35      pk       1460:        struct pipe *pipe;
1.27      jdolecek 1461:
1.83      ad       1462:        rw_enter(&pipe_peer_lock, RW_READER);
1.35      pk       1463:        pipe = (struct pipe *)kn->kn_fp->f_data;
1.82      ad       1464:
1.27      jdolecek 1465:        switch (kn->kn_filter) {
                   1466:        case EVFILT_READ:
                   1467:                kn->kn_fop = &pipe_rfiltops;
                   1468:                break;
                   1469:        case EVFILT_WRITE:
                   1470:                kn->kn_fop = &pipe_wfiltops;
1.35      pk       1471:                pipe = pipe->pipe_peer;
                   1472:                if (pipe == NULL) {
1.27      jdolecek 1473:                        /* other end of pipe has been closed */
1.82      ad       1474:                        rw_exit(&pipe_peer_lock);
1.27      jdolecek 1475:                        return (EBADF);
                   1476:                }
                   1477:                break;
                   1478:        default:
1.82      ad       1479:                rw_exit(&pipe_peer_lock);
1.27      jdolecek 1480:                return (1);
                   1481:        }
1.82      ad       1482:
1.35      pk       1483:        kn->kn_hook = pipe;
1.80      ad       1484:        mutex_enter(&pipe->pipe_lock);
1.35      pk       1485:        SLIST_INSERT_HEAD(&pipe->pipe_sel.sel_klist, kn, kn_selnext);
1.80      ad       1486:        mutex_exit(&pipe->pipe_lock);
1.82      ad       1487:        rw_exit(&pipe_peer_lock);
                   1488:
1.27      jdolecek 1489:        return (0);
                   1490: }
1.2       jdolecek 1491:
                   1492: /*
                   1493:  * Handle pipe sysctls.
                   1494:  */
1.47      atatat   1495: SYSCTL_SETUP(sysctl_kern_pipe_setup, "sysctl kern.pipe subtree setup")
                   1496: {
                   1497:
1.54      atatat   1498:        sysctl_createv(clog, 0, NULL, NULL,
                   1499:                       CTLFLAG_PERMANENT,
1.47      atatat   1500:                       CTLTYPE_NODE, "kern", NULL,
                   1501:                       NULL, 0, NULL, 0,
                   1502:                       CTL_KERN, CTL_EOL);
1.54      atatat   1503:        sysctl_createv(clog, 0, NULL, NULL,
                   1504:                       CTLFLAG_PERMANENT,
1.56      atatat   1505:                       CTLTYPE_NODE, "pipe",
                   1506:                       SYSCTL_DESCR("Pipe settings"),
1.47      atatat   1507:                       NULL, 0, NULL, 0,
                   1508:                       CTL_KERN, KERN_PIPE, CTL_EOL);
                   1509:
1.54      atatat   1510:        sysctl_createv(clog, 0, NULL, NULL,
                   1511:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.56      atatat   1512:                       CTLTYPE_INT, "maxkvasz",
                   1513:                       SYSCTL_DESCR("Maximum amount of kernel memory to be "
                   1514:                                    "used for pipes"),
1.47      atatat   1515:                       NULL, 0, &maxpipekva, 0,
                   1516:                       CTL_KERN, KERN_PIPE, KERN_PIPE_MAXKVASZ, CTL_EOL);
1.54      atatat   1517:        sysctl_createv(clog, 0, NULL, NULL,
                   1518:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.56      atatat   1519:                       CTLTYPE_INT, "maxloankvasz",
                   1520:                       SYSCTL_DESCR("Limit for direct transfers via page loan"),
1.47      atatat   1521:                       NULL, 0, &limitpipekva, 0,
                   1522:                       CTL_KERN, KERN_PIPE, KERN_PIPE_LIMITKVA, CTL_EOL);
1.54      atatat   1523:        sysctl_createv(clog, 0, NULL, NULL,
                   1524:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.56      atatat   1525:                       CTLTYPE_INT, "maxbigpipes",
                   1526:                       SYSCTL_DESCR("Maximum number of \"big\" pipes"),
1.47      atatat   1527:                       NULL, 0, &maxbigpipes, 0,
                   1528:                       CTL_KERN, KERN_PIPE, KERN_PIPE_MAXBIGPIPES, CTL_EOL);
1.54      atatat   1529:        sysctl_createv(clog, 0, NULL, NULL,
                   1530:                       CTLFLAG_PERMANENT,
1.56      atatat   1531:                       CTLTYPE_INT, "nbigpipes",
                   1532:                       SYSCTL_DESCR("Number of \"big\" pipes"),
1.47      atatat   1533:                       NULL, 0, &nbigpipe, 0,
                   1534:                       CTL_KERN, KERN_PIPE, KERN_PIPE_NBIGPIPES, CTL_EOL);
1.54      atatat   1535:        sysctl_createv(clog, 0, NULL, NULL,
                   1536:                       CTLFLAG_PERMANENT,
1.56      atatat   1537:                       CTLTYPE_INT, "kvasize",
                   1538:                       SYSCTL_DESCR("Amount of kernel memory consumed by pipe "
                   1539:                                    "buffers"),
1.47      atatat   1540:                       NULL, 0, &amountpipekva, 0,
                   1541:                       CTL_KERN, KERN_PIPE, KERN_PIPE_KVASIZE, CTL_EOL);
1.2       jdolecek 1542: }

CVSweb <webmaster@jp.NetBSD.org>