
Annotation of src/sys/kern/kern_event.c, Revision 1.40.6.1

1.40.6.1! matt        1: /*     $NetBSD: kern_event.c,v 1.41 2007/10/08 15:12:07 ad Exp $       */
1.28      kardel      2:
1.1       lukem       3: /*-
                      4:  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     17:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     18:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     19:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     20:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     21:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     22:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     23:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     24:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     25:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     26:  * SUCH DAMAGE.
                     27:  *
                     28:  * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $
                     29:  */
1.14      jdolecek   30:
                     31: #include <sys/cdefs.h>
1.40.6.1! matt       32: __KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.41 2007/10/08 15:12:07 ad Exp $");
1.1       lukem      33:
                     34: #include <sys/param.h>
                     35: #include <sys/systm.h>
                     36: #include <sys/kernel.h>
                     37: #include <sys/proc.h>
1.22      perry      38: #include <sys/malloc.h>
1.1       lukem      39: #include <sys/unistd.h>
                     40: #include <sys/file.h>
                     41: #include <sys/fcntl.h>
1.3       jdolecek   42: #include <sys/select.h>
1.1       lukem      43: #include <sys/queue.h>
                     44: #include <sys/event.h>
                     45: #include <sys/eventvar.h>
                     46: #include <sys/poll.h>
1.3       jdolecek   47: #include <sys/pool.h>
1.1       lukem      48: #include <sys/protosw.h>
                     49: #include <sys/socket.h>
                     50: #include <sys/socketvar.h>
                     51: #include <sys/stat.h>
                     52: #include <sys/uio.h>
1.3       jdolecek   53: #include <sys/mount.h>
                     54: #include <sys/filedesc.h>
                     55: #include <sys/syscallargs.h>
1.27      elad       56: #include <sys/kauth.h>
1.40      ad         57: #include <sys/conf.h>
1.1       lukem      58:
1.3       jdolecek   59: static void    kqueue_wakeup(struct kqueue *kq);
1.1       lukem      60:
1.24      cube       61: static int     kqueue_scan(struct file *, size_t, struct kevent *,
1.25      christos   62:     const struct timespec *, struct lwp *, register_t *,
1.24      cube       63:     const struct kevent_ops *);
1.3       jdolecek   64: static int     kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
1.27      elad       65:                    kauth_cred_t cred, int flags);
1.3       jdolecek   66: static int     kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
1.27      elad       67:                    kauth_cred_t cred, int flags);
1.13      dsl        68: static int     kqueue_ioctl(struct file *fp, u_long com, void *data,
1.25      christos   69:                    struct lwp *l);
1.13      dsl        70: static int     kqueue_fcntl(struct file *fp, u_int com, void *data,
1.25      christos   71:                    struct lwp *l);
                     72: static int     kqueue_poll(struct file *fp, int events, struct lwp *l);
1.3       jdolecek   73: static int     kqueue_kqfilter(struct file *fp, struct knote *kn);
1.25      christos   74: static int     kqueue_stat(struct file *fp, struct stat *sp, struct lwp *l);
                     75: static int     kqueue_close(struct file *fp, struct lwp *l);
1.1       lukem      76:
1.21      christos   77: static const struct fileops kqueueops = {
1.3       jdolecek   78:        kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
                     79:        kqueue_stat, kqueue_close, kqueue_kqfilter
1.1       lukem      80: };
                     81:
1.3       jdolecek   82: static void    knote_attach(struct knote *kn, struct filedesc *fdp);
1.25      christos   83: static void    knote_drop(struct knote *kn, struct lwp *l,
1.3       jdolecek   84:                    struct filedesc *fdp);
                     85: static void    knote_enqueue(struct knote *kn);
                     86: static void    knote_dequeue(struct knote *kn);
1.1       lukem      87:
                     88: static void    filt_kqdetach(struct knote *kn);
                     89: static int     filt_kqueue(struct knote *kn, long hint);
                     90: static int     filt_procattach(struct knote *kn);
                     91: static void    filt_procdetach(struct knote *kn);
                     92: static int     filt_proc(struct knote *kn, long hint);
                     93: static int     filt_fileattach(struct knote *kn);
1.8       jdolecek   94: static void    filt_timerexpire(void *knx);
                     95: static int     filt_timerattach(struct knote *kn);
                     96: static void    filt_timerdetach(struct knote *kn);
                     97: static int     filt_timer(struct knote *kn, long hint);
1.1       lukem      98:
1.3       jdolecek   99: static const struct filterops kqread_filtops =
1.1       lukem     100:        { 1, NULL, filt_kqdetach, filt_kqueue };
1.3       jdolecek  101: static const struct filterops proc_filtops =
1.1       lukem     102:        { 0, filt_procattach, filt_procdetach, filt_proc };
1.3       jdolecek  103: static const struct filterops file_filtops =
1.1       lukem     104:        { 1, filt_fileattach, NULL, NULL };
1.26      yamt      105: static const struct filterops timer_filtops =
1.8       jdolecek  106:        { 0, filt_timerattach, filt_timerdetach, filt_timer };
1.1       lukem     107:
1.38      ad        108: static POOL_INIT(kqueue_pool, sizeof(struct kqueue), 0, 0, 0, "kqueuepl", NULL,
                    109:     IPL_VM);
                    110: static POOL_INIT(knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl", NULL,
                    111:     IPL_VM);
1.8       jdolecek  112: static int     kq_ncallouts = 0;
                    113: static int     kq_calloutmax = (4 * 1024);
1.7       thorpej   114:
                    115: MALLOC_DEFINE(M_KEVENT, "kevent", "kevents/knotes");
1.1       lukem     116:
1.3       jdolecek  117: #define        KNOTE_ACTIVATE(kn)                                              \
                    118: do {                                                                   \
1.1       lukem     119:        kn->kn_status |= KN_ACTIVE;                                     \
                    120:        if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)           \
                    121:                knote_enqueue(kn);                                      \
                    122: } while(0)
                    123:
                    124: #define        KN_HASHSIZE             64              /* XXX should be tunable */
1.3       jdolecek  125: #define        KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
1.1       lukem     126:
1.3       jdolecek  127: extern const struct filterops sig_filtops;
1.1       lukem     128:
                    129: /*
                    130:  * Table for for all system-defined filters.
1.3       jdolecek  131:  * These should be listed in the numeric order of the EVFILT_* defines.
                    132:  * If filtops is NULL, the filter isn't implemented in NetBSD.
                    133:  * End of list is when name is NULL.
1.1       lukem     134:  */
1.3       jdolecek  135: struct kfilter {
                    136:        const char       *name;         /* name of filter */
                    137:        uint32_t          filter;       /* id of filter */
                    138:        const struct filterops *filtops;/* operations for filter */
                    139: };
                    140:
                    141:                /* System defined filters */
                    142: static const struct kfilter sys_kfilters[] = {
                    143:        { "EVFILT_READ",        EVFILT_READ,    &file_filtops },
                    144:        { "EVFILT_WRITE",       EVFILT_WRITE,   &file_filtops },
                    145:        { "EVFILT_AIO",         EVFILT_AIO,     NULL },
                    146:        { "EVFILT_VNODE",       EVFILT_VNODE,   &file_filtops },
                    147:        { "EVFILT_PROC",        EVFILT_PROC,    &proc_filtops },
                    148:        { "EVFILT_SIGNAL",      EVFILT_SIGNAL,  &sig_filtops },
1.8       jdolecek  149:        { "EVFILT_TIMER",       EVFILT_TIMER,   &timer_filtops },
1.22      perry     150:        { NULL,                 0,              NULL }, /* end of list */
1.1       lukem     151: };
                    152:
1.3       jdolecek  153:                /* User defined kfilters */
                    154: static struct kfilter  *user_kfilters;         /* array */
                    155: static int             user_kfilterc;          /* current offset */
                    156: static int             user_kfiltermaxc;       /* max size so far */
                    157:
                    158: /*
                    159:  * Find kfilter entry by name, or NULL if not found.
                    160:  */
                    161: static const struct kfilter *
                    162: kfilter_byname_sys(const char *name)
                    163: {
                    164:        int i;
                    165:
                    166:        for (i = 0; sys_kfilters[i].name != NULL; i++) {
                    167:                if (strcmp(name, sys_kfilters[i].name) == 0)
                    168:                        return (&sys_kfilters[i]);
                    169:        }
                    170:        return (NULL);
                    171: }
                    172:
                    173: static struct kfilter *
                    174: kfilter_byname_user(const char *name)
                    175: {
                    176:        int i;
                    177:
1.31      seanb     178:        /* user filter slots have a NULL name if previously deregistered */
                    179:        for (i = 0; i < user_kfilterc ; i++) {
                    180:                if (user_kfilters[i].name != NULL &&
1.3       jdolecek  181:                    strcmp(name, user_kfilters[i].name) == 0)
                    182:                        return (&user_kfilters[i]);
                    183:        }
                    184:        return (NULL);
                    185: }
                    186:
                    187: static const struct kfilter *
                    188: kfilter_byname(const char *name)
                    189: {
                    190:        const struct kfilter *kfilter;
                    191:
                    192:        if ((kfilter = kfilter_byname_sys(name)) != NULL)
                    193:                return (kfilter);
                    194:
                    195:        return (kfilter_byname_user(name));
                    196: }
                    197:
                    198: /*
                    199:  * Find kfilter entry by filter id, or NULL if not found.
                    200:  * Assumes entries are indexed in filter id order, for speed.
                    201:  */
                    202: static const struct kfilter *
                    203: kfilter_byfilter(uint32_t filter)
                    204: {
                    205:        const struct kfilter *kfilter;
                    206:
                    207:        if (filter < EVFILT_SYSCOUNT)   /* it's a system filter */
                    208:                kfilter = &sys_kfilters[filter];
                    209:        else if (user_kfilters != NULL &&
                    210:            filter < EVFILT_SYSCOUNT + user_kfilterc)
                    211:                                        /* it's a user filter */
                    212:                kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
                    213:        else
                    214:                return (NULL);          /* out of range */
                    215:        KASSERT(kfilter->filter == filter);     /* sanity check! */
                    216:        return (kfilter);
                    217: }
                    218:
                    219: /*
                    220:  * Register a new kfilter. Stores the entry in user_kfilters.
                    221:  * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
                    222:  * If retfilter != NULL, the new filterid is returned in it.
                    223:  */
                    224: int
                    225: kfilter_register(const char *name, const struct filterops *filtops,
                    226:     int *retfilter)
1.1       lukem     227: {
1.3       jdolecek  228:        struct kfilter *kfilter;
                    229:        void *space;
                    230:        int len;
1.31      seanb     231:        int i;
1.3       jdolecek  232:
                    233:        if (name == NULL || name[0] == '\0' || filtops == NULL)
                    234:                return (EINVAL);        /* invalid args */
                    235:        if (kfilter_byname(name) != NULL)
                    236:                return (EEXIST);        /* already exists */
                    237:        if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT)
                    238:                return (EINVAL);        /* too many */
                    239:
1.31      seanb     240:        for (i = 0; i < user_kfilterc; i++) {
                    241:                kfilter = &user_kfilters[i];
                    242:                if (kfilter->name == NULL) {
                    243:                        /* Previously deregistered slot.  Reuse. */
                    244:                        goto reuse;
                    245:                }
                    246:        }
                    247:
1.3       jdolecek  248:        /* check if need to grow user_kfilters */
                    249:        if (user_kfilterc + 1 > user_kfiltermaxc) {
                    250:                /*
                    251:                 * Grow in KFILTER_EXTENT chunks. Use malloc(9), because we
                    252:                 * want to traverse user_kfilters as an array.
                    253:                 */
                    254:                user_kfiltermaxc += KFILTER_EXTENT;
                     255:                kfilter = malloc(user_kfiltermaxc * sizeof(struct kfilter),
                    256:                    M_KEVENT, M_WAITOK);
                    257:
                    258:                /* copy existing user_kfilters */
                    259:                if (user_kfilters != NULL)
1.37      christos  260:                        memcpy((void *)kfilter, (void *)user_kfilters,
1.3       jdolecek  261:                            user_kfilterc * sizeof(struct kfilter));
                    262:                                        /* zero new sections */
1.37      christos  263:                memset((char *)kfilter +
1.3       jdolecek  264:                    user_kfilterc * sizeof(struct kfilter), 0,
                    265:                    (user_kfiltermaxc - user_kfilterc) *
                     266:                    sizeof(struct kfilter));
                    267:                                        /* switch to new kfilter */
                    268:                if (user_kfilters != NULL)
                    269:                        free(user_kfilters, M_KEVENT);
                    270:                user_kfilters = kfilter;
                    271:        }
1.31      seanb     272:        /* Adding new slot */
                    273:        kfilter = &user_kfilters[user_kfilterc++];
                    274: reuse:
1.3       jdolecek  275:        len = strlen(name) + 1;         /* copy name */
                    276:        space = malloc(len, M_KEVENT, M_WAITOK);
                    277:        memcpy(space, name, len);
1.31      seanb     278:        kfilter->name = space;
1.3       jdolecek  279:
1.31      seanb     280:        kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;
1.3       jdolecek  281:
                    282:        len = sizeof(struct filterops); /* copy filtops */
                    283:        space = malloc(len, M_KEVENT, M_WAITOK);
                    284:        memcpy(space, filtops, len);
1.31      seanb     285:        kfilter->filtops = space;
1.3       jdolecek  286:
                    287:        if (retfilter != NULL)
1.31      seanb     288:                *retfilter = kfilter->filter;
1.3       jdolecek  289:        return (0);
1.1       lukem     290: }
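
For illustration only (not part of this file), a kernel component would register its own event filter roughly as follows. The filter name and the example_* routines are hypothetical; a real f_attach/f_detach/f_event triple must hook the knote up to an actual event source.

static int
example_filt_attach(struct knote *kn)
{
	/* attach kn to the object being monitored; nothing to do in this sketch */
	return 0;
}

static void
example_filt_detach(struct knote *kn)
{
	/* undo whatever example_filt_attach() did */
}

static int
example_filt_event(struct knote *kn, long hint)
{
	/* report "always ready" for this sketch */
	kn->kn_data = 0;
	return 1;
}

/* f_isfd == 0: the kevent ident is not a file descriptor */
static const struct filterops example_filtops =
	{ 0, example_filt_attach, example_filt_detach, example_filt_event };

static int
example_filter_init(void)
{
	static int example_filter_id;

	/*
	 * On success the new filter id (>= EVFILT_SYSCOUNT) is stored in
	 * example_filter_id; kfilter_unregister("EVFILT_EXAMPLE") would
	 * later retire the name.
	 */
	return kfilter_register("EVFILT_EXAMPLE", &example_filtops,
	    &example_filter_id);
}
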
                    291:
1.3       jdolecek  292: /*
                    293:  * Unregister a kfilter previously registered with kfilter_register.
                    294:  * This retains the filter id, but clears the name and frees filtops (filter
                     295:  * operations), so that the filter id isn't reused for the rest of this boot.
                    296:  * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
                    297:  */
                    298: int
                    299: kfilter_unregister(const char *name)
1.1       lukem     300: {
1.3       jdolecek  301:        struct kfilter *kfilter;
                    302:
                    303:        if (name == NULL || name[0] == '\0')
                    304:                return (EINVAL);        /* invalid name */
                    305:
                    306:        if (kfilter_byname_sys(name) != NULL)
                    307:                return (EINVAL);        /* can't detach system filters */
1.1       lukem     308:
1.3       jdolecek  309:        kfilter = kfilter_byname_user(name);
                    310:        if (kfilter == NULL)            /* not found */
                    311:                return (ENOENT);
1.1       lukem     312:
1.31      seanb     313:        /* XXXUNCONST Cast away const (but we know it's safe). */
                    314:        free(__UNCONST(kfilter->name), M_KEVENT);
                    315:        kfilter->name = NULL;   /* mark as `not implemented' */
                    316:
1.3       jdolecek  317:        if (kfilter->filtops != NULL) {
1.23      christos  318:                /* XXXUNCONST Cast away const (but we know it's safe). */
                    319:                free(__UNCONST(kfilter->filtops), M_KEVENT);
1.3       jdolecek  320:                kfilter->filtops = NULL; /* mark as `not implemented' */
                    321:        }
1.1       lukem     322:        return (0);
                    323: }
                    324:
1.3       jdolecek  325:
                    326: /*
                    327:  * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
                    328:  * descriptors. Calls struct fileops kqfilter method for given file descriptor.
                    329:  */
                    330: static int
                    331: filt_fileattach(struct knote *kn)
                    332: {
                    333:        struct file *fp;
                    334:
                    335:        fp = kn->kn_fp;
                    336:        return ((*fp->f_ops->fo_kqfilter)(fp, kn));
                    337: }
                    338:
                    339: /*
                    340:  * Filter detach method for EVFILT_READ on kqueue descriptor.
                    341:  */
1.1       lukem     342: static void
                    343: filt_kqdetach(struct knote *kn)
                    344: {
1.3       jdolecek  345:        struct kqueue *kq;
1.1       lukem     346:
1.3       jdolecek  347:        kq = (struct kqueue *)kn->kn_fp->f_data;
1.5       christos  348:        SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
1.1       lukem     349: }
                    350:
1.3       jdolecek  351: /*
                    352:  * Filter event method for EVFILT_READ on kqueue descriptor.
                    353:  */
1.1       lukem     354: /*ARGSUSED*/
                    355: static int
1.33      yamt      356: filt_kqueue(struct knote *kn, long hint)
1.1       lukem     357: {
1.3       jdolecek  358:        struct kqueue *kq;
1.1       lukem     359:
1.3       jdolecek  360:        kq = (struct kqueue *)kn->kn_fp->f_data;
1.1       lukem     361:        kn->kn_data = kq->kq_count;
                    362:        return (kn->kn_data > 0);
                    363: }
                    364:
1.3       jdolecek  365: /*
                    366:  * Filter attach method for EVFILT_PROC.
                    367:  */
1.1       lukem     368: static int
                    369: filt_procattach(struct knote *kn)
                    370: {
1.30      ad        371:        struct proc *p, *curp;
                    372:        struct lwp *curl;
                    373:
                    374:        curl = curlwp;
                    375:        curp = curl->l_proc;
1.1       lukem     376:
                    377:        p = pfind(kn->kn_id);
                    378:        if (p == NULL)
                    379:                return (ESRCH);
1.3       jdolecek  380:
                    381:        /*
                    382:         * Fail if it's not owned by you, or the last exec gave us
                    383:         * setuid/setgid privs (unless you're root).
                    384:         */
1.30      ad        385:        if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(curl->l_cred) ||
1.36      pavel     386:            (p->p_flag & PK_SUGID)) && kauth_authorize_generic(curl->l_cred,
1.34      elad      387:            KAUTH_GENERIC_ISSUSER, NULL) != 0)
1.3       jdolecek  388:                return (EACCES);
1.1       lukem     389:
                    390:        kn->kn_ptr.p_proc = p;
1.3       jdolecek  391:        kn->kn_flags |= EV_CLEAR;       /* automatically set */
1.1       lukem     392:
                    393:        /*
                    394:         * internal flag indicating registration done by kernel
                    395:         */
                    396:        if (kn->kn_flags & EV_FLAG1) {
1.3       jdolecek  397:                kn->kn_data = kn->kn_sdata;     /* ppid */
1.1       lukem     398:                kn->kn_fflags = NOTE_CHILD;
                    399:                kn->kn_flags &= ~EV_FLAG1;
                    400:        }
                    401:
1.3       jdolecek  402:        /* XXXSMP lock the process? */
1.1       lukem     403:        SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
                    404:
                    405:        return (0);
                    406: }
                    407:
                    408: /*
1.3       jdolecek  409:  * Filter detach method for EVFILT_PROC.
                    410:  *
1.1       lukem     411:  * The knote may be attached to a different process, which may exit,
                    412:  * leaving nothing for the knote to be attached to.  So when the process
                    413:  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
                    414:  * it will be deleted when read out.  However, as part of the knote deletion,
                    415:  * this routine is called, so a check is needed to avoid actually performing
1.3       jdolecek  416:  * a detach, because the original process might not exist any more.
1.1       lukem     417:  */
                    418: static void
                    419: filt_procdetach(struct knote *kn)
                    420: {
1.3       jdolecek  421:        struct proc *p;
1.1       lukem     422:
                    423:        if (kn->kn_status & KN_DETACHED)
                    424:                return;
                    425:
1.3       jdolecek  426:        p = kn->kn_ptr.p_proc;
                    427:
                    428:        /* XXXSMP lock the process? */
1.1       lukem     429:        SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
                    430: }
                    431:
1.3       jdolecek  432: /*
                    433:  * Filter event method for EVFILT_PROC.
                    434:  */
1.1       lukem     435: static int
                    436: filt_proc(struct knote *kn, long hint)
                    437: {
                    438:        u_int event;
                    439:
                    440:        /*
                    441:         * mask off extra data
                    442:         */
                    443:        event = (u_int)hint & NOTE_PCTRLMASK;
                    444:
                    445:        /*
                    446:         * if the user is interested in this event, record it.
                    447:         */
                    448:        if (kn->kn_sfflags & event)
                    449:                kn->kn_fflags |= event;
                    450:
                    451:        /*
                    452:         * process is gone, so flag the event as finished.
                    453:         */
                    454:        if (event == NOTE_EXIT) {
1.3       jdolecek  455:                /*
                    456:                 * Detach the knote from watched process and mark
                    457:                 * it as such. We can't leave this to kqueue_scan(),
                    458:                 * since the process might not exist by then. And we
                    459:                 * have to do this now, since psignal KNOTE() is called
                    460:                 * also for zombies and we might end up reading freed
                     461:                 * memory if the kevent had already been picked up
1.22      perry     462:                 * and the knote g/c'ed.
1.3       jdolecek  463:                 */
                    464:                kn->kn_fop->f_detach(kn);
1.1       lukem     465:                kn->kn_status |= KN_DETACHED;
1.3       jdolecek  466:
                     467:                /* Mark as ONESHOT, so that the knote is g/c'ed when read */
1.22      perry     468:                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1.1       lukem     469:                return (1);
                    470:        }
                    471:
                    472:        /*
                    473:         * process forked, and user wants to track the new process,
                    474:         * so attach a new knote to it, and immediately report an
                    475:         * event with the parent's pid.
                    476:         */
                    477:        if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
                    478:                struct kevent kev;
                    479:                int error;
                    480:
                    481:                /*
                    482:                 * register knote with new process.
                    483:                 */
                    484:                kev.ident = hint & NOTE_PDATAMASK;      /* pid */
                    485:                kev.filter = kn->kn_filter;
                    486:                kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
                    487:                kev.fflags = kn->kn_sfflags;
                    488:                kev.data = kn->kn_id;                   /* parent */
                    489:                kev.udata = kn->kn_kevent.udata;        /* preserve udata */
                    490:                error = kqueue_register(kn->kn_kq, &kev, NULL);
                    491:                if (error)
                    492:                        kn->kn_fflags |= NOTE_TRACKERR;
                    493:        }
                    494:
                    495:        return (kn->kn_fflags != 0);
1.8       jdolecek  496: }
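
For reference, the EVFILT_PROC machinery above is driven from userland through kevent(2). A minimal sketch, with error handling abbreviated and the pid assumed to name a process visible to the caller:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

/* Watch a process for fork and exit; NOTE_TRACK extends the watch to children. */
int
watch_proc(pid_t pid)
{
	struct kevent ev;
	int kq;

	if ((kq = kqueue()) == -1)
		return -1;
	EV_SET(&ev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, 0);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		return -1;

	for (;;) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) < 1)
			break;
		if (ev.fflags & NOTE_CHILD)	/* data holds the parent pid */
			printf("pid %lu forked from %ld\n",
			    (unsigned long)ev.ident, (long)ev.data);
		if (ev.fflags & NOTE_EXIT) {
			printf("pid %lu exited\n", (unsigned long)ev.ident);
			break;
		}
	}
	close(kq);
	return 0;
}
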
                    497:
                    498: static void
                    499: filt_timerexpire(void *knx)
                    500: {
                    501:        struct knote *kn = knx;
                    502:        int tticks;
                    503:
                    504:        kn->kn_data++;
                    505:        KNOTE_ACTIVATE(kn);
                    506:
                    507:        if ((kn->kn_flags & EV_ONESHOT) == 0) {
                    508:                tticks = mstohz(kn->kn_sdata);
1.39      ad        509:                callout_schedule((callout_t *)kn->kn_hook, tticks);
1.8       jdolecek  510:        }
                    511: }
                    512:
                    513: /*
                    514:  * data contains amount of time to sleep, in milliseconds
1.22      perry     515:  */
1.8       jdolecek  516: static int
                    517: filt_timerattach(struct knote *kn)
                    518: {
1.39      ad        519:        callout_t *calloutp;
1.8       jdolecek  520:        int tticks;
                    521:
                    522:        if (kq_ncallouts >= kq_calloutmax)
                    523:                return (ENOMEM);
                    524:        kq_ncallouts++;
                    525:
                    526:        tticks = mstohz(kn->kn_sdata);
                    527:
                    528:        /* if the supplied value is under our resolution, use 1 tick */
                    529:        if (tticks == 0) {
                    530:                if (kn->kn_sdata == 0)
                    531:                        return (EINVAL);
                    532:                tticks = 1;
                    533:        }
                    534:
                    535:        kn->kn_flags |= EV_CLEAR;               /* automatically set */
1.39      ad        536:        MALLOC(calloutp, callout_t *, sizeof(*calloutp),
1.8       jdolecek  537:            M_KEVENT, 0);
1.39      ad        538:        callout_init(calloutp, 0);
1.8       jdolecek  539:        callout_reset(calloutp, tticks, filt_timerexpire, kn);
                    540:        kn->kn_hook = calloutp;
                    541:
                    542:        return (0);
                    543: }
                    544:
                    545: static void
                    546: filt_timerdetach(struct knote *kn)
                    547: {
1.39      ad        548:        callout_t *calloutp;
1.8       jdolecek  549:
1.39      ad        550:        calloutp = (callout_t *)kn->kn_hook;
1.8       jdolecek  551:        callout_stop(calloutp);
1.39      ad        552:        callout_destroy(calloutp);
1.8       jdolecek  553:        FREE(calloutp, M_KEVENT);
                    554:        kq_ncallouts--;
                    555: }
                    556:
                    557: static int
1.33      yamt      558: filt_timer(struct knote *kn, long hint)
1.8       jdolecek  559: {
                    560:        return (kn->kn_data != 0);
1.1       lukem     561: }
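
For reference, the timer filter is used from userland like this; as noted above, the data field carries the timer period in milliseconds. A minimal sketch with error handling abbreviated:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

/* Arm a periodic 500 ms timer; EV_CLEAR is implied, so data counts expirations. */
int
run_timer(void)
{
	struct kevent ev;
	int kq, i;

	if ((kq = kqueue()) == -1)
		return -1;
	/* ident (here 1) is an arbitrary per-kqueue timer identifier */
	EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, 0);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		return -1;

	for (i = 0; i < 4; i++) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) < 1)
			break;
		printf("timer %lu fired, %ld expirations since last read\n",
		    (unsigned long)ev.ident, (long)ev.data);
	}
	close(kq);
	return 0;
}
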
                    562:
1.3       jdolecek  563: /*
                    564:  * filt_seltrue:
                    565:  *
                    566:  *     This filter "event" routine simulates seltrue().
                    567:  */
1.1       lukem     568: int
1.33      yamt      569: filt_seltrue(struct knote *kn, long hint)
1.1       lukem     570: {
                    571:
1.3       jdolecek  572:        /*
                    573:         * We don't know how much data can be read/written,
                    574:         * but we know that it *can* be.  This is about as
                    575:         * good as select/poll does as well.
                    576:         */
                    577:        kn->kn_data = 0;
                    578:        return (1);
                    579: }
                    580:
                    581: /*
                     582:  * This provides a full kqfilter entry for device switch tables, which
                     583:  * has the same effect as a filter using filt_seltrue() as its filter method.
                    584:  */
                    585: static void
1.33      yamt      586: filt_seltruedetach(struct knote *kn)
1.3       jdolecek  587: {
                    588:        /* Nothing to do */
                    589: }
                    590:
                    591: static const struct filterops seltrue_filtops =
                    592:        { 1, NULL, filt_seltruedetach, filt_seltrue };
                    593:
                    594: int
1.33      yamt      595: seltrue_kqfilter(dev_t dev, struct knote *kn)
1.3       jdolecek  596: {
                    597:        switch (kn->kn_filter) {
                    598:        case EVFILT_READ:
                    599:        case EVFILT_WRITE:
                    600:                kn->kn_fop = &seltrue_filtops;
                    601:                break;
                    602:        default:
                    603:                return (1);
                    604:        }
                    605:
                    606:        /* Nothing more to do */
                    607:        return (0);
                    608: }
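
A character device that is always ready for I/O (the seltrue() case) can point its d_kqfilter entry at seltrue_kqfilter(), or delegate to it as in this minimal sketch (example_kqfilter is a hypothetical driver routine):

static int
example_kqfilter(dev_t dev, struct knote *kn)
{
	/* EVFILT_READ/EVFILT_WRITE knotes get always-ready semantics */
	return seltrue_kqfilter(dev, kn);
}
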
                    609:
                    610: /*
                    611:  * kqueue(2) system call.
                    612:  */
                    613: int
1.33      yamt      614: sys_kqueue(struct lwp *l, void *v, register_t *retval)
1.3       jdolecek  615: {
                    616:        struct filedesc *fdp;
                    617:        struct kqueue   *kq;
                    618:        struct file     *fp;
                    619:        int             fd, error;
                    620:
1.30      ad        621:        fdp = l->l_proc->p_fd;
                    622:        error = falloc(l, &fp, &fd);    /* setup a new file descriptor */
1.1       lukem     623:        if (error)
                    624:                return (error);
                    625:        fp->f_flag = FREAD | FWRITE;
                    626:        fp->f_type = DTYPE_KQUEUE;
                    627:        fp->f_ops = &kqueueops;
1.3       jdolecek  628:        kq = pool_get(&kqueue_pool, PR_WAITOK);
                    629:        memset((char *)kq, 0, sizeof(struct kqueue));
1.12      pk        630:        simple_lock_init(&kq->kq_lock);
1.1       lukem     631:        TAILQ_INIT(&kq->kq_head);
1.37      christos  632:        fp->f_data = (void *)kq;        /* store the kqueue with the fp */
1.3       jdolecek  633:        *retval = fd;
1.1       lukem     634:        if (fdp->fd_knlistsize < 0)
1.3       jdolecek  635:                fdp->fd_knlistsize = 0; /* this process has a kq */
1.1       lukem     636:        kq->kq_fdp = fdp;
1.3       jdolecek  637:        FILE_SET_MATURE(fp);
1.25      christos  638:        FILE_UNUSE(fp, l);              /* falloc() does FILE_USE() */
1.1       lukem     639:        return (error);
                    640: }
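
From userland, the descriptor returned by kqueue(2) is driven with kevent(2), implemented just below. A minimal sketch that waits for standard input to become readable, error handling abbreviated:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent change, result;
	int kq, n;

	if ((kq = kqueue()) == -1)
		return 1;
	/* register interest in fd 0 becoming readable */
	EV_SET(&change, STDIN_FILENO, EVFILT_READ, EV_ADD, 0, 0, 0);
	/* a single call can both submit changes and wait for events */
	n = kevent(kq, &change, 1, &result, 1, NULL);
	if (n > 0)
		printf("fd %lu readable, %ld bytes pending\n",
		    (unsigned long)result.ident, (long)result.data);
	close(kq);
	return 0;
}
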
                    641:
1.3       jdolecek  642: /*
                    643:  * kevent(2) system call.
                    644:  */
1.24      cube      645: static int
1.33      yamt      646: kevent_fetch_changes(void *private, const struct kevent *changelist,
1.24      cube      647:     struct kevent *changes, size_t index, int n)
                    648: {
                    649:        return copyin(changelist + index, changes, n * sizeof(*changes));
                    650: }
                    651:
                    652: static int
1.33      yamt      653: kevent_put_events(void *private, struct kevent *events,
1.24      cube      654:     struct kevent *eventlist, size_t index, int n)
                    655: {
                    656:        return copyout(events, eventlist + index, n * sizeof(*events));
                    657: }
                    658:
                    659: static const struct kevent_ops kevent_native_ops = {
                    660:        keo_private: NULL,
                    661:        keo_fetch_timeout: copyin,
                    662:        keo_fetch_changes: kevent_fetch_changes,
                    663:        keo_put_events: kevent_put_events,
                    664: };
                    665:
1.1       lukem     666: int
1.6       thorpej   667: sys_kevent(struct lwp *l, void *v, register_t *retval)
1.1       lukem     668: {
1.3       jdolecek  669:        struct sys_kevent_args /* {
                    670:                syscallarg(int) fd;
                    671:                syscallarg(const struct kevent *) changelist;
                    672:                syscallarg(size_t) nchanges;
                    673:                syscallarg(struct kevent *) eventlist;
                    674:                syscallarg(size_t) nevents;
                    675:                syscallarg(const struct timespec *) timeout;
                    676:        } */ *uap = v;
1.24      cube      677:
                    678:        return kevent1(l, retval, SCARG(uap, fd), SCARG(uap, changelist),
                    679:            SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
                    680:            SCARG(uap, timeout), &kevent_native_ops);
                    681: }
                    682:
                    683: int
                    684: kevent1(struct lwp *l, register_t *retval, int fd,
                    685:     const struct kevent *changelist, size_t nchanges, struct kevent *eventlist,
                    686:     size_t nevents, const struct timespec *timeout,
                    687:     const struct kevent_ops *keops)
                    688: {
1.3       jdolecek  689:        struct kevent   *kevp;
                    690:        struct kqueue   *kq;
                    691:        struct file     *fp;
                    692:        struct timespec ts;
1.6       thorpej   693:        struct proc     *p;
1.24      cube      694:        size_t          i, n, ichange;
1.3       jdolecek  695:        int             nerrors, error;
                    696:
1.6       thorpej   697:        p = l->l_proc;
1.3       jdolecek  698:        /* check that we're dealing with a kq */
1.24      cube      699:        fp = fd_getfile(p->p_fd, fd);
1.10      pk        700:        if (fp == NULL)
1.1       lukem     701:                return (EBADF);
1.10      pk        702:
                    703:        if (fp->f_type != DTYPE_KQUEUE) {
1.40.6.1! matt      704:                mutex_exit(&fp->f_lock);
1.10      pk        705:                return (EBADF);
                    706:        }
1.1       lukem     707:
1.3       jdolecek  708:        FILE_USE(fp);
1.1       lukem     709:
1.24      cube      710:        if (timeout != NULL) {
                    711:                error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
1.1       lukem     712:                if (error)
                    713:                        goto done;
1.24      cube      714:                timeout = &ts;
1.1       lukem     715:        }
                    716:
                    717:        kq = (struct kqueue *)fp->f_data;
                    718:        nerrors = 0;
1.24      cube      719:        ichange = 0;
1.1       lukem     720:
1.3       jdolecek  721:        /* traverse list of events to register */
1.24      cube      722:        while (nchanges > 0) {
1.3       jdolecek  723:                /* copyin a maximum of KQ_NEVENTS at each pass */
1.24      cube      724:                n = MIN(nchanges, KQ_NEVENTS);
                    725:                error = (*keops->keo_fetch_changes)(keops->keo_private,
                    726:                    changelist, kq->kq_kev, ichange, n);
1.1       lukem     727:                if (error)
                    728:                        goto done;
                    729:                for (i = 0; i < n; i++) {
                    730:                        kevp = &kq->kq_kev[i];
                    731:                        kevp->flags &= ~EV_SYSFLAGS;
1.3       jdolecek  732:                        /* register each knote */
1.25      christos  733:                        error = kqueue_register(kq, kevp, l);
1.1       lukem     734:                        if (error) {
1.24      cube      735:                                if (nevents != 0) {
1.1       lukem     736:                                        kevp->flags = EV_ERROR;
                    737:                                        kevp->data = error;
1.24      cube      738:                                        error = (*keops->keo_put_events)
                    739:                                            (keops->keo_private, kevp,
                    740:                                            eventlist, nerrors, 1);
1.3       jdolecek  741:                                        if (error)
                    742:                                                goto done;
1.24      cube      743:                                        nevents--;
1.1       lukem     744:                                        nerrors++;
                    745:                                } else {
                    746:                                        goto done;
                    747:                                }
                    748:                        }
                    749:                }
1.24      cube      750:                nchanges -= n;  /* update the results */
                    751:                ichange += n;
1.1       lukem     752:        }
                    753:        if (nerrors) {
1.3       jdolecek  754:                *retval = nerrors;
1.1       lukem     755:                error = 0;
                    756:                goto done;
                    757:        }
                    758:
1.3       jdolecek  759:        /* actually scan through the events */
1.25      christos  760:        error = kqueue_scan(fp, nevents, eventlist, timeout, l, retval, keops);
1.3       jdolecek  761:  done:
1.25      christos  762:        FILE_UNUSE(fp, l);
1.1       lukem     763:        return (error);
                    764: }
                    765:
1.3       jdolecek  766: /*
                    767:  * Register a given kevent kev onto the kqueue
                    768:  */
1.1       lukem     769: int
1.25      christos  770: kqueue_register(struct kqueue *kq, struct kevent *kev, struct lwp *l)
1.1       lukem     771: {
1.3       jdolecek  772:        const struct kfilter *kfilter;
                    773:        struct filedesc *fdp;
1.16      fvdl      774:        struct file     *fp;
1.15      darrenr   775:        struct knote    *kn;
1.3       jdolecek  776:        int             s, error;
                    777:
                    778:        fdp = kq->kq_fdp;
                    779:        fp = NULL;
                    780:        kn = NULL;
                    781:        error = 0;
                    782:        kfilter = kfilter_byfilter(kev->filter);
                    783:        if (kfilter == NULL || kfilter->filtops == NULL) {
                    784:                /* filter not found nor implemented */
1.1       lukem     785:                return (EINVAL);
                    786:        }
                    787:
1.3       jdolecek  788:        /* search if knote already exists */
                    789:        if (kfilter->filtops->f_isfd) {
                    790:                /* monitoring a file descriptor */
                    791:                if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
                    792:                        return (EBADF); /* validate descriptor */
                    793:                FILE_USE(fp);
1.1       lukem     794:
                    795:                if (kev->ident < fdp->fd_knlistsize) {
                    796:                        SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
                    797:                                if (kq == kn->kn_kq &&
                    798:                                    kev->filter == kn->kn_filter)
                    799:                                        break;
                    800:                }
                    801:        } else {
1.3       jdolecek  802:                /*
                    803:                 * not monitoring a file descriptor, so
                    804:                 * lookup knotes in internal hash table
                    805:                 */
1.1       lukem     806:                if (fdp->fd_knhashmask != 0) {
                    807:                        struct klist *list;
1.22      perry     808:
1.1       lukem     809:                        list = &fdp->fd_knhash[
                    810:                            KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
                    811:                        SLIST_FOREACH(kn, list, kn_link)
                    812:                                if (kev->ident == kn->kn_id &&
                    813:                                    kq == kn->kn_kq &&
                    814:                                    kev->filter == kn->kn_filter)
                    815:                                        break;
                    816:                }
                    817:        }
                    818:
                    819:        if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
1.3       jdolecek  820:                error = ENOENT;         /* filter not found */
1.1       lukem     821:                goto done;
                    822:        }
                    823:
                    824:        /*
                    825:         * kn now contains the matching knote, or NULL if no match
                    826:         */
                    827:        if (kev->flags & EV_ADD) {
1.3       jdolecek  828:                /* add knote */
1.1       lukem     829:
                    830:                if (kn == NULL) {
1.3       jdolecek  831:                        /* create new knote */
                    832:                        kn = pool_get(&knote_pool, PR_WAITOK);
1.1       lukem     833:                        if (kn == NULL) {
                    834:                                error = ENOMEM;
                    835:                                goto done;
                    836:                        }
                    837:                        kn->kn_fp = fp;
                    838:                        kn->kn_kq = kq;
1.3       jdolecek  839:                        kn->kn_fop = kfilter->filtops;
1.1       lukem     840:
                    841:                        /*
                    842:                         * apply reference count to knote structure, and
                    843:                         * do not release it at the end of this routine.
                    844:                         */
                    845:                        fp = NULL;
                    846:
                    847:                        kn->kn_sfflags = kev->fflags;
                    848:                        kn->kn_sdata = kev->data;
                    849:                        kev->fflags = 0;
                    850:                        kev->data = 0;
                    851:                        kn->kn_kevent = *kev;
                    852:
                    853:                        knote_attach(kn, fdp);
1.3       jdolecek  854:                        if ((error = kfilter->filtops->f_attach(kn)) != 0) {
1.25      christos  855:                                knote_drop(kn, l, fdp);
1.1       lukem     856:                                goto done;
                    857:                        }
                    858:                } else {
1.3       jdolecek  859:                        /* modify existing knote */
                    860:
1.1       lukem     861:                        /*
                    862:                         * The user may change some filter values after the
1.22      perry     863:                         * initial EV_ADD, but doing so will not reset any
1.1       lukem     864:                         * filter which have already been triggered.
                    865:                         */
                    866:                        kn->kn_sfflags = kev->fflags;
                    867:                        kn->kn_sdata = kev->data;
                    868:                        kn->kn_kevent.udata = kev->udata;
                    869:                }
                    870:
1.11      pk        871:                s = splsched();
1.1       lukem     872:                if (kn->kn_fop->f_event(kn, 0))
                    873:                        KNOTE_ACTIVATE(kn);
                    874:                splx(s);
                    875:
1.3       jdolecek  876:        } else if (kev->flags & EV_DELETE) {    /* delete knote */
1.1       lukem     877:                kn->kn_fop->f_detach(kn);
1.25      christos  878:                knote_drop(kn, l, fdp);
1.1       lukem     879:                goto done;
                    880:        }
                    881:
1.3       jdolecek  882:        /* disable knote */
1.1       lukem     883:        if ((kev->flags & EV_DISABLE) &&
                    884:            ((kn->kn_status & KN_DISABLED) == 0)) {
1.11      pk        885:                s = splsched();
1.1       lukem     886:                kn->kn_status |= KN_DISABLED;
                    887:                splx(s);
                    888:        }
                    889:
1.3       jdolecek  890:        /* enable knote */
1.1       lukem     891:        if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
1.11      pk        892:                s = splsched();
1.1       lukem     893:                kn->kn_status &= ~KN_DISABLED;
                    894:                if ((kn->kn_status & KN_ACTIVE) &&
                    895:                    ((kn->kn_status & KN_QUEUED) == 0))
                    896:                        knote_enqueue(kn);
                    897:                splx(s);
                    898:        }
                    899:
1.3       jdolecek  900:  done:
1.1       lukem     901:        if (fp != NULL)
1.25      christos  902:                FILE_UNUSE(fp, l);
1.1       lukem     903:        return (error);
                    904: }
                    905:
1.3       jdolecek  906: /*
                    907:  * Scan through the list of events on fp (for a maximum of maxevents),
                     908:  * returning the results into ulistp. Timeout is determined by tsp; if
                     909:  * NULL, wait indefinitely; if zero-valued, perform a poll; otherwise wait
                    910:  * as appropriate.
                    911:  */
1.1       lukem     912: static int
1.3       jdolecek  913: kqueue_scan(struct file *fp, size_t maxevents, struct kevent *ulistp,
1.25      christos  914:     const struct timespec *tsp, struct lwp *l, register_t *retval,
1.24      cube      915:     const struct kevent_ops *keops)
1.1       lukem     916: {
1.25      christos  917:        struct proc     *p = l->l_proc;
1.3       jdolecek  918:        struct kqueue   *kq;
                    919:        struct kevent   *kevp;
1.29      kardel    920:        struct timeval  atv, sleeptv;
1.19      jdolecek  921:        struct knote    *kn, *marker=NULL;
1.24      cube      922:        size_t          count, nkev, nevents;
1.3       jdolecek  923:        int             s, timeout, error;
1.1       lukem     924:
1.3       jdolecek  925:        kq = (struct kqueue *)fp->f_data;
1.1       lukem     926:        count = maxevents;
1.24      cube      927:        nkev = nevents = error = 0;
1.1       lukem     928:        if (count == 0)
                    929:                goto done;
                    930:
1.9       jdolecek  931:        if (tsp) {                              /* timeout supplied */
1.1       lukem     932:                TIMESPEC_TO_TIMEVAL(&atv, tsp);
1.29      kardel    933:                if (inittimeleft(&atv, &sleeptv) == -1) {
1.1       lukem     934:                        error = EINVAL;
                    935:                        goto done;
                    936:                }
1.28      kardel    937:                timeout = tvtohz(&atv);
1.9       jdolecek  938:                if (timeout <= 0)
1.29      kardel    939:                        timeout = -1;           /* do poll */
1.1       lukem     940:        } else {
1.9       jdolecek  941:                /* no timeout, wait forever */
1.1       lukem     942:                timeout = 0;
                    943:        }
1.19      jdolecek  944:
                    945:        MALLOC(marker, struct knote *, sizeof(*marker), M_KEVENT, M_WAITOK);
                    946:        memset(marker, 0, sizeof(*marker));
                    947:
1.1       lukem     948:        goto start;
                    949:
1.3       jdolecek  950:  retry:
1.29      kardel    951:        if (tsp && (timeout = gettimeleft(&atv, &sleeptv)) <= 0) {
                    952:                goto done;
1.1       lukem     953:        }
                    954:
1.3       jdolecek  955:  start:
1.1       lukem     956:        kevp = kq->kq_kev;
1.11      pk        957:        s = splsched();
1.12      pk        958:        simple_lock(&kq->kq_lock);
1.1       lukem     959:        if (kq->kq_count == 0) {
1.22      perry     960:                if (timeout < 0) {
1.1       lukem     961:                        error = EWOULDBLOCK;
1.17      fvdl      962:                        simple_unlock(&kq->kq_lock);
1.1       lukem     963:                } else {
                    964:                        kq->kq_state |= KQ_SLEEP;
1.12      pk        965:                        error = ltsleep(kq, PSOCK | PCATCH | PNORELOCK,
                    966:                                        "kqread", timeout, &kq->kq_lock);
1.1       lukem     967:                }
                    968:                splx(s);
                    969:                if (error == 0)
                    970:                        goto retry;
                    971:                /* don't restart after signals... */
                    972:                if (error == ERESTART)
                    973:                        error = EINTR;
                    974:                else if (error == EWOULDBLOCK)
                    975:                        error = 0;
                    976:                goto done;
                    977:        }
                    978:
1.3       jdolecek  979:        /* mark end of knote list */
1.22      perry     980:        TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1.12      pk        981:        simple_unlock(&kq->kq_lock);
1.3       jdolecek  982:
                    983:        while (count) {                         /* while user wants data ... */
1.12      pk        984:                simple_lock(&kq->kq_lock);
1.3       jdolecek  985:                kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
1.22      perry     986:                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1.19      jdolecek  987:                if (kn == marker) {             /* if it's our marker, stop */
1.12      pk        988:                        /* What if it's someone else's marker? */
                    989:                        simple_unlock(&kq->kq_lock);
1.1       lukem     990:                        splx(s);
                    991:                        if (count == maxevents)
                    992:                                goto retry;
                    993:                        goto done;
                    994:                }
1.12      pk        995:                kq->kq_count--;
                    996:                simple_unlock(&kq->kq_lock);
                    997:
1.1       lukem     998:                if (kn->kn_status & KN_DISABLED) {
1.3       jdolecek  999:                        /* don't want disabled events */
1.1       lukem    1000:                        kn->kn_status &= ~KN_QUEUED;
                   1001:                        continue;
                   1002:                }
                   1003:                if ((kn->kn_flags & EV_ONESHOT) == 0 &&
                   1004:                    kn->kn_fop->f_event(kn, 0) == 0) {
1.3       jdolecek 1005:                        /*
                   1006:                         * non-ONESHOT event that hasn't
                   1007:                         * triggered again, so de-queue.
                   1008:                         */
1.1       lukem    1009:                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                   1010:                        continue;
                   1011:                }
                   1012:                *kevp = kn->kn_kevent;
                   1013:                kevp++;
                   1014:                nkev++;
                   1015:                if (kn->kn_flags & EV_ONESHOT) {
1.3       jdolecek 1016:                        /* delete ONESHOT events after retrieval */
1.1       lukem    1017:                        kn->kn_status &= ~KN_QUEUED;
                   1018:                        splx(s);
                   1019:                        kn->kn_fop->f_detach(kn);
1.25      christos 1020:                        knote_drop(kn, l, p->p_fd);
1.11      pk       1021:                        s = splsched();
1.1       lukem    1022:                } else if (kn->kn_flags & EV_CLEAR) {
1.3       jdolecek 1023:                        /* clear state after retrieval */
1.1       lukem    1024:                        kn->kn_data = 0;
                   1025:                        kn->kn_fflags = 0;
                   1026:                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                   1027:                } else {
1.3       jdolecek 1028:                        /* add event back on list */
1.12      pk       1029:                        simple_lock(&kq->kq_lock);
1.22      perry    1030:                        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1.12      pk       1031:                        kq->kq_count++;
                   1032:                        simple_unlock(&kq->kq_lock);
1.1       lukem    1033:                }
                   1034:                count--;
                   1035:                if (nkev == KQ_NEVENTS) {
1.3       jdolecek 1036:                        /* do copyouts in KQ_NEVENTS chunks */
1.1       lukem    1037:                        splx(s);
1.24      cube     1038:                        error = (*keops->keo_put_events)(keops->keo_private,
                   1039:                            &kq->kq_kev[0], ulistp, nevents, nkev);
                   1040:                        nevents += nkev;
1.1       lukem    1041:                        nkev = 0;
                   1042:                        kevp = kq->kq_kev;
1.11      pk       1043:                        s = splsched();
1.1       lukem    1044:                        if (error)
                   1045:                                break;
                   1046:                }
                   1047:        }
1.3       jdolecek 1048:
                   1049:        /* remove marker */
1.12      pk       1050:        simple_lock(&kq->kq_lock);
1.22      perry    1051:        TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
1.12      pk       1052:        simple_unlock(&kq->kq_lock);
1.1       lukem    1053:        splx(s);
1.3       jdolecek 1054:  done:
1.19      jdolecek 1055:        if (marker)
                   1056:                FREE(marker, M_KEVENT);
                   1057:
1.24      cube     1058:        if (nkev != 0)
1.3       jdolecek 1059:                /* copyout remaining events */
1.24      cube     1060:                error = (*keops->keo_put_events)(keops->keo_private,
                   1061:                    &kq->kq_kev[0], ulistp, nevents, nkev);
1.3       jdolecek 1062:        *retval = maxevents - count;
                   1063:
1.1       lukem    1064:        return (error);
                   1065: }
                   1066:
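/*
 * The scan loop above is what ultimately services kevent(2).  A minimal
 * userland sketch, where the monitored descriptor "fd" and the output
 * buffer size are only illustrative:
 *
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *
 *	int kq = kqueue();
 *	struct kevent chg, out[8];
 *	struct timespec ts = { 5, 0 };
 *
 *	EV_SET(&chg, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
 *	(void)kevent(kq, &chg, 1, NULL, 0, NULL);
 *	int n = kevent(kq, NULL, 0, out, 8, &ts);
 *
 * A NULL timeout sleeps until an event arrives and a zeroed timespec
 * polls, matching the timeout handling at the top of kqueue_scan();
 * the "8" here plays the role of maxevents.
 */
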
                   1067: /*
1.3       jdolecek 1068:  * struct fileops read method for a kqueue descriptor.
                   1069:  * Not implemented.
                   1070:  * XXX: This could be expanded to call kqueue_scan, if desired.
1.1       lukem    1071:  */
                   1072: /*ARGSUSED*/
                   1073: static int
1.33      yamt     1074: kqueue_read(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
                   1075:     int flags)
1.1       lukem    1076: {
1.3       jdolecek 1077:
1.1       lukem    1078:        return (ENXIO);
                   1079: }
                   1080:
1.3       jdolecek 1081: /*
                   1082:  * struct fileops write method for a kqueue descriptor.
                   1083:  * Not implemented.
                   1084:  */
1.1       lukem    1085: /*ARGSUSED*/
                   1086: static int
1.33      yamt     1087: kqueue_write(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
                   1088:     int flags)
1.1       lukem    1089: {
1.3       jdolecek 1090:
1.1       lukem    1091:        return (ENXIO);
                   1092: }
                   1093:
1.3       jdolecek 1094: /*
                   1095:  * struct fileops ioctl method for a kqueue descriptor.
                   1096:  *
                   1097:  * Two ioctls are currently supported. They both use struct kfilter_mapping:
                    1098:  *     KFILTER_BYFILTER        find name for filter, and return result in
                    1099:  *                             name, which is of size len.
                    1100:  *     KFILTER_BYNAME          find filter for name. len is ignored.
                   1101:  */
1.1       lukem    1102: /*ARGSUSED*/
                   1103: static int
1.33      yamt     1104: kqueue_ioctl(struct file *fp, u_long com, void *data, struct lwp *l)
1.1       lukem    1105: {
1.3       jdolecek 1106:        struct kfilter_mapping  *km;
                   1107:        const struct kfilter    *kfilter;
                   1108:        char                    *name;
                   1109:        int                     error;
                   1110:
1.22      perry    1111:        km = (struct kfilter_mapping *)data;
1.3       jdolecek 1112:        error = 0;
                   1113:
                   1114:        switch (com) {
                   1115:        case KFILTER_BYFILTER:  /* convert filter -> name */
                   1116:                kfilter = kfilter_byfilter(km->filter);
                   1117:                if (kfilter != NULL)
                   1118:                        error = copyoutstr(kfilter->name, km->name, km->len,
                   1119:                            NULL);
                   1120:                else
                   1121:                        error = ENOENT;
                   1122:                break;
                   1123:
                   1124:        case KFILTER_BYNAME:    /* convert name -> filter */
                   1125:                MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK);
                   1126:                error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
                   1127:                if (error) {
                   1128:                        FREE(name, M_KEVENT);
                   1129:                        break;
                   1130:                }
                   1131:                kfilter = kfilter_byname(name);
                   1132:                if (kfilter != NULL)
                   1133:                        km->filter = kfilter->filter;
                   1134:                else
                   1135:                        error = ENOENT;
                   1136:                FREE(name, M_KEVENT);
                   1137:                break;
                   1138:
                   1139:        default:
                   1140:                error = ENOTTY;
                   1141:
                   1142:        }
                   1143:        return (error);
                   1144: }
                   1145:
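/*
 * A minimal userland sketch of the two ioctls, assuming "kq" is a kqueue
 * descriptor and using an illustrative 32-byte name buffer:
 *
 *	#include <sys/event.h>
 *	#include <sys/ioctl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	struct kfilter_mapping km;
 *	char buf[32];
 *
 *	km.name = buf;
 *	km.len = sizeof(buf);
 *	km.filter = EVFILT_READ;
 *	if (ioctl(kq, KFILTER_BYFILTER, &km) == 0)
 *		printf("EVFILT_READ is named %s\n", buf);
 *
 *	strlcpy(buf, "EVFILT_WRITE", sizeof(buf));
 *	if (ioctl(kq, KFILTER_BYNAME, &km) == 0)
 *		printf("EVFILT_WRITE is filter %d\n", (int)km.filter);
 */
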
                   1146: /*
                   1147:  * struct fileops fcntl method for a kqueue descriptor.
                   1148:  * Not implemented.
                   1149:  */
                   1150: /*ARGSUSED*/
                   1151: static int
1.33      yamt     1152: kqueue_fcntl(struct file *fp, u_int com, void *data, struct lwp *l)
1.3       jdolecek 1153: {
                   1154:
1.1       lukem    1155:        return (ENOTTY);
                   1156: }
                   1157:
1.3       jdolecek 1158: /*
                   1159:  * struct fileops poll method for a kqueue descriptor.
                   1160:  * Determine if kqueue has events pending.
                   1161:  */
1.1       lukem    1162: static int
1.25      christos 1163: kqueue_poll(struct file *fp, int events, struct lwp *l)
1.1       lukem    1164: {
1.3       jdolecek 1165:        struct kqueue   *kq;
                   1166:        int             revents;
                   1167:
                   1168:        kq = (struct kqueue *)fp->f_data;
                   1169:        revents = 0;
                   1170:        if (events & (POLLIN | POLLRDNORM)) {
                   1171:                if (kq->kq_count) {
                   1172:                        revents |= events & (POLLIN | POLLRDNORM);
1.1       lukem    1173:                } else {
1.25      christos 1174:                        selrecord(l, &kq->kq_sel);
1.1       lukem    1175:                }
                   1176:        }
                   1177:        return (revents);
                   1178: }
                   1179:
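/*
 * This lets a kqueue descriptor itself be multiplexed with poll(2) or
 * select(2).  A short sketch, assuming "kq" came from kqueue(2):
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd;
 *	int ready;
 *
 *	pfd.fd = kq;
 *	pfd.events = POLLIN;
 *	ready = poll(&pfd, 1, 1000);
 *
 * A positive return with POLLIN set in pfd.revents means at least one
 * kevent is pending on kq (i.e. kq_count above is non-zero).
 */
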
1.3       jdolecek 1180: /*
                   1181:  * struct fileops stat method for a kqueue descriptor.
                   1182:  * Returns dummy info, with st_size being number of events pending.
                   1183:  */
1.1       lukem    1184: static int
1.33      yamt     1185: kqueue_stat(struct file *fp, struct stat *st, struct lwp *l)
1.1       lukem    1186: {
1.3       jdolecek 1187:        struct kqueue   *kq;
1.1       lukem    1188:
1.3       jdolecek 1189:        kq = (struct kqueue *)fp->f_data;
                   1190:        memset((void *)st, 0, sizeof(*st));
1.1       lukem    1191:        st->st_size = kq->kq_count;
                   1192:        st->st_blksize = sizeof(struct kevent);
                   1193:        st->st_mode = S_IFIFO;
                   1194:        return (0);
                   1195: }
                   1196:
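/*
 * The corresponding userland view, assuming "kq" is a kqueue descriptor:
 *
 *	#include <sys/stat.h>
 *	#include <stdio.h>
 *
 *	struct stat st;
 *
 *	if (fstat(kq, &st) == 0)
 *		printf("%lld event(s) pending\n", (long long)st.st_size);
 *
 * Only st_size (the pending event count), st_blksize and st_mode carry
 * information; the rest of the structure is zeroed above.
 */
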
1.3       jdolecek 1197: /*
                   1198:  * struct fileops close method for a kqueue descriptor.
                   1199:  * Cleans up kqueue.
                   1200:  */
1.1       lukem    1201: static int
1.25      christos 1202: kqueue_close(struct file *fp, struct lwp *l)
1.1       lukem    1203: {
1.25      christos 1204:        struct proc     *p = l->l_proc;
1.3       jdolecek 1205:        struct kqueue   *kq;
1.16      fvdl     1206:        struct filedesc *fdp;
1.15      darrenr  1207:        struct knote    **knp, *kn, *kn0;
1.3       jdolecek 1208:        int             i;
1.1       lukem    1209:
1.3       jdolecek 1210:        kq = (struct kqueue *)fp->f_data;
                   1211:        fdp = p->p_fd;
1.1       lukem    1212:        for (i = 0; i < fdp->fd_knlistsize; i++) {
                   1213:                knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
                   1214:                kn = *knp;
                   1215:                while (kn != NULL) {
                   1216:                        kn0 = SLIST_NEXT(kn, kn_link);
                   1217:                        if (kq == kn->kn_kq) {
                   1218:                                kn->kn_fop->f_detach(kn);
1.25      christos 1219:                                FILE_UNUSE(kn->kn_fp, l);
1.3       jdolecek 1220:                                pool_put(&knote_pool, kn);
1.1       lukem    1221:                                *knp = kn0;
                   1222:                        } else {
                   1223:                                knp = &SLIST_NEXT(kn, kn_link);
                   1224:                        }
                   1225:                        kn = kn0;
                   1226:                }
                   1227:        }
                   1228:        if (fdp->fd_knhashmask != 0) {
                   1229:                for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
                   1230:                        knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
                   1231:                        kn = *knp;
                   1232:                        while (kn != NULL) {
                   1233:                                kn0 = SLIST_NEXT(kn, kn_link);
                   1234:                                if (kq == kn->kn_kq) {
                   1235:                                        kn->kn_fop->f_detach(kn);
1.3       jdolecek 1236:                                        /* XXX non-fd release of kn->kn_ptr */
                   1237:                                        pool_put(&knote_pool, kn);
1.1       lukem    1238:                                        *knp = kn0;
                   1239:                                } else {
                   1240:                                        knp = &SLIST_NEXT(kn, kn_link);
                   1241:                                }
                   1242:                                kn = kn0;
                   1243:                        }
                   1244:                }
                   1245:        }
1.3       jdolecek 1246:        pool_put(&kqueue_pool, kq);
1.1       lukem    1247:        fp->f_data = NULL;
                   1248:
                   1249:        return (0);
                   1250: }
                   1251:
1.3       jdolecek 1252: /*
                    1253:  * Wake up a kqueue.
                   1254:  */
1.1       lukem    1255: static void
                   1256: kqueue_wakeup(struct kqueue *kq)
                   1257: {
1.12      pk       1258:        int s;
1.1       lukem    1259:
1.12      pk       1260:        s = splsched();
                   1261:        simple_lock(&kq->kq_lock);
1.3       jdolecek 1262:        if (kq->kq_state & KQ_SLEEP) {          /* if currently sleeping ...  */
1.1       lukem    1263:                kq->kq_state &= ~KQ_SLEEP;
1.3       jdolecek 1264:                wakeup(kq);                     /* ... wakeup */
1.1       lukem    1265:        }
1.3       jdolecek 1266:
                   1267:        /* Notify select/poll and kevent. */
                   1268:        selnotify(&kq->kq_sel, 0);
1.12      pk       1269:        simple_unlock(&kq->kq_lock);
                   1270:        splx(s);
1.1       lukem    1271: }
                   1272:
                   1273: /*
1.3       jdolecek 1274:  * struct fileops kqfilter method for a kqueue descriptor.
                   1275:  * Event triggered when monitored kqueue changes.
                   1276:  */
                   1277: /*ARGSUSED*/
                   1278: static int
1.33      yamt     1279: kqueue_kqfilter(struct file *fp, struct knote *kn)
1.3       jdolecek 1280: {
                   1281:        struct kqueue *kq;
                   1282:
                   1283:        KASSERT(fp == kn->kn_fp);
                   1284:        kq = (struct kqueue *)kn->kn_fp->f_data;
                   1285:        if (kn->kn_filter != EVFILT_READ)
                   1286:                return (1);
                   1287:        kn->kn_fop = &kqread_filtops;
1.5       christos 1288:        SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
1.3       jdolecek 1289:        return (0);
                   1290: }
                   1291:
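/*
 * In other words, one kqueue can watch another for readability.  A short
 * sketch, with "inner" and "outer" as illustrative names:
 *
 *	int inner = kqueue();
 *	int outer = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, inner, EVFILT_READ, EV_ADD, 0, 0, 0);
 *	(void)kevent(outer, &ev, 1, NULL, 0, NULL);
 *
 * The EVFILT_READ knote attached here is activated once events are
 * queued on "inner", via the selnotify() call in kqueue_wakeup() above.
 */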
                   1292:
                   1293: /*
                   1294:  * Walk down a list of knotes, activating them if their event has triggered.
1.1       lukem    1295:  */
                   1296: void
                   1297: knote(struct klist *list, long hint)
                   1298: {
                   1299:        struct knote *kn;
                   1300:
                   1301:        SLIST_FOREACH(kn, list, kn_selnext)
                   1302:                if (kn->kn_fop->f_event(kn, hint))
                   1303:                        KNOTE_ACTIVATE(kn);
                   1304: }
                   1305:
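/*
 * A sketch of the usual caller: a driver or subsystem that keeps a
 * struct selinfo (here a hypothetical softc member sc_rsel) activates
 * any attached knotes at the point where data becomes available:
 *
 *	KNOTE(&sc->sc_rsel.sel_klist, 0);
 *
 * KNOTE() is a thin wrapper from <sys/event.h> that ends up in knote()
 * above, typically only when the klist is non-empty.
 */
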
                   1306: /*
1.3       jdolecek 1307:  * Remove all knotes from a specified klist
1.1       lukem    1308:  */
                   1309: void
1.25      christos 1310: knote_remove(struct lwp *l, struct klist *list)
1.1       lukem    1311: {
                   1312:        struct knote *kn;
                   1313:
                   1314:        while ((kn = SLIST_FIRST(list)) != NULL) {
                   1315:                kn->kn_fop->f_detach(kn);
1.25      christos 1316:                knote_drop(kn, l, l->l_proc->p_fd);
1.1       lukem    1317:        }
                   1318: }
                   1319:
                   1320: /*
1.3       jdolecek 1321:  * Remove all knotes referencing a specified fd
1.1       lukem    1322:  */
                   1323: void
1.25      christos 1324: knote_fdclose(struct lwp *l, int fd)
1.1       lukem    1325: {
1.3       jdolecek 1326:        struct filedesc *fdp;
                   1327:        struct klist    *list;
1.1       lukem    1328:
1.25      christos 1329:        fdp = l->l_proc->p_fd;
1.3       jdolecek 1330:        list = &fdp->fd_knlist[fd];
1.25      christos 1331:        knote_remove(l, list);
1.1       lukem    1332: }
                   1333:
1.3       jdolecek 1334: /*
                   1335:  * Attach a new knote to a file descriptor
                   1336:  */
1.1       lukem    1337: static void
                   1338: knote_attach(struct knote *kn, struct filedesc *fdp)
                   1339: {
1.3       jdolecek 1340:        struct klist    *list;
                   1341:        int             size;
1.1       lukem    1342:
                   1343:        if (! kn->kn_fop->f_isfd) {
1.3       jdolecek 1344:                /* if knote is not on an fd, store on internal hash table */
1.1       lukem    1345:                if (fdp->fd_knhashmask == 0)
1.3       jdolecek 1346:                        fdp->fd_knhash = hashinit(KN_HASHSIZE, HASH_LIST,
                   1347:                            M_KEVENT, M_WAITOK, &fdp->fd_knhashmask);
1.1       lukem    1348:                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                   1349:                goto done;
                   1350:        }
                   1351:
1.3       jdolecek 1352:        /*
                   1353:         * otherwise, knote is on an fd.
                   1354:         * knotes are stored in fd_knlist indexed by kn->kn_id.
                   1355:         */
1.1       lukem    1356:        if (fdp->fd_knlistsize <= kn->kn_id) {
1.3       jdolecek 1357:                /* expand list, it's too small */
1.1       lukem    1358:                size = fdp->fd_knlistsize;
1.3       jdolecek 1359:                while (size <= kn->kn_id) {
                   1360:                        /* grow in KQ_EXTENT chunks */
                   1361:                        size += KQ_EXTENT;
                   1362:                }
                   1363:                list = malloc(size * sizeof(struct klist *), M_KEVENT,M_WAITOK);
                   1364:                if (fdp->fd_knlist) {
                   1365:                        /* copy existing knlist */
1.37      christos 1366:                        memcpy((void *)list, (void *)fdp->fd_knlist,
1.3       jdolecek 1367:                            fdp->fd_knlistsize * sizeof(struct klist *));
                   1368:                }
                   1369:                /*
                   1370:                 * Zero new memory. Stylistically, SLIST_INIT() should be
                    1371:                 * used here, but that does the same thing as the memset() anyway.
                   1372:                 */
                   1373:                memset(&list[fdp->fd_knlistsize], 0,
1.1       lukem    1374:                    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1.3       jdolecek 1375:
                   1376:                /* switch to new knlist */
1.1       lukem    1377:                if (fdp->fd_knlist != NULL)
1.3       jdolecek 1378:                        free(fdp->fd_knlist, M_KEVENT);
1.1       lukem    1379:                fdp->fd_knlistsize = size;
                   1380:                fdp->fd_knlist = list;
                   1381:        }
1.3       jdolecek 1382:
                   1383:        /* get list head for this fd */
1.1       lukem    1384:        list = &fdp->fd_knlist[kn->kn_id];
1.3       jdolecek 1385:  done:
                   1386:        /* add new knote */
1.1       lukem    1387:        SLIST_INSERT_HEAD(list, kn, kn_link);
                   1388:        kn->kn_status = 0;
                   1389: }
                   1390:
                   1391: /*
1.3       jdolecek 1392:  * Drop knote.
                   1393:  * Should be called at spl == 0, since we don't want to hold spl
                   1394:  * while calling FILE_UNUSE and free.
1.1       lukem    1395:  */
                   1396: static void
1.25      christos 1397: knote_drop(struct knote *kn, struct lwp *l, struct filedesc *fdp)
1.1       lukem    1398: {
1.3       jdolecek 1399:        struct klist    *list;
1.1       lukem    1400:
                   1401:        if (kn->kn_fop->f_isfd)
                   1402:                list = &fdp->fd_knlist[kn->kn_id];
                   1403:        else
                   1404:                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                   1405:
                   1406:        SLIST_REMOVE(list, kn, knote, kn_link);
                   1407:        if (kn->kn_status & KN_QUEUED)
                   1408:                knote_dequeue(kn);
                   1409:        if (kn->kn_fop->f_isfd)
1.25      christos 1410:                FILE_UNUSE(kn->kn_fp, l);
1.3       jdolecek 1411:        pool_put(&knote_pool, kn);
1.1       lukem    1412: }
                   1413:
                   1414:
1.3       jdolecek 1415: /*
                   1416:  * Queue new event for knote.
                   1417:  */
1.1       lukem    1418: static void
                   1419: knote_enqueue(struct knote *kn)
                   1420: {
1.3       jdolecek 1421:        struct kqueue   *kq;
                   1422:        int             s;
1.1       lukem    1423:
1.3       jdolecek 1424:        kq = kn->kn_kq;
                   1425:        KASSERT((kn->kn_status & KN_QUEUED) == 0);
1.1       lukem    1426:
1.12      pk       1427:        s = splsched();
                   1428:        simple_lock(&kq->kq_lock);
1.22      perry    1429:        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1.1       lukem    1430:        kn->kn_status |= KN_QUEUED;
                   1431:        kq->kq_count++;
1.12      pk       1432:        simple_unlock(&kq->kq_lock);
1.1       lukem    1433:        splx(s);
                   1434:        kqueue_wakeup(kq);
                   1435: }
                   1436:
1.3       jdolecek 1437: /*
                   1438:  * Dequeue event for knote.
                   1439:  */
1.1       lukem    1440: static void
                   1441: knote_dequeue(struct knote *kn)
                   1442: {
1.3       jdolecek 1443:        struct kqueue   *kq;
                   1444:        int             s;
1.1       lukem    1445:
1.12      pk       1446:        KASSERT(kn->kn_status & KN_QUEUED);
1.3       jdolecek 1447:        kq = kn->kn_kq;
1.12      pk       1448:
1.11      pk       1449:        s = splsched();
1.12      pk       1450:        simple_lock(&kq->kq_lock);
1.22      perry    1451:        TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1.1       lukem    1452:        kn->kn_status &= ~KN_QUEUED;
                   1453:        kq->kq_count--;
1.12      pk       1454:        simple_unlock(&kq->kq_lock);
1.1       lukem    1455:        splx(s);
                   1456: }
