[BACK]Return to kern_event.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/kern_event.c, Revision 1.25.10.1

1.25.10.1! elad        1: /*     $NetBSD: kern_event.c,v 1.25 2005/12/11 12:24:29 christos Exp $ */
1.1       lukem       2: /*-
                      3:  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
                      4:  * All rights reserved.
                      5:  *
                      6:  * Redistribution and use in source and binary forms, with or without
                      7:  * modification, are permitted provided that the following conditions
                      8:  * are met:
                      9:  * 1. Redistributions of source code must retain the above copyright
                     10:  *    notice, this list of conditions and the following disclaimer.
                     11:  * 2. Redistributions in binary form must reproduce the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer in the
                     13:  *    documentation and/or other materials provided with the distribution.
                     14:  *
                     15:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     16:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     17:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     18:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     19:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     20:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     21:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     22:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     23:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     24:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     25:  * SUCH DAMAGE.
                     26:  *
                     27:  * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $
                     28:  */
1.14      jdolecek   29:
                     30: #include <sys/cdefs.h>
1.25.10.1! elad       31: __KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.25 2005/12/11 12:24:29 christos Exp $");
1.1       lukem      32:
                     33: #include <sys/param.h>
                     34: #include <sys/systm.h>
                     35: #include <sys/kernel.h>
                     36: #include <sys/proc.h>
1.22      perry      37: #include <sys/malloc.h>
1.1       lukem      38: #include <sys/unistd.h>
                     39: #include <sys/file.h>
                     40: #include <sys/fcntl.h>
1.3       jdolecek   41: #include <sys/select.h>
1.1       lukem      42: #include <sys/queue.h>
                     43: #include <sys/event.h>
                     44: #include <sys/eventvar.h>
                     45: #include <sys/poll.h>
1.3       jdolecek   46: #include <sys/pool.h>
1.1       lukem      47: #include <sys/protosw.h>
                     48: #include <sys/socket.h>
                     49: #include <sys/socketvar.h>
                     50: #include <sys/stat.h>
                     51: #include <sys/uio.h>
1.3       jdolecek   52: #include <sys/mount.h>
                     53: #include <sys/filedesc.h>
1.6       thorpej    54: #include <sys/sa.h>
1.3       jdolecek   55: #include <sys/syscallargs.h>
1.1       lukem      56:
1.3       jdolecek   57: static void    kqueue_wakeup(struct kqueue *kq);
1.1       lukem      58:
1.24      cube       59: static int     kqueue_scan(struct file *, size_t, struct kevent *,
1.25      christos   60:     const struct timespec *, struct lwp *, register_t *,
1.24      cube       61:     const struct kevent_ops *);
1.3       jdolecek   62: static int     kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
1.25.10.1! elad       63:                    kauth_cred_t cred, int flags);
1.3       jdolecek   64: static int     kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
1.25.10.1! elad       65:                    kauth_cred_t cred, int flags);
1.13      dsl        66: static int     kqueue_ioctl(struct file *fp, u_long com, void *data,
1.25      christos   67:                    struct lwp *l);
1.13      dsl        68: static int     kqueue_fcntl(struct file *fp, u_int com, void *data,
1.25      christos   69:                    struct lwp *l);
                     70: static int     kqueue_poll(struct file *fp, int events, struct lwp *l);
1.3       jdolecek   71: static int     kqueue_kqfilter(struct file *fp, struct knote *kn);
1.25      christos   72: static int     kqueue_stat(struct file *fp, struct stat *sp, struct lwp *l);
                     73: static int     kqueue_close(struct file *fp, struct lwp *l);
1.1       lukem      74:
1.21      christos   75: static const struct fileops kqueueops = {
1.3       jdolecek   76:        kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
                     77:        kqueue_stat, kqueue_close, kqueue_kqfilter
1.1       lukem      78: };
                     79:
1.3       jdolecek   80: static void    knote_attach(struct knote *kn, struct filedesc *fdp);
1.25      christos   81: static void    knote_drop(struct knote *kn, struct lwp *l,
1.3       jdolecek   82:                    struct filedesc *fdp);
                     83: static void    knote_enqueue(struct knote *kn);
                     84: static void    knote_dequeue(struct knote *kn);
1.1       lukem      85:
                     86: static void    filt_kqdetach(struct knote *kn);
                     87: static int     filt_kqueue(struct knote *kn, long hint);
                     88: static int     filt_procattach(struct knote *kn);
                     89: static void    filt_procdetach(struct knote *kn);
                     90: static int     filt_proc(struct knote *kn, long hint);
                     91: static int     filt_fileattach(struct knote *kn);
1.8       jdolecek   92: static void    filt_timerexpire(void *knx);
                     93: static int     filt_timerattach(struct knote *kn);
                     94: static void    filt_timerdetach(struct knote *kn);
                     95: static int     filt_timer(struct knote *kn, long hint);
1.1       lukem      96:
1.3       jdolecek   97: static const struct filterops kqread_filtops =
1.1       lukem      98:        { 1, NULL, filt_kqdetach, filt_kqueue };
1.3       jdolecek   99: static const struct filterops proc_filtops =
1.1       lukem     100:        { 0, filt_procattach, filt_procdetach, filt_proc };
1.3       jdolecek  101: static const struct filterops file_filtops =
1.1       lukem     102:        { 1, filt_fileattach, NULL, NULL };
1.8       jdolecek  103: static struct filterops timer_filtops =
                    104:        { 0, filt_timerattach, filt_timerdetach, filt_timer };
1.1       lukem     105:
1.20      simonb    106: POOL_INIT(kqueue_pool, sizeof(struct kqueue), 0, 0, 0, "kqueuepl", NULL);
                    107: POOL_INIT(knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl", NULL);
1.8       jdolecek  108: static int     kq_ncallouts = 0;
                    109: static int     kq_calloutmax = (4 * 1024);
1.7       thorpej   110:
                    111: MALLOC_DEFINE(M_KEVENT, "kevent", "kevents/knotes");
1.1       lukem     112:
1.3       jdolecek  113: #define        KNOTE_ACTIVATE(kn)                                              \
                    114: do {                                                                   \
1.1       lukem     115:        kn->kn_status |= KN_ACTIVE;                                     \
                    116:        if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)           \
                    117:                knote_enqueue(kn);                                      \
                    118: } while(0)
                    119:
                    120: #define        KN_HASHSIZE             64              /* XXX should be tunable */
1.3       jdolecek  121: #define        KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))
1.1       lukem     122:
1.3       jdolecek  123: extern const struct filterops sig_filtops;
1.1       lukem     124:
                    125: /*
                    126:  * Table for all system-defined filters.
1.3       jdolecek  127:  * These should be listed in the numeric order of the EVFILT_* defines.
                    128:  * If filtops is NULL, the filter isn't implemented in NetBSD.
                    129:  * End of list is when name is NULL.
1.1       lukem     130:  */
1.3       jdolecek  131: struct kfilter {
                    132:        const char       *name;         /* name of filter */
                    133:        uint32_t          filter;       /* id of filter */
                    134:        const struct filterops *filtops;/* operations for filter */
                    135: };
                    136:
                    137:                /* System defined filters */
                    138: static const struct kfilter sys_kfilters[] = {
                    139:        { "EVFILT_READ",        EVFILT_READ,    &file_filtops },
                    140:        { "EVFILT_WRITE",       EVFILT_WRITE,   &file_filtops },
                    141:        { "EVFILT_AIO",         EVFILT_AIO,     NULL },
                    142:        { "EVFILT_VNODE",       EVFILT_VNODE,   &file_filtops },
                    143:        { "EVFILT_PROC",        EVFILT_PROC,    &proc_filtops },
                    144:        { "EVFILT_SIGNAL",      EVFILT_SIGNAL,  &sig_filtops },
1.8       jdolecek  145:        { "EVFILT_TIMER",       EVFILT_TIMER,   &timer_filtops },
1.22      perry     146:        { NULL,                 0,              NULL }, /* end of list */
1.1       lukem     147: };
                    148:
1.3       jdolecek  149:                /* User defined kfilters */
                    150: static struct kfilter  *user_kfilters;         /* array */
                    151: static int             user_kfilterc;          /* current offset */
                    152: static int             user_kfiltermaxc;       /* max size so far */
                    153:
                    154: /*
                    155:  * Find kfilter entry by name, or NULL if not found.
                    156:  */
                    157: static const struct kfilter *
                    158: kfilter_byname_sys(const char *name)
                    159: {
                    160:        int i;
                    161:
                    162:        for (i = 0; sys_kfilters[i].name != NULL; i++) {
                    163:                if (strcmp(name, sys_kfilters[i].name) == 0)
                    164:                        return (&sys_kfilters[i]);
                    165:        }
                    166:        return (NULL);
                    167: }
                    168:
                    169: static struct kfilter *
                    170: kfilter_byname_user(const char *name)
                    171: {
                    172:        int i;
                    173:
                    174:        /* user_kfilters[] could be NULL if no filters were registered */
                    175:        if (!user_kfilters)
                    176:                return (NULL);
                    177:
                    178:        for (i = 0; user_kfilters[i].name != NULL; i++) {
                    179:                if (user_kfilters[i].name != '\0' &&
                    180:                    strcmp(name, user_kfilters[i].name) == 0)
                    181:                        return (&user_kfilters[i]);
                    182:        }
                    183:        return (NULL);
                    184: }
                    185:
                    186: static const struct kfilter *
                    187: kfilter_byname(const char *name)
                    188: {
                    189:        const struct kfilter *kfilter;
                    190:
                    191:        if ((kfilter = kfilter_byname_sys(name)) != NULL)
                    192:                return (kfilter);
                    193:
                    194:        return (kfilter_byname_user(name));
                    195: }
                    196:
                    197: /*
                    198:  * Find kfilter entry by filter id, or NULL if not found.
                    199:  * Assumes entries are indexed in filter id order, for speed.
                    200:  */
                    201: static const struct kfilter *
                    202: kfilter_byfilter(uint32_t filter)
                    203: {
                    204:        const struct kfilter *kfilter;
                    205:
                    206:        if (filter < EVFILT_SYSCOUNT)   /* it's a system filter */
                    207:                kfilter = &sys_kfilters[filter];
                    208:        else if (user_kfilters != NULL &&
                    209:            filter < EVFILT_SYSCOUNT + user_kfilterc)
                    210:                                        /* it's a user filter */
                    211:                kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
                    212:        else
                    213:                return (NULL);          /* out of range */
                    214:        KASSERT(kfilter->filter == filter);     /* sanity check! */
                    215:        return (kfilter);
                    216: }
                    217:
                    218: /*
                    219:  * Register a new kfilter. Stores the entry in user_kfilters.
                    220:  * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
                    221:  * If retfilter != NULL, the new filterid is returned in it.
                    222:  */
                    223: int
                    224: kfilter_register(const char *name, const struct filterops *filtops,
                    225:     int *retfilter)
1.1       lukem     226: {
1.3       jdolecek  227:        struct kfilter *kfilter;
                    228:        void *space;
                    229:        int len;
                    230:
                    231:        if (name == NULL || name[0] == '\0' || filtops == NULL)
                    232:                return (EINVAL);        /* invalid args */
                    233:        if (kfilter_byname(name) != NULL)
                    234:                return (EEXIST);        /* already exists */
                    235:        if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT)
                    236:                return (EINVAL);        /* too many */
                    237:
                    238:        /* check if need to grow user_kfilters */
                    239:        if (user_kfilterc + 1 > user_kfiltermaxc) {
                    240:                /*
                    241:                 * Grow in KFILTER_EXTENT chunks. Use malloc(9), because we
                    242:                 * want to traverse user_kfilters as an array.
                    243:                 */
                    244:                user_kfiltermaxc += KFILTER_EXTENT;
                    245:                kfilter = malloc(user_kfiltermaxc * sizeof(struct filter *),
                    246:                    M_KEVENT, M_WAITOK);
                    247:
                    248:                /* copy existing user_kfilters */
                    249:                if (user_kfilters != NULL)
                    250:                        memcpy((caddr_t)kfilter, (caddr_t)user_kfilters,
                    251:                            user_kfilterc * sizeof(struct kfilter *));
                    252:                                        /* zero new sections */
                    253:                memset((caddr_t)kfilter +
                    254:                    user_kfilterc * sizeof(struct kfilter *), 0,
                    255:                    (user_kfiltermaxc - user_kfilterc) *
                    256:                    sizeof(struct kfilter *));
                    257:                                        /* switch to new kfilter */
                    258:                if (user_kfilters != NULL)
                    259:                        free(user_kfilters, M_KEVENT);
                    260:                user_kfilters = kfilter;
                    261:        }
                    262:        len = strlen(name) + 1;         /* copy name */
                    263:        space = malloc(len, M_KEVENT, M_WAITOK);
                    264:        memcpy(space, name, len);
                    265:        user_kfilters[user_kfilterc].name = space;
                    266:
                    267:        user_kfilters[user_kfilterc].filter = user_kfilterc + EVFILT_SYSCOUNT;
                    268:
                    269:        len = sizeof(struct filterops); /* copy filtops */
                    270:        space = malloc(len, M_KEVENT, M_WAITOK);
                    271:        memcpy(space, filtops, len);
                    272:        user_kfilters[user_kfilterc].filtops = space;
                    273:
                    274:        if (retfilter != NULL)
                    275:                *retfilter = user_kfilters[user_kfilterc].filter;
                    276:        user_kfilterc++;                /* finally, increment count */
                    277:        return (0);
1.1       lukem     278: }
                    279:
1.3       jdolecek  280: /*
                    281:  * Unregister a kfilter previously registered with kfilter_register.
                    282:  * This retains the filter id, but clears the name and frees filtops (filter
                    283:  * operations), so that the number isn't reused during a boot.
                    284:  * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
                    285:  */
                    286: int
                    287: kfilter_unregister(const char *name)
1.1       lukem     288: {
1.3       jdolecek  289:        struct kfilter *kfilter;
                    290:
                    291:        if (name == NULL || name[0] == '\0')
                    292:                return (EINVAL);        /* invalid name */
                    293:
                    294:        if (kfilter_byname_sys(name) != NULL)
                    295:                return (EINVAL);        /* can't detach system filters */
1.1       lukem     296:
1.3       jdolecek  297:        kfilter = kfilter_byname_user(name);
                    298:        if (kfilter == NULL)            /* not found */
                    299:                return (ENOENT);
1.1       lukem     300:
1.3       jdolecek  301:        if (kfilter->name[0] != '\0') {
1.23      christos  302:                /* XXXUNCONST Cast away const (but we know it's safe. */
                    303:                free(__UNCONST(kfilter->name), M_KEVENT);
1.3       jdolecek  304:                kfilter->name = "";     /* mark as `not implemented' */
                    305:        }
                    306:        if (kfilter->filtops != NULL) {
1.23      christos  307:                /* XXXUNCONST Cast away const (but we know it's safe. */
                    308:                free(__UNCONST(kfilter->filtops), M_KEVENT);
1.3       jdolecek  309:                kfilter->filtops = NULL; /* mark as `not implemented' */
                    310:        }
1.1       lukem     311:        return (0);
                    312: }
                    313:
1.3       jdolecek  314:
                    315: /*
                    316:  * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
                    317:  * descriptors. Calls struct fileops kqfilter method for given file descriptor.
                    318:  */
                    319: static int
                    320: filt_fileattach(struct knote *kn)
                    321: {
                    322:        struct file *fp;
                    323:
                    324:        fp = kn->kn_fp;
                    325:        return ((*fp->f_ops->fo_kqfilter)(fp, kn));
                    326: }
                    327:
                    328: /*
                    329:  * Filter detach method for EVFILT_READ on kqueue descriptor.
                    330:  */
1.1       lukem     331: static void
                    332: filt_kqdetach(struct knote *kn)
                    333: {
1.3       jdolecek  334:        struct kqueue *kq;
1.1       lukem     335:
1.3       jdolecek  336:        kq = (struct kqueue *)kn->kn_fp->f_data;
1.5       christos  337:        SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
1.1       lukem     338: }
                    339:
1.3       jdolecek  340: /*
                    341:  * Filter event method for EVFILT_READ on kqueue descriptor.
                    342:  */
1.1       lukem     343: /*ARGSUSED*/
                    344: static int
                    345: filt_kqueue(struct knote *kn, long hint)
                    346: {
1.3       jdolecek  347:        struct kqueue *kq;
1.1       lukem     348:
1.3       jdolecek  349:        kq = (struct kqueue *)kn->kn_fp->f_data;
1.1       lukem     350:        kn->kn_data = kq->kq_count;
                    351:        return (kn->kn_data > 0);
                    352: }
                    353:
/*
 * Filter attach method for EVFILT_PROC.
 *
 * Looks up the target process by kn_id (a pid) and hooks the knote
 * onto that process's klist.  Returns ESRCH if no such process, or
 * EACCES if the caller is not allowed to watch it.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;

	p = pfind(kn->kn_id);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fail if it's not owned by you, or the last exec gave us
	 * setuid/setgid privs (unless you're root).
	 */
	if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(curproc->p_cred) ||
		(p->p_flag & P_SUGID))
	    && generic_authorize(curproc->p_cred, KAUTH_GENERIC_ISSUSER,
				 &curproc->p_acflag) != 0)
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel,
	 * for NOTE_TRACK: report the parent pid as kn_data and
	 * mark the child as such.
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXXSMP lock the process? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	return (0);
}
                    393:
                    394: /*
1.3       jdolecek  395:  * Filter detach method for EVFILT_PROC.
                    396:  *
1.1       lukem     397:  * The knote may be attached to a different process, which may exit,
                    398:  * leaving nothing for the knote to be attached to.  So when the process
                    399:  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
                    400:  * it will be deleted when read out.  However, as part of the knote deletion,
                    401:  * this routine is called, so a check is needed to avoid actually performing
1.3       jdolecek  402:  * a detach, because the original process might not exist any more.
1.1       lukem     403:  */
                    404: static void
                    405: filt_procdetach(struct knote *kn)
                    406: {
1.3       jdolecek  407:        struct proc *p;
1.1       lukem     408:
                    409:        if (kn->kn_status & KN_DETACHED)
                    410:                return;
                    411:
1.3       jdolecek  412:        p = kn->kn_ptr.p_proc;
1.18      jdolecek  413:        KASSERT(p->p_stat == SZOMB || pfind(kn->kn_id) == p);
1.3       jdolecek  414:
                    415:        /* XXXSMP lock the process? */
1.1       lukem     416:        SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
                    417: }
                    418:
/*
 * Filter event method for EVFILT_PROC.
 *
 * `hint' carries a NOTE_* event code in its control bits
 * (NOTE_PCTRLMASK) and, for NOTE_FORK, the child pid in the data
 * bits (NOTE_PDATAMASK).  Returns nonzero when the knote should be
 * activated.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		/*
		 * Detach the knote from watched process and mark
		 * it as such. We can't leave this to kqueue_scan(),
		 * since the process might not exist by then. And we
		 * have to do this now, since psignal KNOTE() is called
		 * also for zombies and we might end up reading freed
		 * memory if the kevent would already be picked up
		 * and knote g/c'ed.
		 */
		kn->kn_fop->f_detach(kn);
		kn->kn_status |= KN_DETACHED;

		/* Mark as ONESHOT, so that the knote is g/c'ed when read */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 * EV_FLAG1 tells filt_procattach() this registration
		 * comes from the kernel (NOTE_TRACK child).
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}
                    484:
                    485: static void
                    486: filt_timerexpire(void *knx)
                    487: {
                    488:        struct knote *kn = knx;
                    489:        int tticks;
                    490:
                    491:        kn->kn_data++;
                    492:        KNOTE_ACTIVATE(kn);
                    493:
                    494:        if ((kn->kn_flags & EV_ONESHOT) == 0) {
                    495:                tticks = mstohz(kn->kn_sdata);
                    496:                callout_schedule((struct callout *)kn->kn_hook, tticks);
                    497:        }
                    498: }
                    499:
                    500: /*
                    501:  * data contains amount of time to sleep, in milliseconds
1.22      perry     502:  */
1.8       jdolecek  503: static int
                    504: filt_timerattach(struct knote *kn)
                    505: {
                    506:        struct callout *calloutp;
                    507:        int tticks;
                    508:
                    509:        if (kq_ncallouts >= kq_calloutmax)
                    510:                return (ENOMEM);
                    511:        kq_ncallouts++;
                    512:
                    513:        tticks = mstohz(kn->kn_sdata);
                    514:
                    515:        /* if the supplied value is under our resolution, use 1 tick */
                    516:        if (tticks == 0) {
                    517:                if (kn->kn_sdata == 0)
                    518:                        return (EINVAL);
                    519:                tticks = 1;
                    520:        }
                    521:
                    522:        kn->kn_flags |= EV_CLEAR;               /* automatically set */
                    523:        MALLOC(calloutp, struct callout *, sizeof(*calloutp),
                    524:            M_KEVENT, 0);
                    525:        callout_init(calloutp);
                    526:        callout_reset(calloutp, tticks, filt_timerexpire, kn);
                    527:        kn->kn_hook = calloutp;
                    528:
                    529:        return (0);
                    530: }
                    531:
                    532: static void
                    533: filt_timerdetach(struct knote *kn)
                    534: {
                    535:        struct callout *calloutp;
                    536:
                    537:        calloutp = (struct callout *)kn->kn_hook;
                    538:        callout_stop(calloutp);
                    539:        FREE(calloutp, M_KEVENT);
                    540:        kq_ncallouts--;
                    541: }
                    542:
                    543: static int
                    544: filt_timer(struct knote *kn, long hint)
                    545: {
                    546:        return (kn->kn_data != 0);
1.1       lukem     547: }
                    548:
1.3       jdolecek  549: /*
                    550:  * filt_seltrue:
                    551:  *
                    552:  *     This filter "event" routine simulates seltrue().
                    553:  */
1.1       lukem     554: int
1.3       jdolecek  555: filt_seltrue(struct knote *kn, long hint)
1.1       lukem     556: {
                    557:
1.3       jdolecek  558:        /*
                    559:         * We don't know how much data can be read/written,
                    560:         * but we know that it *can* be.  This is about as
                    561:         * good as select/poll does as well.
                    562:         */
                    563:        kn->kn_data = 0;
                    564:        return (1);
                    565: }
                    566:
                    567: /*
                    568:  * This provides full kqfilter entry for device switch tables, which
                    569:  * has same effect as filter using filt_seltrue() as filter method.
                    570:  */
/*
 * filt_seltrue knotes carry no attach-time state, so there is
 * nothing to tear down on detach.
 */
static void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}
                    576:
/*
 * Always-ready filterops; initializers are positional — presumably
 * f_isfd, f_attach, f_detach, f_event, in struct filterops order
 * (confirm against the struct declaration).
 */
static const struct filterops seltrue_filtops =
	{ 1, NULL, filt_seltruedetach, filt_seltrue };
                    579:
                    580: int
                    581: seltrue_kqfilter(dev_t dev, struct knote *kn)
                    582: {
                    583:        switch (kn->kn_filter) {
                    584:        case EVFILT_READ:
                    585:        case EVFILT_WRITE:
                    586:                kn->kn_fop = &seltrue_filtops;
                    587:                break;
                    588:        default:
                    589:                return (1);
                    590:        }
                    591:
                    592:        /* Nothing more to do */
                    593:        return (0);
                    594: }
                    595:
                    596: /*
                    597:  * kqueue(2) system call.
                    598:  */
int
sys_kqueue(struct lwp *l, void *v, register_t *retval)
{
	struct filedesc *fdp;
	struct kqueue	*kq;
	struct file	*fp;
	struct proc	*p;
	int		fd, error;

	p = l->l_proc;
	fdp = p->p_fd;
	error = falloc(p, &fp, &fd);	/* setup a new file descriptor */
	if (error)
		return (error);
	/* The descriptor is readable and writable; ops dispatch to kqueueops. */
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	/* Allocate and zero a fresh kqueue; PR_WAITOK may sleep. */
	kq = pool_get(&kqueue_pool, PR_WAITOK);
	memset((char *)kq, 0, sizeof(struct kqueue));
	simple_lock_init(&kq->kq_lock);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = (caddr_t)kq;	/* store the kqueue with the fp */
	*retval = fd;			/* return the new descriptor */
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;	/* this process has a kq */
	kq->kq_fdp = fdp;
	/* Mark the file fully initialized, then drop falloc()'s use ref. */
	FILE_SET_MATURE(fp);
	FILE_UNUSE(fp, l);		/* falloc() does FILE_USE() */
	return (error);			/* error is 0 here */
}
                    629:
1.3       jdolecek  630: /*
                    631:  * kevent(2) system call.
                    632:  */
1.24      cube      633: static int
                    634: kevent_fetch_changes(void *private, const struct kevent *changelist,
                    635:     struct kevent *changes, size_t index, int n)
                    636: {
                    637:        return copyin(changelist + index, changes, n * sizeof(*changes));
                    638: }
                    639:
                    640: static int
                    641: kevent_put_events(void *private, struct kevent *events,
                    642:     struct kevent *eventlist, size_t index, int n)
                    643: {
                    644:        return copyout(events, eventlist + index, n * sizeof(*events));
                    645: }
                    646:
                    647: static const struct kevent_ops kevent_native_ops = {
                    648:        keo_private: NULL,
                    649:        keo_fetch_timeout: copyin,
                    650:        keo_fetch_changes: kevent_fetch_changes,
                    651:        keo_put_events: kevent_put_events,
                    652: };
                    653:
1.1       lukem     654: int
1.6       thorpej   655: sys_kevent(struct lwp *l, void *v, register_t *retval)
1.1       lukem     656: {
1.3       jdolecek  657:        struct sys_kevent_args /* {
                    658:                syscallarg(int) fd;
                    659:                syscallarg(const struct kevent *) changelist;
                    660:                syscallarg(size_t) nchanges;
                    661:                syscallarg(struct kevent *) eventlist;
                    662:                syscallarg(size_t) nevents;
                    663:                syscallarg(const struct timespec *) timeout;
                    664:        } */ *uap = v;
1.24      cube      665:
                    666:        return kevent1(l, retval, SCARG(uap, fd), SCARG(uap, changelist),
                    667:            SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
                    668:            SCARG(uap, timeout), &kevent_native_ops);
                    669: }
                    670:
/*
 * kevent1:
 *
 *	Common implementation of kevent(2).  The caller supplies a
 *	kevent_ops vector (keops) describing how the timeout and the
 *	changelist are fetched and how result events are delivered,
 *	so the same loop serves callers with different data-movement
 *	needs.  Returns 0 on success or an errno; *retval holds the
 *	number of events (or registration errors) reported.
 */
int
kevent1(struct lwp *l, register_t *retval, int fd,
    const struct kevent *changelist, size_t nchanges, struct kevent *eventlist,
    size_t nevents, const struct timespec *timeout,
    const struct kevent_ops *keops)
{
	struct kevent	*kevp;
	struct kqueue	*kq;
	struct file	*fp;
	struct timespec	ts;
	struct proc	*p;
	size_t		i, n, ichange;
	int		nerrors, error;

	p = l->l_proc;
	/* check that we're dealing with a kq */
	fp = fd_getfile(p->p_fd, fd);
	if (fp == NULL)
		return (EBADF);

	/*
	 * fd_getfile() returned fp with f_slock held (the unlock below
	 * shows this); the wrong-type path must drop it before failing.
	 */
	if (fp->f_type != DTYPE_KQUEUE) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	/* Take a use reference; presumably also releases f_slock — confirm. */
	FILE_USE(fp);

	if (timeout != NULL) {
		/* Pull the timeout into kernel space and use the copy. */
		error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;
	ichange = 0;

	/* traverse list of events to register */
	while (nchanges > 0) {
		/* copyin a maximum of KQ_NEVENTS at each pass */
		n = MIN(nchanges, KQ_NEVENTS);
		error = (*keops->keo_fetch_changes)(keops->keo_private,
		    changelist, kq->kq_kev, ichange, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			/* Strip kernel-reserved flag bits from user input. */
			kevp->flags &= ~EV_SYSFLAGS;
			/* register each knote */
			error = kqueue_register(kq, kevp, l);
			if (error) {
				if (nevents != 0) {
					/*
					 * Report the per-event failure in
					 * the caller's eventlist, consuming
					 * one result slot, and keep going.
					 */
					kevp->flags = EV_ERROR;
					kevp->data = error;
					error = (*keops->keo_put_events)
					    (keops->keo_private, kevp,
					    eventlist, nerrors, 1);
					if (error)
						goto done;
					nevents--;
					nerrors++;
				} else {
					/* No room to report; fail outright. */
					goto done;
				}
			}
		}
		nchanges -= n;	/* update the results */
		ichange += n;
	}
	if (nerrors) {
		/* Registration errors were delivered as events; success. */
		*retval = nerrors;
		error = 0;
		goto done;
	}

	/* actually scan through the events */
	error = kqueue_scan(fp, nevents, eventlist, timeout, l, retval, keops);
 done:
	FILE_UNUSE(fp, l);
	return (error);
}
                    753:
/*
 * kqueue_register:
 *
 *	Register a given kevent kev onto the kqueue kq: locate an
 *	existing matching knote (by descriptor list or by ident hash),
 *	create or update it for EV_ADD, tear it down for EV_DELETE,
 *	and apply EV_ENABLE/EV_DISABLE.  Returns 0 or an errno.
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct lwp *l)
{
	const struct kfilter *kfilter;
	struct filedesc	*fdp;
	struct file	*fp;
	struct knote	*kn;
	int		s, error;

	fdp = kq->kq_fdp;
	fp = NULL;
	kn = NULL;
	error = 0;
	kfilter = kfilter_byfilter(kev->filter);
	if (kfilter == NULL || kfilter->filtops == NULL) {
		/* filter not found nor implemented */
		return (EINVAL);
	}

	/* search if knote already exists */
	if (kfilter->filtops->f_isfd) {
		/* monitoring a file descriptor */
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
			return (EBADF);	/* validate descriptor */
		FILE_USE(fp);

		/* fd-attached knotes live on the per-descriptor list */
		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		/*
		 * not monitoring a file descriptor, so
		 * lookup knotes in internal hash table
		 */
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;		/* filter not found */
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		/* add knote */

		if (kn == NULL) {
			/* create new knote */
			kn = pool_get(&knote_pool, PR_WAITOK);
			/* NOTE(review): PR_WAITOK likely never yields NULL;
			 * this check appears defensive — confirm. */
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = kfilter->filtops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			/* Stash the caller's filter arguments in the knote. */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			/* Link in first; undo with knote_drop() if the
			 * filter's attach routine rejects the knote. */
			knote_attach(kn, fdp);
			if ((error = kfilter->filtops->f_attach(kn)) != 0) {
				knote_drop(kn, l, fdp);
				goto done;
			}
		} else {
			/* modify existing knote */

			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		/* Poll the filter once; queue the knote if already active.
		 * splsched() guards knote status manipulation here. */
		s = splsched();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {	/* delete knote */
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, l, fdp);
		goto done;
	}

	/* disable knote */
	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splsched();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	/* enable knote */
	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splsched();
		kn->kn_status &= ~KN_DISABLED;
		/* re-queue if it fired while disabled and isn't queued yet */
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

 done:
	/* fp is still non-NULL only if ownership was not passed to a knote */
	if (fp != NULL)
		FILE_UNUSE(fp, l);
	return (error);
}
                    893:
/*
 * kqueue_scan:
 *
 *	Scan through the list of events on fp (for a maximum of
 *	maxevents), delivering the results via keops->keo_put_events
 *	into ulistp.  Timeout is determined by tsp: if NULL, wait
 *	indefinitely; if it converts to zero ticks, poll; otherwise
 *	wait as appropriate.  A zeroed "marker" knote is queued at the
 *	tail so the loop knows where this pass of the queue ends.
 */
static int
kqueue_scan(struct file *fp, size_t maxevents, struct kevent *ulistp,
    const struct timespec *tsp, struct lwp *l, register_t *retval,
    const struct kevent_ops *keops)
{
	struct proc	*p = l->l_proc;
	struct kqueue	*kq;
	struct kevent	*kevp;
	struct timeval	atv;
	struct knote	*kn, *marker=NULL;
	size_t		count, nkev, nevents;
	int		s, timeout, error;

	kq = (struct kqueue *)fp->f_data;
	count = maxevents;
	nkev = nevents = error = 0;
	if (count == 0)
		goto done;

	if (tsp) {				/* timeout supplied */
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);	/* calc. time to wait until */
		splx(s);
		timeout = hzto(&atv);
		if (timeout <= 0)
			timeout = -1;		/* do poll */
	} else {
		/* no timeout, wait forever */
		timeout = 0;
	}

	/* The marker delimits the end of this scan pass on kq_head. */
	MALLOC(marker, struct knote *, sizeof(*marker), M_KEVENT, M_WAITOK);
	memset(marker, 0, sizeof(*marker));

	goto start;

 retry:
	if (tsp) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timeout = hzto(&atv);
		if (timeout <= 0)
			goto done;
	}

 start:
	kevp = kq->kq_kev;
	/* splsched() stays raised across the dequeue loop below;
	 * kq_lock is taken/dropped around each queue manipulation. */
	s = splsched();
	simple_lock(&kq->kq_lock);
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			/* polling and nothing pending */
			error = EWOULDBLOCK;
			simple_unlock(&kq->kq_lock);
		} else {
			/* sleep until an event arrives or timeout expires;
			 * PNORELOCK: ltsleep drops kq_lock for us */
			kq->kq_state |= KQ_SLEEP;
			error = ltsleep(kq, PSOCK | PCATCH | PNORELOCK,
					"kqread", timeout, &kq->kq_lock);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* mark end of knote list */
	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
	simple_unlock(&kq->kq_lock);

	while (count) {				/* while user wants data ... */
		simple_lock(&kq->kq_lock);
		kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == marker) {		/* if it's our marker, stop */
			/* XXX: what if it's someone else's marker? */
			simple_unlock(&kq->kq_lock);
			splx(s);
			/* nothing delivered yet: wait again; else finish */
			if (count == maxevents)
				goto retry;
			goto done;
		}
		kq->kq_count--;
		simple_unlock(&kq->kq_lock);

		if (kn->kn_status & KN_DISABLED) {
			/* don't want disabled events */
			kn->kn_status &= ~KN_QUEUED;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			/*
			 * non-ONESHOT event that hasn't
			 * triggered again, so de-queue.
			 */
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			continue;
		}
		/* stage the event into the per-kq staging buffer */
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			/* delete ONESHOT events after retrieval */
			kn->kn_status &= ~KN_QUEUED;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, l, p->p_fd);
			s = splsched();
		} else if (kn->kn_flags & EV_CLEAR) {
			/* clear state after retrieval */
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
		} else {
			/* add event back on list */
			simple_lock(&kq->kq_lock);
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			kq->kq_count++;
			simple_unlock(&kq->kq_lock);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			/* do copyouts in KQ_NEVENTS chunks */
			splx(s);
			error = (*keops->keo_put_events)(keops->keo_private,
			    &kq->kq_kev[0], ulistp, nevents, nkev);
			nevents += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splsched();
			if (error)
				break;
		}
	}

	/* remove marker */
	simple_lock(&kq->kq_lock);
	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
	simple_unlock(&kq->kq_lock);
	splx(s);
 done:
	if (marker)
		FREE(marker, M_KEVENT);

	if (nkev != 0)
		/* copyout remaining events */
		error = (*keops->keo_put_events)(keops->keo_private,
		    &kq->kq_kev[0], ulistp, nevents, nkev);
	/* report how many events were delivered */
	*retval = maxevents - count;

	return (error);
}
                   1062:
                   1063: /*
1.3       jdolecek 1064:  * struct fileops read method for a kqueue descriptor.
                   1065:  * Not implemented.
                   1066:  * XXX: This could be expanded to call kqueue_scan, if desired.
1.1       lukem    1067:  */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
	kauth_cred_t cred, int flags)
{

	return (ENXIO);	/* read(2) on a kqueue descriptor is not supported */
}
                   1076:
1.3       jdolecek 1077: /*
                   1078:  * struct fileops write method for a kqueue descriptor.
                   1079:  * Not implemented.
                   1080:  */
/*ARGSUSED*/
static int
kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
	kauth_cred_t cred, int flags)
{

	return (ENXIO);	/* write(2) on a kqueue descriptor is not supported */
}
                   1089:
1.3       jdolecek 1090: /*
                   1091:  * struct fileops ioctl method for a kqueue descriptor.
                   1092:  *
                   1093:  * Two ioctls are currently supported. They both use struct kfilter_mapping:
                   1094:  *     KFILTER_BYNAME          find name for filter, and return result in
                   1095:  *                             name, which is of size len.
                   1096:  *     KFILTER_BYFILTER        find filter for name. len is ignored.
                   1097:  */
1.1       lukem    1098: /*ARGSUSED*/
                   1099: static int
1.25      christos 1100: kqueue_ioctl(struct file *fp, u_long com, void *data, struct lwp *l)
1.1       lukem    1101: {
1.3       jdolecek 1102:        struct kfilter_mapping  *km;
                   1103:        const struct kfilter    *kfilter;
                   1104:        char                    *name;
                   1105:        int                     error;
                   1106:
1.22      perry    1107:        km = (struct kfilter_mapping *)data;
1.3       jdolecek 1108:        error = 0;
                   1109:
                   1110:        switch (com) {
                   1111:        case KFILTER_BYFILTER:  /* convert filter -> name */
                   1112:                kfilter = kfilter_byfilter(km->filter);
                   1113:                if (kfilter != NULL)
                   1114:                        error = copyoutstr(kfilter->name, km->name, km->len,
                   1115:                            NULL);
                   1116:                else
                   1117:                        error = ENOENT;
                   1118:                break;
                   1119:
                   1120:        case KFILTER_BYNAME:    /* convert name -> filter */
                   1121:                MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK);
                   1122:                error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
                   1123:                if (error) {
                   1124:                        FREE(name, M_KEVENT);
                   1125:                        break;
                   1126:                }
                   1127:                kfilter = kfilter_byname(name);
                   1128:                if (kfilter != NULL)
                   1129:                        km->filter = kfilter->filter;
                   1130:                else
                   1131:                        error = ENOENT;
                   1132:                FREE(name, M_KEVENT);
                   1133:                break;
                   1134:
                   1135:        default:
                   1136:                error = ENOTTY;
                   1137:
                   1138:        }
                   1139:        return (error);
                   1140: }
                   1141:
                   1142: /*
                   1143:  * struct fileops fcntl method for a kqueue descriptor.
                   1144:  * Not implemented.
                   1145:  */
                   1146: /*ARGSUSED*/
                   1147: static int
1.25      christos 1148: kqueue_fcntl(struct file *fp, u_int com, void *data, struct lwp *l)
1.3       jdolecek 1149: {
                   1150:
1.1       lukem    1151:        return (ENOTTY);
                   1152: }
                   1153:
1.3       jdolecek 1154: /*
                   1155:  * struct fileops poll method for a kqueue descriptor.
                   1156:  * Determine if kqueue has events pending.
                   1157:  */
1.1       lukem    1158: static int
1.25      christos 1159: kqueue_poll(struct file *fp, int events, struct lwp *l)
1.1       lukem    1160: {
1.3       jdolecek 1161:        struct kqueue   *kq;
                   1162:        int             revents;
                   1163:
                   1164:        kq = (struct kqueue *)fp->f_data;
                   1165:        revents = 0;
                   1166:        if (events & (POLLIN | POLLRDNORM)) {
                   1167:                if (kq->kq_count) {
                   1168:                        revents |= events & (POLLIN | POLLRDNORM);
1.1       lukem    1169:                } else {
1.25      christos 1170:                        selrecord(l, &kq->kq_sel);
1.1       lukem    1171:                }
                   1172:        }
                   1173:        return (revents);
                   1174: }
                   1175:
1.3       jdolecek 1176: /*
                   1177:  * struct fileops stat method for a kqueue descriptor.
                   1178:  * Returns dummy info, with st_size being number of events pending.
                   1179:  */
1.1       lukem    1180: static int
1.25      christos 1181: kqueue_stat(struct file *fp, struct stat *st, struct lwp *l)
1.1       lukem    1182: {
1.3       jdolecek 1183:        struct kqueue   *kq;
1.1       lukem    1184:
1.3       jdolecek 1185:        kq = (struct kqueue *)fp->f_data;
                   1186:        memset((void *)st, 0, sizeof(*st));
1.1       lukem    1187:        st->st_size = kq->kq_count;
                   1188:        st->st_blksize = sizeof(struct kevent);
                   1189:        st->st_mode = S_IFIFO;
                   1190:        return (0);
                   1191: }
                   1192:
/*
 * struct fileops close method for a kqueue descriptor.
 * Cleans up kqueue.
 *
 * Walks both per-process knote tables -- the fd-indexed fd_knlist and
 * the fd_knhash hash used for non-fd knotes -- detaching and freeing
 * every knote attached to this kqueue, then frees the kqueue itself.
 */
static int
kqueue_close(struct file *fp, struct lwp *l)
{
	struct proc	*p = l->l_proc;
	struct kqueue	*kq;
	struct filedesc *fdp;
	struct knote	**knp, *kn, *kn0;
	int		i;

	kq = (struct kqueue *)fp->f_data;
	fdp = p->p_fd;
	/* Pass 1: fd-backed knotes, one SLIST per descriptor slot. */
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		/* knp tracks the link to patch when unlinking mid-list */
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			/* save successor before kn may be freed */
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				/* fd-backed: release the file reference */
				FILE_UNUSE(kn->kn_fp, l);
				pool_put(&knote_pool, kn);
				*knp = kn0;
			} else {
				/* keep kn; advance the patch point */
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	/* Pass 2: non-fd knotes in the hash table, if it was created. */
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					/* XXX non-fd release of kn->kn_ptr */
					pool_put(&knote_pool, kn);
					*knp = kn0;
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	/* all knotes gone; release the kqueue and detach from the file */
	pool_put(&kqueue_pool, kq);
	fp->f_data = NULL;

	return (0);
}
                   1247:
1.3       jdolecek 1248: /*
                   1249:  * wakeup a kqueue
                   1250:  */
1.1       lukem    1251: static void
                   1252: kqueue_wakeup(struct kqueue *kq)
                   1253: {
1.12      pk       1254:        int s;
1.1       lukem    1255:
1.12      pk       1256:        s = splsched();
                   1257:        simple_lock(&kq->kq_lock);
1.3       jdolecek 1258:        if (kq->kq_state & KQ_SLEEP) {          /* if currently sleeping ...  */
1.1       lukem    1259:                kq->kq_state &= ~KQ_SLEEP;
1.3       jdolecek 1260:                wakeup(kq);                     /* ... wakeup */
1.1       lukem    1261:        }
1.3       jdolecek 1262:
                   1263:        /* Notify select/poll and kevent. */
                   1264:        selnotify(&kq->kq_sel, 0);
1.12      pk       1265:        simple_unlock(&kq->kq_lock);
                   1266:        splx(s);
1.1       lukem    1267: }
                   1268:
                   1269: /*
1.3       jdolecek 1270:  * struct fileops kqfilter method for a kqueue descriptor.
                   1271:  * Event triggered when monitored kqueue changes.
                   1272:  */
                   1273: /*ARGSUSED*/
                   1274: static int
                   1275: kqueue_kqfilter(struct file *fp, struct knote *kn)
                   1276: {
                   1277:        struct kqueue *kq;
                   1278:
                   1279:        KASSERT(fp == kn->kn_fp);
                   1280:        kq = (struct kqueue *)kn->kn_fp->f_data;
                   1281:        if (kn->kn_filter != EVFILT_READ)
                   1282:                return (1);
                   1283:        kn->kn_fop = &kqread_filtops;
1.5       christos 1284:        SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
1.3       jdolecek 1285:        return (0);
                   1286: }
                   1287:
                   1288:
                   1289: /*
                   1290:  * Walk down a list of knotes, activating them if their event has triggered.
1.1       lukem    1291:  */
                   1292: void
                   1293: knote(struct klist *list, long hint)
                   1294: {
                   1295:        struct knote *kn;
                   1296:
                   1297:        SLIST_FOREACH(kn, list, kn_selnext)
                   1298:                if (kn->kn_fop->f_event(kn, hint))
                   1299:                        KNOTE_ACTIVATE(kn);
                   1300: }
                   1301:
                   1302: /*
1.3       jdolecek 1303:  * Remove all knotes from a specified klist
1.1       lukem    1304:  */
                   1305: void
1.25      christos 1306: knote_remove(struct lwp *l, struct klist *list)
1.1       lukem    1307: {
                   1308:        struct knote *kn;
                   1309:
                   1310:        while ((kn = SLIST_FIRST(list)) != NULL) {
                   1311:                kn->kn_fop->f_detach(kn);
1.25      christos 1312:                knote_drop(kn, l, l->l_proc->p_fd);
1.1       lukem    1313:        }
                   1314: }
                   1315:
                   1316: /*
1.3       jdolecek 1317:  * Remove all knotes referencing a specified fd
1.1       lukem    1318:  */
                   1319: void
1.25      christos 1320: knote_fdclose(struct lwp *l, int fd)
1.1       lukem    1321: {
1.3       jdolecek 1322:        struct filedesc *fdp;
                   1323:        struct klist    *list;
1.1       lukem    1324:
1.25      christos 1325:        fdp = l->l_proc->p_fd;
1.3       jdolecek 1326:        list = &fdp->fd_knlist[fd];
1.25      christos 1327:        knote_remove(l, list);
1.1       lukem    1328: }
                   1329:
1.3       jdolecek 1330: /*
                   1331:  * Attach a new knote to a file descriptor
                   1332:  */
1.1       lukem    1333: static void
                   1334: knote_attach(struct knote *kn, struct filedesc *fdp)
                   1335: {
1.3       jdolecek 1336:        struct klist    *list;
                   1337:        int             size;
1.1       lukem    1338:
                   1339:        if (! kn->kn_fop->f_isfd) {
1.3       jdolecek 1340:                /* if knote is not on an fd, store on internal hash table */
1.1       lukem    1341:                if (fdp->fd_knhashmask == 0)
1.3       jdolecek 1342:                        fdp->fd_knhash = hashinit(KN_HASHSIZE, HASH_LIST,
                   1343:                            M_KEVENT, M_WAITOK, &fdp->fd_knhashmask);
1.1       lukem    1344:                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                   1345:                goto done;
                   1346:        }
                   1347:
1.3       jdolecek 1348:        /*
                   1349:         * otherwise, knote is on an fd.
                   1350:         * knotes are stored in fd_knlist indexed by kn->kn_id.
                   1351:         */
1.1       lukem    1352:        if (fdp->fd_knlistsize <= kn->kn_id) {
1.3       jdolecek 1353:                /* expand list, it's too small */
1.1       lukem    1354:                size = fdp->fd_knlistsize;
1.3       jdolecek 1355:                while (size <= kn->kn_id) {
                   1356:                        /* grow in KQ_EXTENT chunks */
                   1357:                        size += KQ_EXTENT;
                   1358:                }
                   1359:                list = malloc(size * sizeof(struct klist *), M_KEVENT,M_WAITOK);
                   1360:                if (fdp->fd_knlist) {
                   1361:                        /* copy existing knlist */
                   1362:                        memcpy((caddr_t)list, (caddr_t)fdp->fd_knlist,
                   1363:                            fdp->fd_knlistsize * sizeof(struct klist *));
                   1364:                }
                   1365:                /*
                   1366:                 * Zero new memory. Stylistically, SLIST_INIT() should be
                   1367:                 * used here, but that does same thing as the memset() anyway.
                   1368:                 */
                   1369:                memset(&list[fdp->fd_knlistsize], 0,
1.1       lukem    1370:                    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
1.3       jdolecek 1371:
                   1372:                /* switch to new knlist */
1.1       lukem    1373:                if (fdp->fd_knlist != NULL)
1.3       jdolecek 1374:                        free(fdp->fd_knlist, M_KEVENT);
1.1       lukem    1375:                fdp->fd_knlistsize = size;
                   1376:                fdp->fd_knlist = list;
                   1377:        }
1.3       jdolecek 1378:
                   1379:        /* get list head for this fd */
1.1       lukem    1380:        list = &fdp->fd_knlist[kn->kn_id];
1.3       jdolecek 1381:  done:
                   1382:        /* add new knote */
1.1       lukem    1383:        SLIST_INSERT_HEAD(list, kn, kn_link);
                   1384:        kn->kn_status = 0;
                   1385: }
                   1386:
                   1387: /*
1.3       jdolecek 1388:  * Drop knote.
                   1389:  * Should be called at spl == 0, since we don't want to hold spl
                   1390:  * while calling FILE_UNUSE and free.
1.1       lukem    1391:  */
                   1392: static void
1.25      christos 1393: knote_drop(struct knote *kn, struct lwp *l, struct filedesc *fdp)
1.1       lukem    1394: {
1.3       jdolecek 1395:        struct klist    *list;
1.1       lukem    1396:
                   1397:        if (kn->kn_fop->f_isfd)
                   1398:                list = &fdp->fd_knlist[kn->kn_id];
                   1399:        else
                   1400:                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                   1401:
                   1402:        SLIST_REMOVE(list, kn, knote, kn_link);
                   1403:        if (kn->kn_status & KN_QUEUED)
                   1404:                knote_dequeue(kn);
                   1405:        if (kn->kn_fop->f_isfd)
1.25      christos 1406:                FILE_UNUSE(kn->kn_fp, l);
1.3       jdolecek 1407:        pool_put(&knote_pool, kn);
1.1       lukem    1408: }
                   1409:
                   1410:
1.3       jdolecek 1411: /*
                   1412:  * Queue new event for knote.
                   1413:  */
1.1       lukem    1414: static void
                   1415: knote_enqueue(struct knote *kn)
                   1416: {
1.3       jdolecek 1417:        struct kqueue   *kq;
                   1418:        int             s;
1.1       lukem    1419:
1.3       jdolecek 1420:        kq = kn->kn_kq;
                   1421:        KASSERT((kn->kn_status & KN_QUEUED) == 0);
1.1       lukem    1422:
1.12      pk       1423:        s = splsched();
                   1424:        simple_lock(&kq->kq_lock);
1.22      perry    1425:        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1.1       lukem    1426:        kn->kn_status |= KN_QUEUED;
                   1427:        kq->kq_count++;
1.12      pk       1428:        simple_unlock(&kq->kq_lock);
1.1       lukem    1429:        splx(s);
                   1430:        kqueue_wakeup(kq);
                   1431: }
                   1432:
1.3       jdolecek 1433: /*
                   1434:  * Dequeue event for knote.
                   1435:  */
1.1       lukem    1436: static void
                   1437: knote_dequeue(struct knote *kn)
                   1438: {
1.3       jdolecek 1439:        struct kqueue   *kq;
                   1440:        int             s;
1.1       lukem    1441:
1.12      pk       1442:        KASSERT(kn->kn_status & KN_QUEUED);
1.3       jdolecek 1443:        kq = kn->kn_kq;
1.12      pk       1444:
1.11      pk       1445:        s = splsched();
1.12      pk       1446:        simple_lock(&kq->kq_lock);
1.22      perry    1447:        TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1.1       lukem    1448:        kn->kn_status &= ~KN_QUEUED;
                   1449:        kq->kq_count--;
1.12      pk       1450:        simple_unlock(&kq->kq_lock);
1.1       lukem    1451:        splx(s);
                   1452: }

CVSweb <webmaster@jp.NetBSD.org>