Annotation of src/sys/kern/kern_event.c, Revision 1.123
1.123 ! thorpej 1: /* $NetBSD: kern_event.c,v 1.122 2021/09/26 03:12:50 thorpej Exp $ */
1.49 ad 2:
3: /*-
1.64 ad 4: * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
1.49 ad 5: * All rights reserved.
6: *
1.64 ad 7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Andrew Doran.
9: *
1.49 ad 10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
1.28 kardel 31:
1.1 lukem 32: /*-
33: * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
1.108 christos 34: * Copyright (c) 2009 Apple, Inc
1.1 lukem 35: * All rights reserved.
36: *
37: * Redistribution and use in source and binary forms, with or without
38: * modification, are permitted provided that the following conditions
39: * are met:
40: * 1. Redistributions of source code must retain the above copyright
41: * notice, this list of conditions and the following disclaimer.
42: * 2. Redistributions in binary form must reproduce the above copyright
43: * notice, this list of conditions and the following disclaimer in the
44: * documentation and/or other materials provided with the distribution.
45: *
46: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
47: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
50: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56: * SUCH DAMAGE.
57: *
1.49 ad 58: * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
1.1 lukem 59: */
1.14 jdolecek 60:
61: #include <sys/cdefs.h>
1.123 ! thorpej 62: __KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.122 2021/09/26 03:12:50 thorpej Exp $");
1.1 lukem 63:
64: #include <sys/param.h>
65: #include <sys/systm.h>
66: #include <sys/kernel.h>
1.86 christos 67: #include <sys/wait.h>
1.1 lukem 68: #include <sys/proc.h>
69: #include <sys/file.h>
1.3 jdolecek 70: #include <sys/select.h>
1.1 lukem 71: #include <sys/queue.h>
72: #include <sys/event.h>
73: #include <sys/eventvar.h>
74: #include <sys/poll.h>
1.49 ad 75: #include <sys/kmem.h>
1.1 lukem 76: #include <sys/stat.h>
1.3 jdolecek 77: #include <sys/filedesc.h>
78: #include <sys/syscallargs.h>
1.27 elad 79: #include <sys/kauth.h>
1.40 ad 80: #include <sys/conf.h>
1.49 ad 81: #include <sys/atomic.h>
1.1 lukem 82:
1.49 ad 83: static int kqueue_scan(file_t *, size_t, struct kevent *,
84: const struct timespec *, register_t *,
85: const struct kevent_ops *, struct kevent *,
86: size_t);
87: static int kqueue_ioctl(file_t *, u_long, void *);
88: static int kqueue_fcntl(file_t *, u_int, void *);
89: static int kqueue_poll(file_t *, int);
90: static int kqueue_kqfilter(file_t *, struct knote *);
91: static int kqueue_stat(file_t *, struct stat *);
92: static int kqueue_close(file_t *);
1.118 jdolecek 93: static void kqueue_restart(file_t *);
1.49 ad 94: static int kqueue_register(struct kqueue *, struct kevent *);
95: static void kqueue_doclose(struct kqueue *, struct klist *, int);
96:
97: static void knote_detach(struct knote *, filedesc_t *fdp, bool);
98: static void knote_enqueue(struct knote *);
99: static void knote_activate(struct knote *);
100:
101: static void filt_kqdetach(struct knote *);
102: static int filt_kqueue(struct knote *, long hint);
103: static int filt_procattach(struct knote *);
104: static void filt_procdetach(struct knote *);
105: static int filt_proc(struct knote *, long hint);
106: static int filt_fileattach(struct knote *);
107: static void filt_timerexpire(void *x);
108: static int filt_timerattach(struct knote *);
109: static void filt_timerdetach(struct knote *);
110: static int filt_timer(struct knote *, long hint);
1.102 christos 111: static int filt_fsattach(struct knote *kn);
112: static void filt_fsdetach(struct knote *kn);
113: static int filt_fs(struct knote *kn, long hint);
1.108 christos 114: static int filt_userattach(struct knote *);
115: static void filt_userdetach(struct knote *);
116: static int filt_user(struct knote *, long hint);
117: static void filt_usertouch(struct knote *, struct kevent *, long type);
1.1 lukem 118:
1.21 christos 119: static const struct fileops kqueueops = {
1.101 christos 120: .fo_name = "kqueue",
1.64 ad 121: .fo_read = (void *)enxio,
122: .fo_write = (void *)enxio,
123: .fo_ioctl = kqueue_ioctl,
124: .fo_fcntl = kqueue_fcntl,
125: .fo_poll = kqueue_poll,
126: .fo_stat = kqueue_stat,
127: .fo_close = kqueue_close,
128: .fo_kqfilter = kqueue_kqfilter,
1.118 jdolecek 129: .fo_restart = kqueue_restart,
1.1 lukem 130: };
131:
1.96 maya 132: static const struct filterops kqread_filtops = {
1.123 ! thorpej 133: .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
1.96 maya 134: .f_attach = NULL,
135: .f_detach = filt_kqdetach,
136: .f_event = filt_kqueue,
137: };
138:
139: static const struct filterops proc_filtops = {
1.121 thorpej 140: .f_flags = 0,
1.96 maya 141: .f_attach = filt_procattach,
142: .f_detach = filt_procdetach,
143: .f_event = filt_proc,
144: };
145:
1.122 thorpej 146: /*
147: * file_filtops is not marked MPSAFE because it's going to call
148: * fileops::fo_kqfilter(), which might not be. That function,
149: * however, will override the knote's filterops, and thus will
150: * inherit the MPSAFE-ness of the back-end at that time.
151: */
1.96 maya 152: static const struct filterops file_filtops = {
1.121 thorpej 153: .f_flags = FILTEROP_ISFD,
1.96 maya 154: .f_attach = filt_fileattach,
155: .f_detach = NULL,
156: .f_event = NULL,
157: };
158:
159: static const struct filterops timer_filtops = {
1.121 thorpej 160: .f_flags = 0,
1.96 maya 161: .f_attach = filt_timerattach,
162: .f_detach = filt_timerdetach,
163: .f_event = filt_timer,
164: };
1.1 lukem 165:
1.102 christos 166: static const struct filterops fs_filtops = {
1.121 thorpej 167: .f_flags = 0,
1.102 christos 168: .f_attach = filt_fsattach,
169: .f_detach = filt_fsdetach,
170: .f_event = filt_fs,
171: };
172:
1.108 christos 173: static const struct filterops user_filtops = {
1.123 ! thorpej 174: .f_flags = FILTEROP_MPSAFE,
1.108 christos 175: .f_attach = filt_userattach,
176: .f_detach = filt_userdetach,
177: .f_event = filt_user,
178: .f_touch = filt_usertouch,
179: };
180:
1.49 ad 181: static u_int kq_ncallouts = 0;
1.8 jdolecek 182: static int kq_calloutmax = (4 * 1024);
1.7 thorpej 183:
#define	KN_HASHSIZE		64		/* XXX should be tunable */

/*
 * KN_HASH(val, mask):
 *
 *	Fold bits 8 and up of `val' onto its low bits and reduce the
 *	result with `mask'.  Every use of an argument is parenthesized
 *	so that an expression argument (e.g. "a | b") hashes the same
 *	as its value would.  Note that `val' is evaluated twice.
 */
#define	KN_HASH(val, mask)	(((val) ^ ((val) >> 8)) & (mask))
1.1 lukem 186:
/* Filter ops for EVFILT_SIGNAL; defined outside this file. */
extern const struct filterops sig_filtops;

/*
 * Wake every thread sleeping on the kqueue's condition variable.
 * The argument is parenthesized so that any pointer expression may
 * be passed safely.
 */
#define	KQ_FLUX_WAKEUP(kq)	cv_broadcast(&(kq)->kq_cv)
190:
1.1 lukem 191: /*
192: * Table of all system-defined filters.
1.3 jdolecek 193: * These should be listed in the numeric order of the EVFILT_* defines.
194: * If filtops is NULL, the filter isn't implemented in NetBSD.
195: * End of list is when name is NULL.
1.93 riastrad 196: *
1.49 ad 197: * Note that 'refcnt' is meaningless for built-in filters.
1.1 lukem 198: */
1.3 jdolecek 199: struct kfilter {
1.49 ad 200: const char *name; /* name of filter */
201: uint32_t filter; /* id of filter */
202: unsigned refcnt; /* reference count */
1.3 jdolecek 203: const struct filterops *filtops;/* operations for filter */
1.49 ad 204: size_t namelen; /* length of name string */
1.3 jdolecek 205: };
206:
1.49 ad 207: /* System defined filters */
208: static struct kfilter sys_kfilters[] = {
209: { "EVFILT_READ", EVFILT_READ, 0, &file_filtops, 0 },
210: { "EVFILT_WRITE", EVFILT_WRITE, 0, &file_filtops, 0, },
211: { "EVFILT_AIO", EVFILT_AIO, 0, NULL, 0 },
212: { "EVFILT_VNODE", EVFILT_VNODE, 0, &file_filtops, 0 },
213: { "EVFILT_PROC", EVFILT_PROC, 0, &proc_filtops, 0 },
214: { "EVFILT_SIGNAL", EVFILT_SIGNAL, 0, &sig_filtops, 0 },
215: { "EVFILT_TIMER", EVFILT_TIMER, 0, &timer_filtops, 0 },
1.102 christos 216: { "EVFILT_FS", EVFILT_FS, 0, &fs_filtops, 0 },
1.108 christos 217: { "EVFILT_USER", EVFILT_USER, 0, &user_filtops, 0 },
1.49 ad 218: { NULL, 0, 0, NULL, 0 },
1.1 lukem 219: };
220:
1.49 ad 221: /* User defined kfilters */
1.3 jdolecek 222: static struct kfilter *user_kfilters; /* array */
223: static int user_kfilterc; /* current offset */
224: static int user_kfiltermaxc; /* max size so far */
1.49 ad 225: static size_t user_kfiltersz; /* size of allocated memory */
226:
1.95 riastrad 227: /*
228: * Global Locks.
229: *
230: * Lock order:
231: *
232: * kqueue_filter_lock
233: * -> kn_kq->kq_fdp->fd_lock
234: * -> object lock (e.g., device driver lock, kqueue_misc_lock, &c.)
235: * -> kn_kq->kq_lock
236: *
237: * Locking rules:
238: *
239: * f_attach: fdp->fd_lock, KERNEL_LOCK (unless FILTEROP_MPSAFE)
240: * f_detach: fdp->fd_lock, KERNEL_LOCK (unless FILTEROP_MPSAFE)
241: * f_event(!NOTE_SUBMIT) via kevent: fdp->fd_lock, _no_ object lock
242: * f_event via knote: whatever caller guarantees
243: * Typically, f_event(NOTE_SUBMIT) via knote: object lock
244: * f_event(!NOTE_SUBMIT) via knote: nothing,
245: * acquires/releases object lock inside.
246: */
1.49 ad 247: static krwlock_t kqueue_filter_lock; /* lock on filter lists */
248: static kmutex_t kqueue_misc_lock; /* miscellaneous */
249:
1.122 thorpej 250: static int
251: filter_attach(struct knote *kn)
252: {
253: int rv;
254:
255: KASSERT(kn->kn_fop != NULL);
256: KASSERT(kn->kn_fop->f_attach != NULL);
257:
258: /*
259: * N.B. that kn->kn_fop may change as the result of calling
260: * f_attach().
261: */
262: if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
263: rv = kn->kn_fop->f_attach(kn);
264: } else {
265: KERNEL_LOCK(1, NULL);
266: rv = kn->kn_fop->f_attach(kn);
267: KERNEL_UNLOCK_ONE(NULL);
268: }
269:
270: return rv;
271: }
272:
273: static void
274: filter_detach(struct knote *kn)
275: {
276: KASSERT(kn->kn_fop != NULL);
277: KASSERT(kn->kn_fop->f_detach != NULL);
278:
279: if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
280: kn->kn_fop->f_detach(kn);
281: } else {
282: KERNEL_LOCK(1, NULL);
283: kn->kn_fop->f_detach(kn);
284: KERNEL_UNLOCK_ONE(NULL);
285: }
286: }
287:
288: static int
289: filter_event(struct knote *kn, long hint)
290: {
291: int rv;
292:
293: KASSERT(kn->kn_fop != NULL);
294: KASSERT(kn->kn_fop->f_event != NULL);
295:
296: if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
297: rv = kn->kn_fop->f_event(kn, hint);
298: } else {
299: KERNEL_LOCK(1, NULL);
300: rv = kn->kn_fop->f_event(kn, hint);
301: KERNEL_UNLOCK_ONE(NULL);
302: }
303:
304: return rv;
305: }
306:
307: static void
308: filter_touch(struct knote *kn, struct kevent *kev, long type)
309: {
310: kn->kn_fop->f_touch(kn, kev, type);
311: }
312:
1.66 elad 313: static kauth_listener_t kqueue_listener;
314:
315: static int
316: kqueue_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
317: void *arg0, void *arg1, void *arg2, void *arg3)
318: {
319: struct proc *p;
320: int result;
321:
322: result = KAUTH_RESULT_DEFER;
323: p = arg0;
324:
325: if (action != KAUTH_PROCESS_KEVENT_FILTER)
326: return result;
327:
328: if ((kauth_cred_getuid(p->p_cred) != kauth_cred_getuid(cred) ||
329: ISSET(p->p_flag, PK_SUGID)))
330: return result;
331:
332: result = KAUTH_RESULT_ALLOW;
333:
334: return result;
335: }
336:
1.49 ad 337: /*
338: * Initialize the kqueue subsystem.
339: */
340: void
341: kqueue_init(void)
342: {
343:
344: rw_init(&kqueue_filter_lock);
345: mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
1.66 elad 346:
347: kqueue_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
348: kqueue_listener_cb, NULL);
1.49 ad 349: }
1.3 jdolecek 350:
351: /*
352: * Find kfilter entry by name, or NULL if not found.
353: */
1.49 ad 354: static struct kfilter *
1.3 jdolecek 355: kfilter_byname_sys(const char *name)
356: {
357: int i;
358:
1.49 ad 359: KASSERT(rw_lock_held(&kqueue_filter_lock));
360:
1.3 jdolecek 361: for (i = 0; sys_kfilters[i].name != NULL; i++) {
362: if (strcmp(name, sys_kfilters[i].name) == 0)
1.49 ad 363: return &sys_kfilters[i];
1.3 jdolecek 364: }
1.49 ad 365: return NULL;
1.3 jdolecek 366: }
367:
368: static struct kfilter *
369: kfilter_byname_user(const char *name)
370: {
371: int i;
372:
1.49 ad 373: KASSERT(rw_lock_held(&kqueue_filter_lock));
374:
1.31 seanb 375: /* user filter slots have a NULL name if previously deregistered */
376: for (i = 0; i < user_kfilterc ; i++) {
377: if (user_kfilters[i].name != NULL &&
1.3 jdolecek 378: strcmp(name, user_kfilters[i].name) == 0)
1.49 ad 379: return &user_kfilters[i];
1.3 jdolecek 380: }
1.49 ad 381: return NULL;
1.3 jdolecek 382: }
383:
1.49 ad 384: static struct kfilter *
1.3 jdolecek 385: kfilter_byname(const char *name)
386: {
1.49 ad 387: struct kfilter *kfilter;
388:
389: KASSERT(rw_lock_held(&kqueue_filter_lock));
1.3 jdolecek 390:
391: if ((kfilter = kfilter_byname_sys(name)) != NULL)
1.49 ad 392: return kfilter;
1.3 jdolecek 393:
1.49 ad 394: return kfilter_byname_user(name);
1.3 jdolecek 395: }
396:
397: /*
398: * Find kfilter entry by filter id, or NULL if not found.
399: * Assumes entries are indexed in filter id order, for speed.
400: */
1.49 ad 401: static struct kfilter *
1.3 jdolecek 402: kfilter_byfilter(uint32_t filter)
403: {
1.49 ad 404: struct kfilter *kfilter;
405:
406: KASSERT(rw_lock_held(&kqueue_filter_lock));
1.3 jdolecek 407:
408: if (filter < EVFILT_SYSCOUNT) /* it's a system filter */
409: kfilter = &sys_kfilters[filter];
410: else if (user_kfilters != NULL &&
411: filter < EVFILT_SYSCOUNT + user_kfilterc)
412: /* it's a user filter */
413: kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
414: else
415: return (NULL); /* out of range */
416: KASSERT(kfilter->filter == filter); /* sanity check! */
417: return (kfilter);
418: }
419:
420: /*
421: * Register a new kfilter. Stores the entry in user_kfilters.
422: * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
423: * If retfilter != NULL, the new filterid is returned in it.
424: */
425: int
426: kfilter_register(const char *name, const struct filterops *filtops,
1.49 ad 427: int *retfilter)
1.1 lukem 428: {
1.3 jdolecek 429: struct kfilter *kfilter;
1.49 ad 430: size_t len;
1.31 seanb 431: int i;
1.3 jdolecek 432:
433: if (name == NULL || name[0] == '\0' || filtops == NULL)
434: return (EINVAL); /* invalid args */
1.49 ad 435:
436: rw_enter(&kqueue_filter_lock, RW_WRITER);
437: if (kfilter_byname(name) != NULL) {
438: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 439: return (EEXIST); /* already exists */
1.49 ad 440: }
441: if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
442: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 443: return (EINVAL); /* too many */
1.49 ad 444: }
1.3 jdolecek 445:
1.31 seanb 446: for (i = 0; i < user_kfilterc; i++) {
447: kfilter = &user_kfilters[i];
448: if (kfilter->name == NULL) {
449: /* Previously deregistered slot. Reuse. */
450: goto reuse;
451: }
452: }
453:
1.3 jdolecek 454: /* check if need to grow user_kfilters */
455: if (user_kfilterc + 1 > user_kfiltermaxc) {
1.49 ad 456: /* Grow in KFILTER_EXTENT chunks. */
1.3 jdolecek 457: user_kfiltermaxc += KFILTER_EXTENT;
1.69 dsl 458: len = user_kfiltermaxc * sizeof(*kfilter);
1.49 ad 459: kfilter = kmem_alloc(len, KM_SLEEP);
460: memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz);
461: if (user_kfilters != NULL) {
462: memcpy(kfilter, user_kfilters, user_kfiltersz);
463: kmem_free(user_kfilters, user_kfiltersz);
464: }
465: user_kfiltersz = len;
1.3 jdolecek 466: user_kfilters = kfilter;
467: }
1.31 seanb 468: /* Adding new slot */
469: kfilter = &user_kfilters[user_kfilterc++];
470: reuse:
1.97 christos 471: kfilter->name = kmem_strdupsize(name, &kfilter->namelen, KM_SLEEP);
1.3 jdolecek 472:
1.31 seanb 473: kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;
1.3 jdolecek 474:
1.49 ad 475: kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
476: memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));
1.3 jdolecek 477:
478: if (retfilter != NULL)
1.31 seanb 479: *retfilter = kfilter->filter;
1.49 ad 480: rw_exit(&kqueue_filter_lock);
481:
1.3 jdolecek 482: return (0);
1.1 lukem 483: }
484:
1.3 jdolecek 485: /*
486: * Unregister a kfilter previously registered with kfilter_register.
487: * This retains the filter id, but clears the name and frees filtops (filter
488: * operations), so that the number isn't reused during a boot.
489: * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
490: */
491: int
492: kfilter_unregister(const char *name)
1.1 lukem 493: {
1.3 jdolecek 494: struct kfilter *kfilter;
495:
496: if (name == NULL || name[0] == '\0')
497: return (EINVAL); /* invalid name */
498:
1.49 ad 499: rw_enter(&kqueue_filter_lock, RW_WRITER);
500: if (kfilter_byname_sys(name) != NULL) {
501: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 502: return (EINVAL); /* can't detach system filters */
1.49 ad 503: }
1.1 lukem 504:
1.3 jdolecek 505: kfilter = kfilter_byname_user(name);
1.49 ad 506: if (kfilter == NULL) {
507: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 508: return (ENOENT);
1.49 ad 509: }
510: if (kfilter->refcnt != 0) {
511: rw_exit(&kqueue_filter_lock);
512: return (EBUSY);
513: }
1.1 lukem 514:
1.49 ad 515: /* Cast away const (but we know it's safe. */
516: kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
1.31 seanb 517: kfilter->name = NULL; /* mark as `not implemented' */
518:
1.3 jdolecek 519: if (kfilter->filtops != NULL) {
1.49 ad 520: /* Cast away const (but we know it's safe. */
521: kmem_free(__UNCONST(kfilter->filtops),
522: sizeof(*kfilter->filtops));
1.3 jdolecek 523: kfilter->filtops = NULL; /* mark as `not implemented' */
524: }
1.49 ad 525: rw_exit(&kqueue_filter_lock);
526:
1.1 lukem 527: return (0);
528: }
529:
1.3 jdolecek 530:
531: /*
532: * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
1.49 ad 533: * descriptors. Calls fileops kqfilter method for given file descriptor.
1.3 jdolecek 534: */
535: static int
536: filt_fileattach(struct knote *kn)
537: {
1.49 ad 538: file_t *fp;
539:
540: fp = kn->kn_obj;
1.3 jdolecek 541:
1.49 ad 542: return (*fp->f_ops->fo_kqfilter)(fp, kn);
1.3 jdolecek 543: }
544:
545: /*
546: * Filter detach method for EVFILT_READ on kqueue descriptor.
547: */
1.1 lukem 548: static void
549: filt_kqdetach(struct knote *kn)
550: {
1.3 jdolecek 551: struct kqueue *kq;
1.1 lukem 552:
1.82 matt 553: kq = ((file_t *)kn->kn_obj)->f_kqueue;
1.49 ad 554:
555: mutex_spin_enter(&kq->kq_lock);
1.109 thorpej 556: selremove_knote(&kq->kq_sel, kn);
1.49 ad 557: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 558: }
559:
1.3 jdolecek 560: /*
561: * Filter event method for EVFILT_READ on kqueue descriptor.
562: */
1.1 lukem 563: /*ARGSUSED*/
564: static int
1.33 yamt 565: filt_kqueue(struct knote *kn, long hint)
1.1 lukem 566: {
1.3 jdolecek 567: struct kqueue *kq;
1.49 ad 568: int rv;
569:
1.82 matt 570: kq = ((file_t *)kn->kn_obj)->f_kqueue;
1.1 lukem 571:
1.49 ad 572: if (hint != NOTE_SUBMIT)
573: mutex_spin_enter(&kq->kq_lock);
1.118 jdolecek 574: kn->kn_data = KQ_COUNT(kq);
1.49 ad 575: rv = (kn->kn_data > 0);
576: if (hint != NOTE_SUBMIT)
577: mutex_spin_exit(&kq->kq_lock);
578:
579: return rv;
1.1 lukem 580: }
581:
1.3 jdolecek 582: /*
583: * Filter attach method for EVFILT_PROC.
584: */
1.1 lukem 585: static int
586: filt_procattach(struct knote *kn)
587: {
1.78 pooka 588: struct proc *p;
1.30 ad 589: struct lwp *curl;
590:
591: curl = curlwp;
1.1 lukem 592:
1.107 ad 593: mutex_enter(&proc_lock);
1.77 joerg 594: if (kn->kn_flags & EV_FLAG1) {
595: /*
596: * NOTE_TRACK attaches to the child process too early
597: * for proc_find, so do a raw look up and check the state
598: * explicitly.
599: */
600: p = proc_find_raw(kn->kn_id);
601: if (p != NULL && p->p_stat != SIDL)
602: p = NULL;
603: } else {
604: p = proc_find(kn->kn_id);
605: }
606:
1.49 ad 607: if (p == NULL) {
1.107 ad 608: mutex_exit(&proc_lock);
1.49 ad 609: return ESRCH;
610: }
1.3 jdolecek 611:
612: /*
613: * Fail if it's not owned by you, or the last exec gave us
614: * setuid/setgid privs (unless you're root).
615: */
1.57 ad 616: mutex_enter(p->p_lock);
1.107 ad 617: mutex_exit(&proc_lock);
1.120 christos 618: if (kauth_authorize_process(curl->l_cred,
1.119 christos 619: KAUTH_PROCESS_KEVENT_FILTER, p, NULL, NULL, NULL) != 0) {
1.57 ad 620: mutex_exit(p->p_lock);
1.49 ad 621: return EACCES;
622: }
1.1 lukem 623:
1.49 ad 624: kn->kn_obj = p;
1.3 jdolecek 625: kn->kn_flags |= EV_CLEAR; /* automatically set */
1.1 lukem 626:
627: /*
628: * internal flag indicating registration done by kernel
629: */
630: if (kn->kn_flags & EV_FLAG1) {
1.3 jdolecek 631: kn->kn_data = kn->kn_sdata; /* ppid */
1.1 lukem 632: kn->kn_fflags = NOTE_CHILD;
633: kn->kn_flags &= ~EV_FLAG1;
634: }
635: SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
1.57 ad 636: mutex_exit(p->p_lock);
1.1 lukem 637:
1.49 ad 638: return 0;
1.1 lukem 639: }
640:
641: /*
1.3 jdolecek 642: * Filter detach method for EVFILT_PROC.
643: *
1.1 lukem 644: * The knote may be attached to a different process, which may exit,
645: * leaving nothing for the knote to be attached to. So when the process
646: * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
647: * it will be deleted when read out. However, as part of the knote deletion,
648: * this routine is called, so a check is needed to avoid actually performing
1.3 jdolecek 649: * a detach, because the original process might not exist any more.
1.1 lukem 650: */
651: static void
652: filt_procdetach(struct knote *kn)
653: {
1.3 jdolecek 654: struct proc *p;
1.1 lukem 655:
656: if (kn->kn_status & KN_DETACHED)
657: return;
658:
1.49 ad 659: p = kn->kn_obj;
1.3 jdolecek 660:
1.57 ad 661: mutex_enter(p->p_lock);
1.1 lukem 662: SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
1.57 ad 663: mutex_exit(p->p_lock);
1.1 lukem 664: }
665:
1.3 jdolecek 666: /*
667: * Filter event method for EVFILT_PROC.
668: */
1.1 lukem 669: static int
670: filt_proc(struct knote *kn, long hint)
671: {
1.49 ad 672: u_int event, fflag;
673: struct kevent kev;
674: struct kqueue *kq;
675: int error;
1.1 lukem 676:
677: event = (u_int)hint & NOTE_PCTRLMASK;
1.49 ad 678: kq = kn->kn_kq;
679: fflag = 0;
1.1 lukem 680:
1.49 ad 681: /* If the user is interested in this event, record it. */
1.1 lukem 682: if (kn->kn_sfflags & event)
1.49 ad 683: fflag |= event;
1.1 lukem 684:
685: if (event == NOTE_EXIT) {
1.83 christos 686: struct proc *p = kn->kn_obj;
687:
688: if (p != NULL)
1.86 christos 689: kn->kn_data = P_WAITSTATUS(p);
1.3 jdolecek 690: /*
1.49 ad 691: * Process is gone, so flag the event as finished.
692: *
1.3 jdolecek 693: * Detach the knote from watched process and mark
694: * it as such. We can't leave this to kqueue_scan(),
695: * since the process might not exist by then. And we
696: * have to do this now, since psignal KNOTE() is called
697: * also for zombies and we might end up reading freed
698: * memory if the kevent would already be picked up
1.22 perry 699: * and knote g/c'ed.
1.3 jdolecek 700: */
1.49 ad 701: filt_procdetach(kn);
702:
703: mutex_spin_enter(&kq->kq_lock);
1.1 lukem 704: kn->kn_status |= KN_DETACHED;
1.3 jdolecek 705: /* Mark as ONESHOT, so that the knote it g/c'ed when read */
1.22 perry 706: kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1.49 ad 707: kn->kn_fflags |= fflag;
708: mutex_spin_exit(&kq->kq_lock);
709:
710: return 1;
1.1 lukem 711: }
712:
1.49 ad 713: mutex_spin_enter(&kq->kq_lock);
1.1 lukem 714: if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
715: /*
1.49 ad 716: * Process forked, and user wants to track the new process,
717: * so attach a new knote to it, and immediately report an
718: * event with the parent's pid. Register knote with new
719: * process.
1.1 lukem 720: */
1.104 maxv 721: memset(&kev, 0, sizeof(kev));
1.1 lukem 722: kev.ident = hint & NOTE_PDATAMASK; /* pid */
723: kev.filter = kn->kn_filter;
724: kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
725: kev.fflags = kn->kn_sfflags;
726: kev.data = kn->kn_id; /* parent */
727: kev.udata = kn->kn_kevent.udata; /* preserve udata */
1.49 ad 728: mutex_spin_exit(&kq->kq_lock);
729: error = kqueue_register(kq, &kev);
730: mutex_spin_enter(&kq->kq_lock);
731: if (error != 0)
1.1 lukem 732: kn->kn_fflags |= NOTE_TRACKERR;
733: }
1.49 ad 734: kn->kn_fflags |= fflag;
735: fflag = kn->kn_fflags;
736: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 737:
1.49 ad 738: return fflag != 0;
1.8 jdolecek 739: }
740:
741: static void
742: filt_timerexpire(void *knx)
743: {
744: struct knote *kn = knx;
745: int tticks;
746:
1.49 ad 747: mutex_enter(&kqueue_misc_lock);
1.8 jdolecek 748: kn->kn_data++;
1.49 ad 749: knote_activate(kn);
1.8 jdolecek 750: if ((kn->kn_flags & EV_ONESHOT) == 0) {
751: tticks = mstohz(kn->kn_sdata);
1.73 christos 752: if (tticks <= 0)
753: tticks = 1;
1.39 ad 754: callout_schedule((callout_t *)kn->kn_hook, tticks);
1.8 jdolecek 755: }
1.49 ad 756: mutex_exit(&kqueue_misc_lock);
1.8 jdolecek 757: }
758:
759: /*
760: * data contains amount of time to sleep, in milliseconds
1.22 perry 761: */
1.8 jdolecek 762: static int
763: filt_timerattach(struct knote *kn)
764: {
1.39 ad 765: callout_t *calloutp;
1.49 ad 766: struct kqueue *kq;
1.8 jdolecek 767: int tticks;
768:
769: tticks = mstohz(kn->kn_sdata);
770:
771: /* if the supplied value is under our resolution, use 1 tick */
772: if (tticks == 0) {
773: if (kn->kn_sdata == 0)
1.49 ad 774: return EINVAL;
1.8 jdolecek 775: tticks = 1;
776: }
777:
1.49 ad 778: if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
779: (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
780: atomic_dec_uint(&kq_ncallouts);
781: return ENOMEM;
782: }
1.54 ad 783: callout_init(calloutp, CALLOUT_MPSAFE);
1.49 ad 784:
785: kq = kn->kn_kq;
786: mutex_spin_enter(&kq->kq_lock);
1.8 jdolecek 787: kn->kn_flags |= EV_CLEAR; /* automatically set */
1.49 ad 788: kn->kn_hook = calloutp;
789: mutex_spin_exit(&kq->kq_lock);
790:
1.8 jdolecek 791: callout_reset(calloutp, tticks, filt_timerexpire, kn);
792:
793: return (0);
794: }
795:
796: static void
797: filt_timerdetach(struct knote *kn)
798: {
1.39 ad 799: callout_t *calloutp;
1.103 christos 800: struct kqueue *kq = kn->kn_kq;
801:
802: mutex_spin_enter(&kq->kq_lock);
803: /* prevent rescheduling when we expire */
804: kn->kn_flags |= EV_ONESHOT;
805: mutex_spin_exit(&kq->kq_lock);
1.8 jdolecek 806:
1.39 ad 807: calloutp = (callout_t *)kn->kn_hook;
1.55 ad 808: callout_halt(calloutp, NULL);
1.39 ad 809: callout_destroy(calloutp);
1.49 ad 810: kmem_free(calloutp, sizeof(*calloutp));
811: atomic_dec_uint(&kq_ncallouts);
1.8 jdolecek 812: }
813:
814: static int
1.33 yamt 815: filt_timer(struct knote *kn, long hint)
1.8 jdolecek 816: {
1.49 ad 817: int rv;
818:
819: mutex_enter(&kqueue_misc_lock);
820: rv = (kn->kn_data != 0);
821: mutex_exit(&kqueue_misc_lock);
822:
823: return rv;
1.1 lukem 824: }
825:
1.3 jdolecek 826: /*
1.102 christos 827: * Filter event method for EVFILT_FS.
828: */
829: struct klist fs_klist = SLIST_HEAD_INITIALIZER(&fs_klist);
830:
831: static int
832: filt_fsattach(struct knote *kn)
833: {
834:
835: mutex_enter(&kqueue_misc_lock);
836: kn->kn_flags |= EV_CLEAR;
837: SLIST_INSERT_HEAD(&fs_klist, kn, kn_selnext);
838: mutex_exit(&kqueue_misc_lock);
839:
840: return 0;
841: }
842:
843: static void
844: filt_fsdetach(struct knote *kn)
845: {
846:
847: mutex_enter(&kqueue_misc_lock);
848: SLIST_REMOVE(&fs_klist, kn, knote, kn_selnext);
849: mutex_exit(&kqueue_misc_lock);
850: }
851:
852: static int
853: filt_fs(struct knote *kn, long hint)
854: {
855: int rv;
856:
857: mutex_enter(&kqueue_misc_lock);
858: kn->kn_fflags |= hint;
859: rv = (kn->kn_fflags != 0);
860: mutex_exit(&kqueue_misc_lock);
861:
862: return rv;
863: }
864:
1.108 christos 865: static int
866: filt_userattach(struct knote *kn)
867: {
868: struct kqueue *kq = kn->kn_kq;
869:
870: /*
871: * EVFILT_USER knotes are not attached to anything in the kernel.
872: */
873: mutex_spin_enter(&kq->kq_lock);
874: kn->kn_hook = NULL;
875: if (kn->kn_fflags & NOTE_TRIGGER)
876: kn->kn_hookid = 1;
877: else
878: kn->kn_hookid = 0;
879: mutex_spin_exit(&kq->kq_lock);
880: return (0);
881: }
882:
/*
 * filt_userdetach:
 *
 *	Detach method for EVFILT_USER.  Nothing to undo: EVFILT_USER
 *	knotes are not attached to anything in the kernel.
 */
static void
filt_userdetach(struct knote *kn)
{

	/* Nothing to do. */
}
891:
892: static int
893: filt_user(struct knote *kn, long hint)
894: {
895: struct kqueue *kq = kn->kn_kq;
896: int hookid;
897:
898: mutex_spin_enter(&kq->kq_lock);
899: hookid = kn->kn_hookid;
900: mutex_spin_exit(&kq->kq_lock);
901:
902: return hookid;
903: }
904:
905: static void
906: filt_usertouch(struct knote *kn, struct kevent *kev, long type)
907: {
908: int ffctrl;
909:
1.117 skrll 910: KASSERT(mutex_owned(&kn->kn_kq->kq_lock));
1.116 jdolecek 911:
1.108 christos 912: switch (type) {
913: case EVENT_REGISTER:
914: if (kev->fflags & NOTE_TRIGGER)
915: kn->kn_hookid = 1;
916:
917: ffctrl = kev->fflags & NOTE_FFCTRLMASK;
918: kev->fflags &= NOTE_FFLAGSMASK;
919: switch (ffctrl) {
920: case NOTE_FFNOP:
921: break;
922:
923: case NOTE_FFAND:
924: kn->kn_sfflags &= kev->fflags;
925: break;
926:
927: case NOTE_FFOR:
928: kn->kn_sfflags |= kev->fflags;
929: break;
930:
931: case NOTE_FFCOPY:
932: kn->kn_sfflags = kev->fflags;
933: break;
934:
935: default:
936: /* XXX Return error? */
937: break;
938: }
939: kn->kn_sdata = kev->data;
940: if (kev->flags & EV_CLEAR) {
941: kn->kn_hookid = 0;
942: kn->kn_data = 0;
943: kn->kn_fflags = 0;
944: }
945: break;
946:
947: case EVENT_PROCESS:
948: *kev = kn->kn_kevent;
949: kev->fflags = kn->kn_sfflags;
950: kev->data = kn->kn_sdata;
951: if (kn->kn_flags & EV_CLEAR) {
952: kn->kn_hookid = 0;
953: kn->kn_data = 0;
954: kn->kn_fflags = 0;
955: }
956: break;
957:
958: default:
959: panic("filt_usertouch() - invalid type (%ld)", type);
960: break;
961: }
962: }
963:
1.102 christos 964: /*
1.3 jdolecek 965: * filt_seltrue:
966: *
967: * This filter "event" routine simulates seltrue().
968: */
1.1 lukem 969: int
1.33 yamt 970: filt_seltrue(struct knote *kn, long hint)
1.1 lukem 971: {
972:
1.3 jdolecek 973: /*
974: * We don't know how much data can be read/written,
975: * but we know that it *can* be. This is about as
976: * good as select/poll does as well.
977: */
978: kn->kn_data = 0;
979: return (1);
980: }
981:
/*
 * The following provide a complete kqfilter entry for device switch
 * tables; it has the same effect as a filter that uses filt_seltrue()
 * as its event method.
 */
/*
 * filt_seltruedetach:
 *
 *	Detach routine used by seltrue_filtops; it has no work to do.
 */
static void
filt_seltruedetach(struct knote *kn)
{
	/* Intentionally empty. */
}
991:
1.96 maya 992: const struct filterops seltrue_filtops = {
1.123 ! thorpej 993: .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
1.96 maya 994: .f_attach = NULL,
995: .f_detach = filt_seltruedetach,
996: .f_event = filt_seltrue,
997: };
1.3 jdolecek 998:
999: int
1.33 yamt 1000: seltrue_kqfilter(dev_t dev, struct knote *kn)
1.3 jdolecek 1001: {
1002: switch (kn->kn_filter) {
1003: case EVFILT_READ:
1004: case EVFILT_WRITE:
1005: kn->kn_fop = &seltrue_filtops;
1006: break;
1007: default:
1.43 pooka 1008: return (EINVAL);
1.3 jdolecek 1009: }
1010:
1011: /* Nothing more to do */
1012: return (0);
1013: }
1014:
1015: /*
1016: * kqueue(2) system call.
1017: */
1.72 christos 1018: static int
1019: kqueue1(struct lwp *l, int flags, register_t *retval)
1.3 jdolecek 1020: {
1.49 ad 1021: struct kqueue *kq;
1022: file_t *fp;
1023: int fd, error;
1.3 jdolecek 1024:
1.49 ad 1025: if ((error = fd_allocfile(&fp, &fd)) != 0)
1026: return error;
1.75 christos 1027: fp->f_flag = FREAD | FWRITE | (flags & (FNONBLOCK|FNOSIGPIPE));
1.1 lukem 1028: fp->f_type = DTYPE_KQUEUE;
1029: fp->f_ops = &kqueueops;
1.49 ad 1030: kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
1031: mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
1032: cv_init(&kq->kq_cv, "kqueue");
1033: selinit(&kq->kq_sel);
1.1 lukem 1034: TAILQ_INIT(&kq->kq_head);
1.82 matt 1035: fp->f_kqueue = kq;
1.3 jdolecek 1036: *retval = fd;
1.49 ad 1037: kq->kq_fdp = curlwp->l_fd;
1.72 christos 1038: fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
1.49 ad 1039: fd_affix(curproc, fp, fd);
1040: return error;
1.1 lukem 1041: }
1042:
1.3 jdolecek 1043: /*
1.72 christos 1044: * kqueue(2) system call.
1045: */
1046: int
1047: sys_kqueue(struct lwp *l, const void *v, register_t *retval)
1048: {
1049: return kqueue1(l, 0, retval);
1050: }
1051:
1052: int
1053: sys_kqueue1(struct lwp *l, const struct sys_kqueue1_args *uap,
1054: register_t *retval)
1055: {
1056: /* {
1057: syscallarg(int) flags;
1058: } */
1059: return kqueue1(l, SCARG(uap, flags), retval);
1060: }
1061:
1062: /*
1.3 jdolecek 1063: * kevent(2) system call.
1064: */
1.61 christos 1065: int
1.81 matt 1066: kevent_fetch_changes(void *ctx, const struct kevent *changelist,
1.61 christos 1067: struct kevent *changes, size_t index, int n)
1.24 cube 1068: {
1.49 ad 1069:
1.24 cube 1070: return copyin(changelist + index, changes, n * sizeof(*changes));
1071: }
1072:
1.61 christos 1073: int
1.81 matt 1074: kevent_put_events(void *ctx, struct kevent *events,
1.61 christos 1075: struct kevent *eventlist, size_t index, int n)
1.24 cube 1076: {
1.49 ad 1077:
1.24 cube 1078: return copyout(events, eventlist + index, n * sizeof(*events));
1079: }
1080:
1081: static const struct kevent_ops kevent_native_ops = {
1.60 gmcgarry 1082: .keo_private = NULL,
1083: .keo_fetch_timeout = copyin,
1084: .keo_fetch_changes = kevent_fetch_changes,
1085: .keo_put_events = kevent_put_events,
1.24 cube 1086: };
1087:
1.1 lukem 1088: int
1.61 christos 1089: sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap,
1090: register_t *retval)
1.1 lukem 1091: {
1.44 dsl 1092: /* {
1.3 jdolecek 1093: syscallarg(int) fd;
1094: syscallarg(const struct kevent *) changelist;
1095: syscallarg(size_t) nchanges;
1096: syscallarg(struct kevent *) eventlist;
1097: syscallarg(size_t) nevents;
1098: syscallarg(const struct timespec *) timeout;
1.44 dsl 1099: } */
1.24 cube 1100:
1.49 ad 1101: return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
1.24 cube 1102: SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
1103: SCARG(uap, timeout), &kevent_native_ops);
1104: }
1105:
1106: int
1.49 ad 1107: kevent1(register_t *retval, int fd,
1108: const struct kevent *changelist, size_t nchanges,
1109: struct kevent *eventlist, size_t nevents,
1110: const struct timespec *timeout,
1111: const struct kevent_ops *keops)
1.24 cube 1112: {
1.49 ad 1113: struct kevent *kevp;
1114: struct kqueue *kq;
1.3 jdolecek 1115: struct timespec ts;
1.49 ad 1116: size_t i, n, ichange;
1117: int nerrors, error;
1.80 maxv 1118: struct kevent kevbuf[KQ_NEVENTS]; /* approx 300 bytes on 64-bit */
1.49 ad 1119: file_t *fp;
1.3 jdolecek 1120:
1121: /* check that we're dealing with a kq */
1.49 ad 1122: fp = fd_getfile(fd);
1.10 pk 1123: if (fp == NULL)
1.1 lukem 1124: return (EBADF);
1.10 pk 1125:
1126: if (fp->f_type != DTYPE_KQUEUE) {
1.49 ad 1127: fd_putfile(fd);
1.10 pk 1128: return (EBADF);
1129: }
1.1 lukem 1130:
1.24 cube 1131: if (timeout != NULL) {
1132: error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
1.1 lukem 1133: if (error)
1134: goto done;
1.24 cube 1135: timeout = &ts;
1.1 lukem 1136: }
1137:
1.82 matt 1138: kq = fp->f_kqueue;
1.1 lukem 1139: nerrors = 0;
1.24 cube 1140: ichange = 0;
1.1 lukem 1141:
1.3 jdolecek 1142: /* traverse list of events to register */
1.24 cube 1143: while (nchanges > 0) {
1.49 ad 1144: n = MIN(nchanges, __arraycount(kevbuf));
1.24 cube 1145: error = (*keops->keo_fetch_changes)(keops->keo_private,
1.49 ad 1146: changelist, kevbuf, ichange, n);
1.1 lukem 1147: if (error)
1148: goto done;
1149: for (i = 0; i < n; i++) {
1.49 ad 1150: kevp = &kevbuf[i];
1.1 lukem 1151: kevp->flags &= ~EV_SYSFLAGS;
1.3 jdolecek 1152: /* register each knote */
1.49 ad 1153: error = kqueue_register(kq, kevp);
1.89 abhinav 1154: if (!error && !(kevp->flags & EV_RECEIPT))
1155: continue;
1156: if (nevents == 0)
1157: goto done;
1158: kevp->flags = EV_ERROR;
1159: kevp->data = error;
1160: error = (*keops->keo_put_events)
1161: (keops->keo_private, kevp,
1162: eventlist, nerrors, 1);
1163: if (error)
1164: goto done;
1165: nevents--;
1166: nerrors++;
1.1 lukem 1167: }
1.24 cube 1168: nchanges -= n; /* update the results */
1169: ichange += n;
1.1 lukem 1170: }
1171: if (nerrors) {
1.3 jdolecek 1172: *retval = nerrors;
1.1 lukem 1173: error = 0;
1174: goto done;
1175: }
1176:
1.3 jdolecek 1177: /* actually scan through the events */
1.49 ad 1178: error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
1179: kevbuf, __arraycount(kevbuf));
1.3 jdolecek 1180: done:
1.49 ad 1181: fd_putfile(fd);
1.1 lukem 1182: return (error);
1183: }
1184:
1.3 jdolecek 1185: /*
1186: * Register a given kevent kev onto the kqueue
1187: */
1.49 ad 1188: static int
1189: kqueue_register(struct kqueue *kq, struct kevent *kev)
1.1 lukem 1190: {
1.49 ad 1191: struct kfilter *kfilter;
1192: filedesc_t *fdp;
1193: file_t *fp;
1194: fdfile_t *ff;
1195: struct knote *kn, *newkn;
1196: struct klist *list;
1197: int error, fd, rv;
1.3 jdolecek 1198:
1199: fdp = kq->kq_fdp;
1200: fp = NULL;
1201: kn = NULL;
1202: error = 0;
1.49 ad 1203: fd = 0;
1204:
1205: newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);
1206:
1207: rw_enter(&kqueue_filter_lock, RW_READER);
1.3 jdolecek 1208: kfilter = kfilter_byfilter(kev->filter);
1209: if (kfilter == NULL || kfilter->filtops == NULL) {
1210: /* filter not found nor implemented */
1.49 ad 1211: rw_exit(&kqueue_filter_lock);
1212: kmem_free(newkn, sizeof(*newkn));
1.1 lukem 1213: return (EINVAL);
1214: }
1215:
1.3 jdolecek 1216: /* search if knote already exists */
1.121 thorpej 1217: if (kfilter->filtops->f_flags & FILTEROP_ISFD) {
1.3 jdolecek 1218: /* monitoring a file descriptor */
1.87 christos 1219: /* validate descriptor */
1.88 christos 1220: if (kev->ident > INT_MAX
1221: || (fp = fd_getfile(fd = kev->ident)) == NULL) {
1.49 ad 1222: rw_exit(&kqueue_filter_lock);
1223: kmem_free(newkn, sizeof(*newkn));
1224: return EBADF;
1225: }
1.74 rmind 1226: mutex_enter(&fdp->fd_lock);
1.65 ad 1227: ff = fdp->fd_dt->dt_ff[fd];
1.98 christos 1228: if (ff->ff_refcnt & FR_CLOSING) {
1229: error = EBADF;
1230: goto doneunlock;
1231: }
1.49 ad 1232: if (fd <= fdp->fd_lastkqfile) {
1233: SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
1.1 lukem 1234: if (kq == kn->kn_kq &&
1235: kev->filter == kn->kn_filter)
1236: break;
1.49 ad 1237: }
1.1 lukem 1238: }
1239: } else {
1.3 jdolecek 1240: /*
1241: * not monitoring a file descriptor, so
1242: * lookup knotes in internal hash table
1243: */
1.74 rmind 1244: mutex_enter(&fdp->fd_lock);
1.1 lukem 1245: if (fdp->fd_knhashmask != 0) {
1246: list = &fdp->fd_knhash[
1247: KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1.49 ad 1248: SLIST_FOREACH(kn, list, kn_link) {
1.1 lukem 1249: if (kev->ident == kn->kn_id &&
1250: kq == kn->kn_kq &&
1251: kev->filter == kn->kn_filter)
1252: break;
1.49 ad 1253: }
1.1 lukem 1254: }
1255: }
1256:
1257: /*
1258: * kn now contains the matching knote, or NULL if no match
1259: */
1.108 christos 1260: if (kn == NULL) {
1261: if (kev->flags & EV_ADD) {
1.3 jdolecek 1262: /* create new knote */
1.49 ad 1263: kn = newkn;
1264: newkn = NULL;
1265: kn->kn_obj = fp;
1.79 christos 1266: kn->kn_id = kev->ident;
1.1 lukem 1267: kn->kn_kq = kq;
1.3 jdolecek 1268: kn->kn_fop = kfilter->filtops;
1.49 ad 1269: kn->kn_kfilter = kfilter;
1270: kn->kn_sfflags = kev->fflags;
1271: kn->kn_sdata = kev->data;
1272: kev->fflags = 0;
1273: kev->data = 0;
1274: kn->kn_kevent = *kev;
1.1 lukem 1275:
1.85 christos 1276: KASSERT(kn->kn_fop != NULL);
1.1 lukem 1277: /*
1278: * apply reference count to knote structure, and
1279: * do not release it at the end of this routine.
1280: */
1281: fp = NULL;
1282:
1.121 thorpej 1283: if (!(kn->kn_fop->f_flags & FILTEROP_ISFD)) {
1.49 ad 1284: /*
1285: * If knote is not on an fd, store on
1286: * internal hash table.
1287: */
1288: if (fdp->fd_knhashmask == 0) {
1289: /* XXXAD can block with fd_lock held */
1290: fdp->fd_knhash = hashinit(KN_HASHSIZE,
1.59 ad 1291: HASH_LIST, true,
1.49 ad 1292: &fdp->fd_knhashmask);
1293: }
1294: list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
1295: fdp->fd_knhashmask)];
1296: } else {
1297: /* Otherwise, knote is on an fd. */
1298: list = (struct klist *)
1.65 ad 1299: &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist;
1.49 ad 1300: if ((int)kn->kn_id > fdp->fd_lastkqfile)
1301: fdp->fd_lastkqfile = kn->kn_id;
1302: }
1303: SLIST_INSERT_HEAD(list, kn, kn_link);
1.1 lukem 1304:
1.122 thorpej 1305: /*
1306: * N.B. kn->kn_fop may change as the result
1307: * of filter_attach()!
1308: */
1309: error = filter_attach(kn);
1.49 ad 1310: if (error != 0) {
1.100 christos 1311: #ifdef DEBUG
1.105 christos 1312: struct proc *p = curlwp->l_proc;
1.101 christos 1313: const file_t *ft = kn->kn_obj;
1.105 christos 1314: printf("%s: %s[%d]: event type %d not "
1315: "supported for file type %d/%s "
1316: "(error %d)\n", __func__,
1317: p->p_comm, p->p_pid,
1.101 christos 1318: kn->kn_filter, ft ? ft->f_type : -1,
1319: ft ? ft->f_ops->fo_name : "?", error);
1.100 christos 1320: #endif
1321:
1.49 ad 1322: /* knote_detach() drops fdp->fd_lock */
1323: knote_detach(kn, fdp, false);
1.1 lukem 1324: goto done;
1325: }
1.49 ad 1326: atomic_inc_uint(&kfilter->refcnt);
1.108 christos 1327: goto done_ev_add;
1.1 lukem 1328: } else {
1.108 christos 1329: /* No matching knote and the EV_ADD flag is not set. */
1330: error = ENOENT;
1331: goto doneunlock;
1.1 lukem 1332: }
1.108 christos 1333: }
1334:
1335: if (kev->flags & EV_DELETE) {
1336: /* knote_detach() drops fdp->fd_lock */
1337: knote_detach(kn, fdp, true);
1338: goto done;
1339: }
1340:
1341: /*
1342: * The user may change some filter values after the
1343: * initial EV_ADD, but doing so will not reset any
1344: * filter which have already been triggered.
1345: */
1346: kn->kn_kevent.udata = kev->udata;
1347: KASSERT(kn->kn_fop != NULL);
1.121 thorpej 1348: if (!(kn->kn_fop->f_flags & FILTEROP_ISFD) &&
1349: kn->kn_fop->f_touch != NULL) {
1.116 jdolecek 1350: mutex_spin_enter(&kq->kq_lock);
1.122 thorpej 1351: filter_touch(kn, kev, EVENT_REGISTER);
1.116 jdolecek 1352: mutex_spin_exit(&kq->kq_lock);
1.49 ad 1353: } else {
1.108 christos 1354: kn->kn_sfflags = kev->fflags;
1355: kn->kn_sdata = kev->data;
1.1 lukem 1356: }
1357:
1.108 christos 1358: /*
1359: * We can get here if we are trying to attach
1360: * an event to a file descriptor that does not
1361: * support events, and the attach routine is
1362: * broken and does not return an error.
1363: */
1364: done_ev_add:
1.122 thorpej 1365: rv = filter_event(kn, 0);
1.108 christos 1366: if (rv)
1367: knote_activate(kn);
1368:
1.3 jdolecek 1369: /* disable knote */
1.49 ad 1370: if ((kev->flags & EV_DISABLE)) {
1371: mutex_spin_enter(&kq->kq_lock);
1372: if ((kn->kn_status & KN_DISABLED) == 0)
1373: kn->kn_status |= KN_DISABLED;
1374: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 1375: }
1376:
1.3 jdolecek 1377: /* enable knote */
1.49 ad 1378: if ((kev->flags & EV_ENABLE)) {
1379: knote_enqueue(kn);
1.1 lukem 1380: }
1.98 christos 1381: doneunlock:
1.49 ad 1382: mutex_exit(&fdp->fd_lock);
1.3 jdolecek 1383: done:
1.49 ad 1384: rw_exit(&kqueue_filter_lock);
1385: if (newkn != NULL)
1386: kmem_free(newkn, sizeof(*newkn));
1.1 lukem 1387: if (fp != NULL)
1.49 ad 1388: fd_putfile(fd);
1.1 lukem 1389: return (error);
1390: }
1391:
#if defined(DEBUG)
/* Format kn's status bits into buf and yield buf, for use in panic(). */
#define	KN_FMT(buf, kn)							\
    (snprintb((buf), sizeof(buf), __KN_FLAG_BITS, (kn)->kn_status), buf)

/*
 * kqueue_check:
 *
 *	DEBUG-only consistency check of a kqueue's pending list; kq_lock
 *	must be held.  Panics if an entry is neither a marker nor queued,
 *	if a non-marker entry belongs to another kqueue or is not active,
 *	or if more non-marker entries are found than KQ_COUNT(kq) reports.
 *	func and line identify the call site in the panic message.
 */
static void
kqueue_check(const char *func, size_t line, const struct kqueue *kq)
{
	const struct knote *kn;
	u_int count = 0;
	int nmarker = 0;
	char buf[128];

	KASSERT(mutex_owned(&kq->kq_lock));
	KASSERT(KQ_COUNT(kq) < UINT_MAX / 2);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
			panic("%s,%zu: kq=%p kn=%p !(MARKER|QUEUED) %s",
			    func, line, kq, kn, KN_FMT(buf, kn));
		}

		/* Markers are only counted, not validated. */
		if ((kn->kn_status & KN_MARKER) != 0) {
			nmarker++;
			continue;
		}

		if (kn->kn_kq != kq) {
			panic("%s,%zu: kq=%p kn(%p) != kn->kq(%p): %s",
			    func, line, kq, kn, kn->kn_kq,
			    KN_FMT(buf, kn));
		}
		if ((kn->kn_status & KN_ACTIVE) == 0) {
			panic("%s,%zu: kq=%p kn=%p: !ACTIVE %s",
			    func, line, kq, kn, KN_FMT(buf, kn));
		}
		count++;
		if (count > KQ_COUNT(kq)) {
			panic("%s,%zu: kq=%p kq->kq_count(%d) != "
			    "count(%d), nmarker=%d",
			    func, line, kq, KQ_COUNT(kq), count,
			    nmarker);
		}
	}
}
#define	kq_check(a)	kqueue_check(__func__, __LINE__, (a))
#else /* defined(DEBUG) */
#define	kq_check(a)	/* nothing */
#endif /* defined(DEBUG) */
1440:
1.118 jdolecek 1441: static void
1442: kqueue_restart(file_t *fp)
1443: {
1444: struct kqueue *kq = fp->f_kqueue;
1445: KASSERT(kq != NULL);
1446:
1447: mutex_spin_enter(&kq->kq_lock);
1448: kq->kq_count |= KQ_RESTART;
1449: cv_broadcast(&kq->kq_cv);
1450: mutex_spin_exit(&kq->kq_lock);
1451: }
1452:
1.3 jdolecek 1453: /*
1454: * Scan through the list of events on fp (for a maximum of maxevents),
1455: * returning the results in to ulistp. Timeout is determined by tsp; if
1456: * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
1457: * as appropriate.
1458: */
1.1 lukem 1459: static int
1.49 ad 1460: kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
1461: const struct timespec *tsp, register_t *retval,
1462: const struct kevent_ops *keops, struct kevent *kevbuf,
1463: size_t kevcnt)
1.1 lukem 1464: {
1.3 jdolecek 1465: struct kqueue *kq;
1466: struct kevent *kevp;
1.62 christos 1467: struct timespec ats, sleepts;
1.85 christos 1468: struct knote *kn, *marker, morker;
1.24 cube 1469: size_t count, nkev, nevents;
1.111 jdolecek 1470: int timeout, error, touch, rv, influx;
1.49 ad 1471: filedesc_t *fdp;
1.1 lukem 1472:
1.49 ad 1473: fdp = curlwp->l_fd;
1.82 matt 1474: kq = fp->f_kqueue;
1.1 lukem 1475: count = maxevents;
1.24 cube 1476: nkev = nevents = error = 0;
1.49 ad 1477: if (count == 0) {
1478: *retval = 0;
1479: return 0;
1480: }
1.1 lukem 1481:
1.9 jdolecek 1482: if (tsp) { /* timeout supplied */
1.63 christos 1483: ats = *tsp;
1.62 christos 1484: if (inittimeleft(&ats, &sleepts) == -1) {
1.49 ad 1485: *retval = maxevents;
1486: return EINVAL;
1.1 lukem 1487: }
1.62 christos 1488: timeout = tstohz(&ats);
1.9 jdolecek 1489: if (timeout <= 0)
1.29 kardel 1490: timeout = -1; /* do poll */
1.1 lukem 1491: } else {
1.9 jdolecek 1492: /* no timeout, wait forever */
1.1 lukem 1493: timeout = 0;
1.93 riastrad 1494: }
1.1 lukem 1495:
1.85 christos 1496: memset(&morker, 0, sizeof(morker));
1497: marker = &morker;
1.49 ad 1498: marker->kn_status = KN_MARKER;
1499: mutex_spin_enter(&kq->kq_lock);
1.3 jdolecek 1500: retry:
1.49 ad 1501: kevp = kevbuf;
1.118 jdolecek 1502: if (KQ_COUNT(kq) == 0) {
1.49 ad 1503: if (timeout >= 0) {
1504: error = cv_timedwait_sig(&kq->kq_cv,
1505: &kq->kq_lock, timeout);
1506: if (error == 0) {
1.118 jdolecek 1507: if (KQ_COUNT(kq) == 0 &&
1508: (kq->kq_count & KQ_RESTART)) {
1509: /* return to clear file reference */
1510: error = ERESTART;
1511: } else if (tsp == NULL || (timeout =
1512: gettimeleft(&ats, &sleepts)) > 0) {
1.49 ad 1513: goto retry;
1.118 jdolecek 1514: }
1.49 ad 1515: } else {
1516: /* don't restart after signals... */
1517: if (error == ERESTART)
1518: error = EINTR;
1519: if (error == EWOULDBLOCK)
1520: error = 0;
1521: }
1.1 lukem 1522: }
1.92 christos 1523: mutex_spin_exit(&kq->kq_lock);
1.110 jdolecek 1524: goto done;
1525: }
1526:
1527: /* mark end of knote list */
1528: TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1.111 jdolecek 1529: influx = 0;
1.1 lukem 1530:
1.110 jdolecek 1531: /*
1532: * Acquire the fdp->fd_lock interlock to avoid races with
1533: * file creation/destruction from other threads.
1534: */
1.111 jdolecek 1535: relock:
1.110 jdolecek 1536: mutex_spin_exit(&kq->kq_lock);
1537: mutex_enter(&fdp->fd_lock);
1538: mutex_spin_enter(&kq->kq_lock);
1.92 christos 1539:
1.110 jdolecek 1540: while (count != 0) {
1541: kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
1.111 jdolecek 1542:
1543: if ((kn->kn_status & KN_MARKER) != 0 && kn != marker) {
1544: if (influx) {
1545: influx = 0;
1546: KQ_FLUX_WAKEUP(kq);
1547: }
1548: mutex_exit(&fdp->fd_lock);
1.114 jdolecek 1549: (void)cv_wait(&kq->kq_cv, &kq->kq_lock);
1.111 jdolecek 1550: goto relock;
1551: }
1552:
1553: TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1554: if (kn == marker) {
1555: /* it's our marker, stop */
1556: KQ_FLUX_WAKEUP(kq);
1557: if (count == maxevents) {
1.110 jdolecek 1558: mutex_exit(&fdp->fd_lock);
1559: goto retry;
1.49 ad 1560: }
1.111 jdolecek 1561: break;
1.110 jdolecek 1562: }
1.111 jdolecek 1563: KASSERT((kn->kn_status & KN_BUSY) == 0);
1564:
1.110 jdolecek 1565: kq_check(kq);
1.115 jdolecek 1566: kn->kn_status &= ~KN_QUEUED;
1.110 jdolecek 1567: kn->kn_status |= KN_BUSY;
1568: kq_check(kq);
1569: if (kn->kn_status & KN_DISABLED) {
1.115 jdolecek 1570: kn->kn_status &= ~KN_BUSY;
1.111 jdolecek 1571: kq->kq_count--;
1.110 jdolecek 1572: /* don't want disabled events */
1573: continue;
1574: }
1575: if ((kn->kn_flags & EV_ONESHOT) == 0) {
1576: mutex_spin_exit(&kq->kq_lock);
1577: KASSERT(mutex_owned(&fdp->fd_lock));
1.122 thorpej 1578: rv = filter_event(kn, 0);
1.110 jdolecek 1579: mutex_spin_enter(&kq->kq_lock);
1.115 jdolecek 1580: /* Re-poll if note was re-enqueued. */
1581: if ((kn->kn_status & KN_QUEUED) != 0) {
1582: kn->kn_status &= ~KN_BUSY;
1583: /* Re-enqueue raised kq_count, lower it again */
1584: kq->kq_count--;
1585: influx = 1;
1586: continue;
1587: }
1.110 jdolecek 1588: if (rv == 0) {
1589: /*
1590: * non-ONESHOT event that hasn't
1591: * triggered again, so de-queue.
1592: */
1.115 jdolecek 1593: kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
1.111 jdolecek 1594: kq->kq_count--;
1595: influx = 1;
1.110 jdolecek 1596: continue;
1.49 ad 1597: }
1.110 jdolecek 1598: }
1599: KASSERT(kn->kn_fop != NULL);
1.121 thorpej 1600: touch = (!(kn->kn_fop->f_flags & FILTEROP_ISFD) &&
1.110 jdolecek 1601: kn->kn_fop->f_touch != NULL);
1602: /* XXXAD should be got from f_event if !oneshot. */
1603: if (touch) {
1.122 thorpej 1604: filter_touch(kn, kevp, EVENT_PROCESS);
1.110 jdolecek 1605: } else {
1606: *kevp = kn->kn_kevent;
1607: }
1608: kevp++;
1609: nkev++;
1.111 jdolecek 1610: influx = 1;
1.110 jdolecek 1611: if (kn->kn_flags & EV_ONESHOT) {
1612: /* delete ONESHOT events after retrieval */
1.115 jdolecek 1613: kn->kn_status &= ~KN_BUSY;
1.111 jdolecek 1614: kq->kq_count--;
1.110 jdolecek 1615: mutex_spin_exit(&kq->kq_lock);
1616: knote_detach(kn, fdp, true);
1617: mutex_enter(&fdp->fd_lock);
1618: mutex_spin_enter(&kq->kq_lock);
1619: } else if (kn->kn_flags & EV_CLEAR) {
1620: /* clear state after retrieval */
1621: kn->kn_data = 0;
1622: kn->kn_fflags = 0;
1623: /*
1624: * Manually clear knotes who weren't
1625: * 'touch'ed.
1626: */
1627: if (touch == 0) {
1.49 ad 1628: kn->kn_data = 0;
1629: kn->kn_fflags = 0;
1630: }
1.115 jdolecek 1631: kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
1.111 jdolecek 1632: kq->kq_count--;
1.110 jdolecek 1633: } else if (kn->kn_flags & EV_DISPATCH) {
1634: kn->kn_status |= KN_DISABLED;
1.115 jdolecek 1635: kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
1.111 jdolecek 1636: kq->kq_count--;
1.110 jdolecek 1637: } else {
1638: /* add event back on list */
1639: kq_check(kq);
1.115 jdolecek 1640: kn->kn_status |= KN_QUEUED;
1.110 jdolecek 1641: kn->kn_status &= ~KN_BUSY;
1642: TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1643: kq_check(kq);
1644: }
1.111 jdolecek 1645:
1.110 jdolecek 1646: if (nkev == kevcnt) {
1647: /* do copyouts in kevcnt chunks */
1.111 jdolecek 1648: influx = 0;
1649: KQ_FLUX_WAKEUP(kq);
1.110 jdolecek 1650: mutex_spin_exit(&kq->kq_lock);
1651: mutex_exit(&fdp->fd_lock);
1652: error = (*keops->keo_put_events)
1653: (keops->keo_private,
1654: kevbuf, ulistp, nevents, nkev);
1655: mutex_enter(&fdp->fd_lock);
1656: mutex_spin_enter(&kq->kq_lock);
1657: nevents += nkev;
1658: nkev = 0;
1659: kevp = kevbuf;
1660: }
1661: count--;
1662: if (error != 0 || count == 0) {
1663: /* remove marker */
1664: TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
1665: break;
1.1 lukem 1666: }
1667: }
1.111 jdolecek 1668: KQ_FLUX_WAKEUP(kq);
1.110 jdolecek 1669: mutex_spin_exit(&kq->kq_lock);
1670: mutex_exit(&fdp->fd_lock);
1671:
1672: done:
1.49 ad 1673: if (nkev != 0) {
1.3 jdolecek 1674: /* copyout remaining events */
1.24 cube 1675: error = (*keops->keo_put_events)(keops->keo_private,
1.49 ad 1676: kevbuf, ulistp, nevents, nkev);
1677: }
1.3 jdolecek 1678: *retval = maxevents - count;
1679:
1.49 ad 1680: return error;
1.1 lukem 1681: }
1682:
1683: /*
1.49 ad 1684: * fileops ioctl method for a kqueue descriptor.
1.3 jdolecek 1685: *
1686: * Two ioctls are currently supported. They both use struct kfilter_mapping:
1687: * KFILTER_BYNAME find name for filter, and return result in
1688: * name, which is of size len.
1689: * KFILTER_BYFILTER find filter for name. len is ignored.
1690: */
1.1 lukem 1691: /*ARGSUSED*/
1692: static int
1.49 ad 1693: kqueue_ioctl(file_t *fp, u_long com, void *data)
1.1 lukem 1694: {
1.3 jdolecek 1695: struct kfilter_mapping *km;
1696: const struct kfilter *kfilter;
1697: char *name;
1698: int error;
1699:
1.49 ad 1700: km = data;
1.3 jdolecek 1701: error = 0;
1.49 ad 1702: name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);
1.3 jdolecek 1703:
1704: switch (com) {
1705: case KFILTER_BYFILTER: /* convert filter -> name */
1.49 ad 1706: rw_enter(&kqueue_filter_lock, RW_READER);
1.3 jdolecek 1707: kfilter = kfilter_byfilter(km->filter);
1.49 ad 1708: if (kfilter != NULL) {
1709: strlcpy(name, kfilter->name, KFILTER_MAXNAME);
1710: rw_exit(&kqueue_filter_lock);
1711: error = copyoutstr(name, km->name, km->len, NULL);
1712: } else {
1713: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 1714: error = ENOENT;
1.49 ad 1715: }
1.3 jdolecek 1716: break;
1717:
1718: case KFILTER_BYNAME: /* convert name -> filter */
1719: error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
1720: if (error) {
1721: break;
1722: }
1.49 ad 1723: rw_enter(&kqueue_filter_lock, RW_READER);
1.3 jdolecek 1724: kfilter = kfilter_byname(name);
1725: if (kfilter != NULL)
1726: km->filter = kfilter->filter;
1727: else
1728: error = ENOENT;
1.49 ad 1729: rw_exit(&kqueue_filter_lock);
1.3 jdolecek 1730: break;
1731:
1732: default:
1733: error = ENOTTY;
1.49 ad 1734: break;
1.3 jdolecek 1735:
1736: }
1.49 ad 1737: kmem_free(name, KFILTER_MAXNAME);
1.3 jdolecek 1738: return (error);
1739: }
1740:
1741: /*
1.49 ad 1742: * fileops fcntl method for a kqueue descriptor.
1.3 jdolecek 1743: */
1744: static int
1.49 ad 1745: kqueue_fcntl(file_t *fp, u_int com, void *data)
1.3 jdolecek 1746: {
1747:
1.1 lukem 1748: return (ENOTTY);
1749: }
1750:
1.3 jdolecek 1751: /*
1.49 ad 1752: * fileops poll method for a kqueue descriptor.
1.3 jdolecek 1753: * Determine if kqueue has events pending.
1754: */
1.1 lukem 1755: static int
1.49 ad 1756: kqueue_poll(file_t *fp, int events)
1.1 lukem 1757: {
1.3 jdolecek 1758: struct kqueue *kq;
1759: int revents;
1760:
1.82 matt 1761: kq = fp->f_kqueue;
1.49 ad 1762:
1.3 jdolecek 1763: revents = 0;
1764: if (events & (POLLIN | POLLRDNORM)) {
1.49 ad 1765: mutex_spin_enter(&kq->kq_lock);
1.118 jdolecek 1766: if (KQ_COUNT(kq) != 0) {
1.3 jdolecek 1767: revents |= events & (POLLIN | POLLRDNORM);
1.1 lukem 1768: } else {
1.49 ad 1769: selrecord(curlwp, &kq->kq_sel);
1.1 lukem 1770: }
1.52 yamt 1771: kq_check(kq);
1.49 ad 1772: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 1773: }
1.49 ad 1774:
1775: return revents;
1.1 lukem 1776: }
1777:
1.3 jdolecek 1778: /*
1.49 ad 1779: * fileops stat method for a kqueue descriptor.
1.3 jdolecek 1780: * Returns dummy info, with st_size being number of events pending.
1781: */
1.1 lukem 1782: static int
1.49 ad 1783: kqueue_stat(file_t *fp, struct stat *st)
1.1 lukem 1784: {
1.49 ad 1785: struct kqueue *kq;
1786:
1.82 matt 1787: kq = fp->f_kqueue;
1.1 lukem 1788:
1.49 ad 1789: memset(st, 0, sizeof(*st));
1.118 jdolecek 1790: st->st_size = KQ_COUNT(kq);
1.1 lukem 1791: st->st_blksize = sizeof(struct kevent);
1792: st->st_mode = S_IFIFO;
1.49 ad 1793:
1794: return 0;
1795: }
1796:
1797: static void
1798: kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
1799: {
1800: struct knote *kn;
1801: filedesc_t *fdp;
1802:
1803: fdp = kq->kq_fdp;
1804:
1805: KASSERT(mutex_owned(&fdp->fd_lock));
1806:
1807: for (kn = SLIST_FIRST(list); kn != NULL;) {
1808: if (kq != kn->kn_kq) {
1809: kn = SLIST_NEXT(kn, kn_link);
1810: continue;
1811: }
1812: knote_detach(kn, fdp, true);
1813: mutex_enter(&fdp->fd_lock);
1814: kn = SLIST_FIRST(list);
1815: }
1.1 lukem 1816: }
1817:
1.49 ad 1818:
1.3 jdolecek 1819: /*
1.49 ad 1820: * fileops close method for a kqueue descriptor.
1.3 jdolecek 1821: */
1.1 lukem 1822: static int
1.49 ad 1823: kqueue_close(file_t *fp)
1.1 lukem 1824: {
1.49 ad 1825: struct kqueue *kq;
1826: filedesc_t *fdp;
1827: fdfile_t *ff;
1828: int i;
1829:
1.82 matt 1830: kq = fp->f_kqueue;
1831: fp->f_kqueue = NULL;
1.79 christos 1832: fp->f_type = 0;
1.49 ad 1833: fdp = curlwp->l_fd;
1.1 lukem 1834:
1.49 ad 1835: mutex_enter(&fdp->fd_lock);
1836: for (i = 0; i <= fdp->fd_lastkqfile; i++) {
1.65 ad 1837: if ((ff = fdp->fd_dt->dt_ff[i]) == NULL)
1.49 ad 1838: continue;
1839: kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
1.1 lukem 1840: }
1841: if (fdp->fd_knhashmask != 0) {
1842: for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
1.49 ad 1843: kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
1.1 lukem 1844: }
1845: }
1.49 ad 1846: mutex_exit(&fdp->fd_lock);
1847:
1.118 jdolecek 1848: KASSERT(KQ_COUNT(kq) == 0);
1.49 ad 1849: mutex_destroy(&kq->kq_lock);
1850: cv_destroy(&kq->kq_cv);
1.48 rmind 1851: seldestroy(&kq->kq_sel);
1.49 ad 1852: kmem_free(kq, sizeof(*kq));
1.1 lukem 1853:
1854: return (0);
1855: }
1856:
1.3 jdolecek 1857: /*
1858: * struct fileops kqfilter method for a kqueue descriptor.
1859: * Event triggered when monitored kqueue changes.
1860: */
1861: static int
1.49 ad 1862: kqueue_kqfilter(file_t *fp, struct knote *kn)
1.3 jdolecek 1863: {
1864: struct kqueue *kq;
1.49 ad 1865:
1.82 matt 1866: kq = ((file_t *)kn->kn_obj)->f_kqueue;
1.49 ad 1867:
1868: KASSERT(fp == kn->kn_obj);
1.3 jdolecek 1869:
1870: if (kn->kn_filter != EVFILT_READ)
1.49 ad 1871: return 1;
1872:
1.3 jdolecek 1873: kn->kn_fop = &kqread_filtops;
1.49 ad 1874: mutex_enter(&kq->kq_lock);
1.109 thorpej 1875: selrecord_knote(&kq->kq_sel, kn);
1.49 ad 1876: mutex_exit(&kq->kq_lock);
1877:
1878: return 0;
1.3 jdolecek 1879: }
1880:
1881:
1882: /*
1.49 ad 1883: * Walk down a list of knotes, activating them if their event has
1884: * triggered. The caller's object lock (e.g. device driver lock)
1885: * must be held.
1.1 lukem 1886: */
1887: void
1888: knote(struct klist *list, long hint)
1889: {
1.71 drochner 1890: struct knote *kn, *tmpkn;
1.1 lukem 1891:
1.71 drochner 1892: SLIST_FOREACH_SAFE(kn, list, kn_selnext, tmpkn) {
1.85 christos 1893: KASSERT(kn->kn_fop != NULL);
1.84 christos 1894: KASSERT(kn->kn_fop->f_event != NULL);
1.49 ad 1895: if ((*kn->kn_fop->f_event)(kn, hint))
1896: knote_activate(kn);
1897: }
1.1 lukem 1898: }
1899:
1900: /*
1.49 ad 1901: * Remove all knotes referencing a specified fd
1.1 lukem 1902: */
1903: void
1.49 ad 1904: knote_fdclose(int fd)
1.1 lukem 1905: {
1.49 ad 1906: struct klist *list;
1.1 lukem 1907: struct knote *kn;
1.49 ad 1908: filedesc_t *fdp;
1.1 lukem 1909:
1.49 ad 1910: fdp = curlwp->l_fd;
1.106 riastrad 1911: mutex_enter(&fdp->fd_lock);
1.65 ad 1912: list = (struct klist *)&fdp->fd_dt->dt_ff[fd]->ff_knlist;
1.1 lukem 1913: while ((kn = SLIST_FIRST(list)) != NULL) {
1.49 ad 1914: knote_detach(kn, fdp, true);
1915: mutex_enter(&fdp->fd_lock);
1.1 lukem 1916: }
1.49 ad 1917: mutex_exit(&fdp->fd_lock);
1.1 lukem 1918: }
1919:
1920: /*
1.49 ad 1921: * Drop knote. Called with fdp->fd_lock held, and will drop before
1922: * returning.
1.3 jdolecek 1923: */
1.1 lukem 1924: static void
1.49 ad 1925: knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
1.1 lukem 1926: {
1.49 ad 1927: struct klist *list;
1.53 ad 1928: struct kqueue *kq;
1929:
1930: kq = kn->kn_kq;
1.1 lukem 1931:
1.49 ad 1932: KASSERT((kn->kn_status & KN_MARKER) == 0);
1933: KASSERT(mutex_owned(&fdp->fd_lock));
1.3 jdolecek 1934:
1.85 christos 1935: KASSERT(kn->kn_fop != NULL);
1.53 ad 1936: /* Remove from monitored object. */
1.49 ad 1937: if (dofop) {
1.122 thorpej 1938: filter_detach(kn);
1.1 lukem 1939: }
1.3 jdolecek 1940:
1.53 ad 1941: /* Remove from descriptor table. */
1.121 thorpej 1942: if (kn->kn_fop->f_flags & FILTEROP_ISFD)
1.65 ad 1943: list = (struct klist *)&fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist;
1.1 lukem 1944: else
1945: list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
1946:
1947: SLIST_REMOVE(list, kn, knote, kn_link);
1.53 ad 1948:
1949: /* Remove from kqueue. */
1.85 christos 1950: again:
1.53 ad 1951: mutex_spin_enter(&kq->kq_lock);
1952: if ((kn->kn_status & KN_QUEUED) != 0) {
1953: kq_check(kq);
1.85 christos 1954: kq->kq_count--;
1.53 ad 1955: TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1956: kn->kn_status &= ~KN_QUEUED;
1957: kq_check(kq);
1.85 christos 1958: } else if (kn->kn_status & KN_BUSY) {
1959: mutex_spin_exit(&kq->kq_lock);
1960: goto again;
1.53 ad 1961: }
1962: mutex_spin_exit(&kq->kq_lock);
1963:
1.49 ad 1964: mutex_exit(&fdp->fd_lock);
1.121 thorpej 1965: if (kn->kn_fop->f_flags & FILTEROP_ISFD)
1.49 ad 1966: fd_putfile(kn->kn_id);
1967: atomic_dec_uint(&kn->kn_kfilter->refcnt);
1968: kmem_free(kn, sizeof(*kn));
1.1 lukem 1969: }
1970:
1.3 jdolecek 1971: /*
1972: * Queue new event for knote.
1973: */
1.1 lukem 1974: static void
1975: knote_enqueue(struct knote *kn)
1976: {
1.49 ad 1977: struct kqueue *kq;
1978:
1979: KASSERT((kn->kn_status & KN_MARKER) == 0);
1.1 lukem 1980:
1.3 jdolecek 1981: kq = kn->kn_kq;
1.1 lukem 1982:
1.49 ad 1983: mutex_spin_enter(&kq->kq_lock);
1984: if ((kn->kn_status & KN_DISABLED) != 0) {
1985: kn->kn_status &= ~KN_DISABLED;
1986: }
1987: if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) {
1.52 yamt 1988: kq_check(kq);
1.85 christos 1989: kn->kn_status |= KN_QUEUED;
1.49 ad 1990: TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1991: kq->kq_count++;
1.52 yamt 1992: kq_check(kq);
1.49 ad 1993: cv_broadcast(&kq->kq_cv);
1994: selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
1995: }
1996: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 1997: }
1.49 ad 1998: /*
1999: * Queue new event for knote.
2000: */
2001: static void
2002: knote_activate(struct knote *kn)
2003: {
2004: struct kqueue *kq;
2005:
2006: KASSERT((kn->kn_status & KN_MARKER) == 0);
1.1 lukem 2007:
1.3 jdolecek 2008: kq = kn->kn_kq;
1.12 pk 2009:
1.49 ad 2010: mutex_spin_enter(&kq->kq_lock);
2011: kn->kn_status |= KN_ACTIVE;
2012: if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
1.52 yamt 2013: kq_check(kq);
1.85 christos 2014: kn->kn_status |= KN_QUEUED;
1.49 ad 2015: TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2016: kq->kq_count++;
1.52 yamt 2017: kq_check(kq);
1.49 ad 2018: cv_broadcast(&kq->kq_cv);
2019: selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
2020: }
2021: mutex_spin_exit(&kq->kq_lock);
1.1 lukem 2022: }
CVSweb <webmaster@jp.NetBSD.org>