Annotation of src/sys/kern/sys_timerfd.c, Revision 1.8
1.8 ! thorpej 1: /* $NetBSD: sys_timerfd.c,v 1.7 2021/11/24 16:35:33 thorpej Exp $ */
1.2 thorpej 2:
3: /*-
4: * Copyright (c) 2020 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: #include <sys/cdefs.h>
1.8 ! thorpej 33: __KERNEL_RCSID(0, "$NetBSD: sys_timerfd.c,v 1.7 2021/11/24 16:35:33 thorpej Exp $");
1.2 thorpej 34:
35: /*
36: * timerfd
37: *
38: * Timerfd objects are similar to POSIX timers, except they are associated
39: * with a file descriptor rather than a process. Timerfd objects are
40: * created with the timerfd_create(2) system call, similar to timer_create(2).
41: * The timerfd analogues for timer_gettime(2) and timer_settime(2) are
42: * timerfd_gettime(2) and timerfd_settime(2), respectively.
43: *
44: * When a timerfd object's timer fires, an internal counter is incremented.
45: * When this counter is non-zero, the descriptor associated with the timerfd
46: * object is "readable". Note that this is slightly different than the
47: * POSIX timer "overrun" counter, which only increments if the timer fires
48: * again while the notification signal is already pending. Thus, we are
49: * responsible for incrementing the "overrun" counter each time the timerfd
50: * timer fires.
51: *
52: * This implementation is API compatible with the Linux timerfd interface.
53: */
54:
1.3 skrll 55: #include <sys/param.h>
1.2 thorpej 56: #include <sys/types.h>
57: #include <sys/condvar.h>
58: #include <sys/file.h>
59: #include <sys/filedesc.h>
60: #include <sys/kauth.h>
61: #include <sys/mutex.h>
62: #include <sys/poll.h>
63: #include <sys/proc.h>
64: #include <sys/select.h>
65: #include <sys/stat.h>
66: #include <sys/syscallargs.h>
67: #include <sys/timerfd.h>
68: #include <sys/uio.h>
69:
70: /* N.B. all timerfd state is protected by itimer_lock() */
71: struct timerfd {
72: struct itimer tfd_itimer;
73: kcondvar_t tfd_read_wait;
74: struct selinfo tfd_read_sel;
75: int64_t tfd_nwaiters;
76: bool tfd_cancel_on_set;
77: bool tfd_cancelled;
78: bool tfd_restarting;
79:
80: /*
81: * Information kept for stat(2).
82: */
83: struct timespec tfd_btime; /* time created */
84: struct timespec tfd_mtime; /* last timerfd_settime() */
85: struct timespec tfd_atime; /* last read */
86: };
87:
/* Forward declaration; the itimer callbacks below call this before it is defined. */
static void	timerfd_wake(struct timerfd * const tfd);
89:
90: static inline uint64_t
91: timerfd_fire_count(const struct timerfd * const tfd)
92: {
93: return (unsigned int)tfd->tfd_itimer.it_overruns;
94: }
95:
96: static inline bool
97: timerfd_is_readable(const struct timerfd * const tfd)
98: {
99: return tfd->tfd_itimer.it_overruns != 0 || tfd->tfd_cancelled;
100: }
101:
102: /*
103: * timerfd_fire:
104: *
105: * Called when the timerfd's timer fires.
106: *
107: * Called from a callout with itimer lock held.
108: */
109: static void
110: timerfd_fire(struct itimer * const it)
111: {
112: struct timerfd * const tfd =
113: container_of(it, struct timerfd, tfd_itimer);
114:
115: it->it_overruns++;
116: timerfd_wake(tfd);
117: }
118:
119: /*
120: * timerfd_realtime_changed:
121: *
122: * Called when CLOCK_REALTIME is changed with clock_settime()
123: * or settimeofday().
124: *
125: * Called with itimer lock held.
126: */
127: static void
128: timerfd_realtime_changed(struct itimer * const it)
129: {
130: struct timerfd * const tfd =
131: container_of(it, struct timerfd, tfd_itimer);
132:
133: /* Should only be called when timer is armed. */
134: KASSERT(timespecisset(&it->it_time.it_value));
135:
136: if (tfd->tfd_cancel_on_set) {
137: tfd->tfd_cancelled = true;
138: timerfd_wake(tfd);
139: }
140: }
141:
142: static const struct itimer_ops timerfd_itimer_monotonic_ops = {
143: .ito_fire = timerfd_fire,
144: };
145:
146: static const struct itimer_ops timerfd_itimer_realtime_ops = {
147: .ito_fire = timerfd_fire,
148: .ito_realtime_changed = timerfd_realtime_changed,
149: };
150:
151: /*
152: * timerfd_create:
153: *
154: * Create a timerfd object.
155: */
156: static struct timerfd *
157: timerfd_create(clockid_t const clock_id, int const flags)
158: {
159: struct timerfd * const tfd = kmem_zalloc(sizeof(*tfd), KM_SLEEP);
160:
161: KASSERT(clock_id == CLOCK_REALTIME || clock_id == CLOCK_MONOTONIC);
162:
163: cv_init(&tfd->tfd_read_wait, "tfdread");
164: selinit(&tfd->tfd_read_sel);
165: getnanotime(&tfd->tfd_btime);
166:
167: /* Caller deals with TFD_CLOEXEC and TFD_NONBLOCK. */
168:
169: itimer_lock();
170: itimer_init(&tfd->tfd_itimer,
171: clock_id == CLOCK_REALTIME ? &timerfd_itimer_realtime_ops
172: : &timerfd_itimer_monotonic_ops,
173: clock_id, NULL);
174: itimer_unlock();
175:
176: return tfd;
177: }
178:
179: /*
180: * timerfd_destroy:
181: *
182: * Destroy a timerfd object.
183: */
184: static void
185: timerfd_destroy(struct timerfd * const tfd)
186: {
187:
188: KASSERT(tfd->tfd_nwaiters == 0);
189:
190: itimer_lock();
191: itimer_poison(&tfd->tfd_itimer);
192: itimer_fini(&tfd->tfd_itimer); /* drops itimer lock */
193:
194: cv_destroy(&tfd->tfd_read_wait);
195:
196: seldestroy(&tfd->tfd_read_sel);
197:
198: kmem_free(tfd, sizeof(*tfd));
199: }
200:
201: /*
202: * timerfd_wait:
203: *
204: * Block on a timerfd. Handles non-blocking, as well as
205: * the restart cases.
206: */
207: static int
208: timerfd_wait(struct timerfd * const tfd, int const fflag)
209: {
210: extern kmutex_t itimer_mutex; /* XXX */
211: int error;
212:
213: if (fflag & FNONBLOCK) {
214: return EAGAIN;
215: }
216:
217: /*
1.7 thorpej 218: * We're going to block. Check if we need to return ERESTART.
1.2 thorpej 219: */
1.7 thorpej 220: if (tfd->tfd_restarting) {
221: return ERESTART;
1.2 thorpej 222: }
223:
224: tfd->tfd_nwaiters++;
225: KASSERT(tfd->tfd_nwaiters > 0);
226: error = cv_wait_sig(&tfd->tfd_read_wait, &itimer_mutex);
227: tfd->tfd_nwaiters--;
228: KASSERT(tfd->tfd_nwaiters >= 0);
229:
230: /*
231: * If a restart was triggered while we were asleep, we need
1.7 thorpej 232: * to return ERESTART if no other error was returned.
1.2 thorpej 233: */
234: if (tfd->tfd_restarting) {
235: if (error == 0) {
236: error = ERESTART;
237: }
238: }
239:
240: return error;
241: }
242:
243: /*
244: * timerfd_wake:
245: *
246: * Wake LWPs blocked on a timerfd.
247: */
248: static void
249: timerfd_wake(struct timerfd * const tfd)
250: {
251:
252: if (tfd->tfd_nwaiters) {
253: cv_broadcast(&tfd->tfd_read_wait);
254: }
255: selnotify(&tfd->tfd_read_sel, POLLIN | POLLRDNORM, NOTE_SUBMIT);
256: }
257:
258: /*
259: * timerfd file operations
260: */
261:
262: static int
263: timerfd_fop_read(file_t * const fp, off_t * const offset,
264: struct uio * const uio, kauth_cred_t const cred, int const flags)
265: {
266: struct timerfd * const tfd = fp->f_timerfd;
267: struct itimer * const it = &tfd->tfd_itimer;
268: int const fflag = fp->f_flag;
269: uint64_t return_value;
270: int error;
271:
272: if (uio->uio_resid < sizeof(uint64_t)) {
273: return EINVAL;
274: }
275:
276: itimer_lock();
277:
278: while (!timerfd_is_readable(tfd)) {
279: if ((error = timerfd_wait(tfd, fflag)) != 0) {
280: itimer_unlock();
281: return error;
282: }
283: }
284:
285: if (tfd->tfd_cancelled) {
286: itimer_unlock();
287: return ECANCELED;
288: }
289:
290: return_value = timerfd_fire_count(tfd);
291: it->it_overruns = 0;
292:
293: getnanotime(&tfd->tfd_atime);
294:
295: itimer_unlock();
296:
297: error = uiomove(&return_value, sizeof(return_value), uio);
298:
299: return error;
300: }
301:
302: static int
303: timerfd_fop_ioctl(file_t * const fp, unsigned long const cmd, void * const data)
304: {
305: struct timerfd * const tfd = fp->f_timerfd;
306: int error = 0;
307:
308: switch (cmd) {
1.8 ! thorpej 309: case FIONBIO:
! 310: break;
! 311:
! 312: case FIONREAD:
! 313: itimer_lock();
! 314: *(int *)data = timerfd_is_readable(tfd) ? sizeof(uint64_t) : 0;
! 315: itimer_unlock();
! 316: break;
! 317:
1.2 thorpej 318: case TFD_IOC_SET_TICKS: {
319: const uint64_t * const new_ticksp = data;
320: if (*new_ticksp > INT_MAX) {
321: return EINVAL;
322: }
323: itimer_lock();
324: tfd->tfd_itimer.it_overruns = (int)*new_ticksp;
325: itimer_unlock();
326: break;
327: }
328:
329: default:
330: error = EPASSTHROUGH;
331: }
332:
333: return error;
334: }
335:
336: static int
337: timerfd_fop_poll(file_t * const fp, int const events)
338: {
339: struct timerfd * const tfd = fp->f_timerfd;
340: int revents = events & (POLLOUT | POLLWRNORM);
341:
342: if (events & (POLLIN | POLLRDNORM)) {
343: itimer_lock();
344: if (timerfd_is_readable(tfd)) {
345: revents |= events & (POLLIN | POLLRDNORM);
346: } else {
347: selrecord(curlwp, &tfd->tfd_read_sel);
348: }
349: itimer_unlock();
350: }
351:
352: return revents;
353: }
354:
355: static int
356: timerfd_fop_stat(file_t * const fp, struct stat * const st)
357: {
358: struct timerfd * const tfd = fp->f_timerfd;
359:
360: memset(st, 0, sizeof(*st));
361:
362: itimer_lock();
363: st->st_size = (off_t)timerfd_fire_count(tfd);
364: st->st_atimespec = tfd->tfd_atime;
365: st->st_mtimespec = tfd->tfd_mtime;
366: itimer_unlock();
367:
368: st->st_blksize = sizeof(uint64_t);
369: st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
370: st->st_blocks = 1;
371: st->st_birthtimespec = tfd->tfd_btime;
372: st->st_ctimespec = st->st_mtimespec;
373: st->st_uid = kauth_cred_geteuid(fp->f_cred);
374: st->st_gid = kauth_cred_getegid(fp->f_cred);
375:
376: return 0;
377: }
378:
379: static int
380: timerfd_fop_close(file_t * const fp)
381: {
382: struct timerfd * const tfd = fp->f_timerfd;
383:
384: fp->f_timerfd = NULL;
385: timerfd_destroy(tfd);
386:
387: return 0;
388: }
389:
390: static void
391: timerfd_filt_read_detach(struct knote * const kn)
392: {
393: struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
394:
395: itimer_lock();
396: KASSERT(kn->kn_hook == tfd);
397: selremove_knote(&tfd->tfd_read_sel, kn);
398: itimer_unlock();
399: }
400:
401: static int
402: timerfd_filt_read(struct knote * const kn, long const hint)
403: {
404: struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
1.6 thorpej 405: int rv;
1.2 thorpej 406:
407: if (hint & NOTE_SUBMIT) {
408: KASSERT(itimer_lock_held());
409: } else {
410: itimer_lock();
411: }
412:
413: kn->kn_data = (int64_t)timerfd_fire_count(tfd);
1.6 thorpej 414: rv = kn->kn_data != 0;
1.2 thorpej 415:
416: if ((hint & NOTE_SUBMIT) == 0) {
417: itimer_unlock();
418: }
419:
1.6 thorpej 420: return rv;
1.2 thorpej 421: }
422:
423: static const struct filterops timerfd_read_filterops = {
1.5 thorpej 424: .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
1.2 thorpej 425: .f_detach = timerfd_filt_read_detach,
426: .f_event = timerfd_filt_read,
427: };
428:
429: static int
430: timerfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
431: {
432: struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
433: struct selinfo *sel;
434:
435: switch (kn->kn_filter) {
436: case EVFILT_READ:
437: sel = &tfd->tfd_read_sel;
438: kn->kn_fop = &timerfd_read_filterops;
439: break;
440:
441: default:
442: return EINVAL;
443: }
444:
445: kn->kn_hook = tfd;
446:
447: itimer_lock();
448: selrecord_knote(sel, kn);
449: itimer_unlock();
450:
451: return 0;
452: }
453:
454: static void
455: timerfd_fop_restart(file_t * const fp)
456: {
457: struct timerfd * const tfd = fp->f_timerfd;
458:
459: /*
460: * Unblock blocked reads in order to allow close() to complete.
461: * System calls return ERESTART so that the fd is revalidated.
462: */
463:
464: itimer_lock();
465:
466: if (tfd->tfd_nwaiters != 0) {
467: tfd->tfd_restarting = true;
468: cv_broadcast(&tfd->tfd_read_wait);
469: }
470:
471: itimer_unlock();
472: }
473:
474: static const struct fileops timerfd_fileops = {
475: .fo_name = "timerfd",
476: .fo_read = timerfd_fop_read,
477: .fo_write = fbadop_write,
478: .fo_ioctl = timerfd_fop_ioctl,
479: .fo_fcntl = fnullop_fcntl,
480: .fo_poll = timerfd_fop_poll,
481: .fo_stat = timerfd_fop_stat,
482: .fo_close = timerfd_fop_close,
483: .fo_kqfilter = timerfd_fop_kqfilter,
484: .fo_restart = timerfd_fop_restart,
485: };
486:
487: /*
488: * timerfd_create(2) system call
489: */
490: int
491: do_timerfd_create(struct lwp * const l, clockid_t const clock_id,
492: int const flags, register_t *retval)
493: {
494: file_t *fp;
495: int fd, error;
496:
497: if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) {
498: return EINVAL;
499: }
500:
501: switch (clock_id) {
502: case CLOCK_REALTIME:
503: case CLOCK_MONOTONIC:
504: /* allowed */
505: break;
506:
507: default:
508: return EINVAL;
509: }
510:
511: if ((error = fd_allocfile(&fp, &fd)) != 0) {
512: return error;
513: }
514:
515: fp->f_flag = FREAD;
516: if (flags & TFD_NONBLOCK) {
517: fp->f_flag |= FNONBLOCK;
518: }
519: fp->f_type = DTYPE_TIMERFD;
520: fp->f_ops = &timerfd_fileops;
521: fp->f_timerfd = timerfd_create(clock_id, flags);
522: fd_set_exclose(l, fd, !!(flags & TFD_CLOEXEC));
523: fd_affix(curproc, fp, fd);
524:
525: *retval = fd;
526: return 0;
527: }
528:
529: int
530: sys_timerfd_create(struct lwp *l, const struct sys_timerfd_create_args *uap,
531: register_t *retval)
532: {
533: /* {
534: syscallarg(clockid_t) clock_id;
535: syscallarg(int) flags;
536: } */
537:
538: return do_timerfd_create(l, SCARG(uap, clock_id), SCARG(uap, flags),
539: retval);
540: }
541:
542: /*
543: * timerfd_gettime(2) system call.
544: */
545: int
546: do_timerfd_gettime(struct lwp *l, int fd, struct itimerspec *curr_value,
547: register_t *retval)
548: {
549: file_t *fp;
550:
551: if ((fp = fd_getfile(fd)) == NULL) {
552: return EBADF;
553: }
554:
555: if (fp->f_ops != &timerfd_fileops) {
556: fd_putfile(fd);
557: return EINVAL;
558: }
559:
560: struct timerfd * const tfd = fp->f_timerfd;
561: itimer_lock();
562: itimer_gettime(&tfd->tfd_itimer, curr_value);
563: itimer_unlock();
564:
565: fd_putfile(fd);
566: return 0;
567: }
568:
569: int
570: sys_timerfd_gettime(struct lwp *l, const struct sys_timerfd_gettime_args *uap,
571: register_t *retval)
572: {
573: /* {
574: syscallarg(int) fd;
575: syscallarg(struct itimerspec *) curr_value;
576: } */
577:
578: struct itimerspec oits;
579: int error;
580:
581: error = do_timerfd_gettime(l, SCARG(uap, fd), &oits, retval);
582: if (error == 0) {
583: error = copyout(&oits, SCARG(uap, curr_value), sizeof(oits));
584: }
585: return error;
586: }
587:
588: /*
589: * timerfd_settime(2) system call.
590: */
591: int
592: do_timerfd_settime(struct lwp *l, int fd, int flags,
593: const struct itimerspec *new_value, struct itimerspec *old_value,
594: register_t *retval)
595: {
596: file_t *fp;
597: int error;
598:
599: if (flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) {
600: return EINVAL;
601: }
602:
603: if ((fp = fd_getfile(fd)) == NULL) {
604: return EBADF;
605: }
606:
607: if (fp->f_ops != &timerfd_fileops) {
608: fd_putfile(fd);
609: return EINVAL;
610: }
611:
612: struct timerfd * const tfd = fp->f_timerfd;
613: struct itimer * const it = &tfd->tfd_itimer;
614:
615: itimer_lock();
616:
617: restart:
618: if (old_value != NULL) {
619: *old_value = it->it_time;
620: }
621: it->it_time = *new_value;
622:
623: /*
624: * If we've been passed a relative value, convert it to an
625: * absolute, as that's what the itimer facility expects for
626: * non-virtual timers. Also ensure that this doesn't set it
627: * to zero or lets it go negative.
628: * XXXJRT re-factor.
629: */
630: if (timespecisset(&it->it_time.it_value) &&
631: (flags & TFD_TIMER_ABSTIME) == 0) {
632: struct timespec now;
633: if (it->it_clockid == CLOCK_REALTIME) {
634: getnanotime(&now);
635: } else { /* CLOCK_MONOTONIC */
636: getnanouptime(&now);
637: }
638: timespecadd(&it->it_time.it_value, &now,
639: &it->it_time.it_value);
640: }
641:
642: error = itimer_settime(it);
643: if (error == ERESTART) {
644: goto restart;
645: }
646: KASSERT(error == 0);
647:
648: /* Reset the expirations counter. */
649: it->it_overruns = 0;
650:
651: if (it->it_clockid == CLOCK_REALTIME) {
652: tfd->tfd_cancelled = false;
653: tfd->tfd_cancel_on_set = !!(flags & TFD_TIMER_CANCEL_ON_SET);
654: }
655:
656: getnanotime(&tfd->tfd_mtime);
657: itimer_unlock();
658:
659: fd_putfile(fd);
660: return error;
661: }
662:
663: int
664: sys_timerfd_settime(struct lwp *l, const struct sys_timerfd_settime_args *uap,
665: register_t *retval)
666: {
667: /* {
668: syscallarg(int) fd;
669: syscallarg(int) flags;
670: syscallarg(const struct itimerspec *) new_value;
671: syscallarg(struct itimerspec *) old_value;
672: } */
673:
674: struct itimerspec nits, oits, *oitsp = NULL;
675: int error;
676:
677: error = copyin(SCARG(uap, new_value), &nits, sizeof(nits));
678: if (error) {
679: return error;
680: }
681:
682: if (SCARG(uap, old_value) != NULL) {
683: oitsp = &oits;
684: }
685:
686: error = do_timerfd_settime(l, SCARG(uap, fd), SCARG(uap, flags),
687: &nits, oitsp, retval);
688: if (error == 0 && oitsp != NULL) {
689: error = copyout(oitsp, SCARG(uap, old_value), sizeof(*oitsp));
690: }
691: return error;
692: }
CVSweb <webmaster@jp.NetBSD.org>