Annotation of src/sys/dev/lockstat.c, Revision 1.25
1.25 ! chs 1: /* $NetBSD: lockstat.c,v 1.24 2015/08/20 14:40:17 christos Exp $ */
1.1 ad 2:
3: /*-
1.9 ad 4: * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
1.1 ad 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Andrew Doran.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
33: * Lock statistics driver, providing kernel support for the lockstat(8)
34: * command.
1.5 ad 35: *
1.11 ad 36: * We use a global lock word (lockstat_lock) to track device opens.
37: * Only one thread can hold the device at a time, providing a global lock.
38: *
1.5 ad 39: * XXX Timings for contention on sleep locks are currently incorrect.
1.1 ad 40: */
41:
42: #include <sys/cdefs.h>
1.25 ! chs 43: __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.24 2015/08/20 14:40:17 christos Exp $");
1.1 ad 44:
45: #include <sys/types.h>
46: #include <sys/param.h>
47: #include <sys/proc.h>
48: #include <sys/resourcevar.h>
49: #include <sys/systm.h>
50: #include <sys/kernel.h>
1.11 ad 51: #include <sys/kmem.h>
1.1 ad 52: #include <sys/conf.h>
1.23 matt 53: #include <sys/cpu.h>
1.1 ad 54: #include <sys/syslog.h>
1.12 ad 55: #include <sys/atomic.h>
1.1 ad 56:
57: #include <dev/lockstat.h>
58:
1.13 ad 59: #include <machine/lock.h>
60:
1.24 christos 61: #include "ioconf.h"
62:
1.1 ad 63: #ifndef __HAVE_CPU_COUNTER
64: #error CPU counters not available
65: #endif
66:
67: #if LONG_BIT == 64
68: #define LOCKSTAT_HASH_SHIFT 3
69: #elif LONG_BIT == 32
70: #define LOCKSTAT_HASH_SHIFT 2
71: #endif
72:
1.10 ad 73: #define LOCKSTAT_MINBUFS 1000
74: #define LOCKSTAT_DEFBUFS 10000
1.18 chs 75: #define LOCKSTAT_MAXBUFS 1000000
1.1 ad 76:
/*
 * Per-CPU hash table geometry.  LOCKSTAT_HASH_SIZE must be a power of two
 * so that LOCKSTAT_HASH_MASK selects a valid bucket index.
 *
 * LOCKSTAT_HASH() discards the low LOCKSTAT_HASH_SHIFT bits of the key and
 * masks the result down to a bucket index.  The argument is parenthesized
 * so that an expression such as `lock ^ callsite' is hashed as a whole;
 * without the parentheses the shift would bind to the right-hand operand
 * only.
 */
#define	LOCKSTAT_HASH_SIZE	128
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	(((key) >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
81:
82: typedef struct lscpu {
83: SLIST_HEAD(, lsbuf) lc_free;
84: u_int lc_overflow;
85: LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
86: } lscpu_t;
87:
88: typedef struct lslist lslist_t;
89:
90: void lockstat_start(lsenable_t *);
91: int lockstat_alloc(lsenable_t *);
92: void lockstat_init_tables(lsenable_t *);
93: int lockstat_stop(lsdisable_t *);
94: void lockstat_free(void);
95:
96: dev_type_open(lockstat_open);
97: dev_type_close(lockstat_close);
98: dev_type_read(lockstat_read);
99: dev_type_ioctl(lockstat_ioctl);
100:
101: volatile u_int lockstat_enabled;
1.21 christos 102: volatile u_int lockstat_dev_enabled;
1.1 ad 103: uintptr_t lockstat_csstart;
104: uintptr_t lockstat_csend;
105: uintptr_t lockstat_csmask;
1.10 ad 106: uintptr_t lockstat_lamask;
1.5 ad 107: uintptr_t lockstat_lockstart;
108: uintptr_t lockstat_lockend;
1.11 ad 109: __cpu_simple_lock_t lockstat_lock;
110: lwp_t *lockstat_lwp;
1.1 ad 111: lsbuf_t *lockstat_baseb;
112: size_t lockstat_sizeb;
113: int lockstat_busy;
114: struct timespec lockstat_stime;
115:
1.20 christos 116: #ifdef KDTRACE_HOOKS
1.21 christos 117: volatile u_int lockstat_dtrace_enabled;
1.20 christos 118: CTASSERT(LB_NEVENT <= 3);
119: CTASSERT(LB_NLOCK <= (7 << LB_LOCK_SHIFT));
/*
 * Do-nothing probe handler.  lockstat_probe_func points here initially,
 * so a probe firing is simply discarded.
 */
void
lockstat_probe_stub(uint32_t id, uintptr_t lock, uintptr_t callsite,
    uintptr_t flags, uintptr_t count, uintptr_t cycles)
{
	/* Intentionally empty. */
}
125:
126: uint32_t lockstat_probemap[LS_NPROBES];
127: void (*lockstat_probe_func)(uint32_t, uintptr_t, uintptr_t,
128: uintptr_t, uintptr_t, uintptr_t) = &lockstat_probe_stub;
129: #endif
130:
1.1 ad 131: const struct cdevsw lockstat_cdevsw = {
1.17 dholland 132: .d_open = lockstat_open,
133: .d_close = lockstat_close,
134: .d_read = lockstat_read,
135: .d_write = nowrite,
136: .d_ioctl = lockstat_ioctl,
137: .d_stop = nostop,
138: .d_tty = notty,
139: .d_poll = nopoll,
140: .d_mmap = nommap,
141: .d_kqfilter = nokqfilter,
1.19 dholland 142: .d_discard = nodiscard,
1.17 dholland 143: .d_flag = D_OTHER | D_MPSAFE
1.1 ad 144: };
145:
146: /*
147: * Called when the pseudo-driver is attached.
148: */
149: void
150: lockstatattach(int nunits)
151: {
152:
153: (void)nunits;
154:
1.11 ad 155: __cpu_simple_lock_init(&lockstat_lock);
1.1 ad 156: }
157:
158: /*
159: * Prepare the per-CPU tables for use, or clear down tables when tracing is
160: * stopped.
161: */
162: void
163: lockstat_init_tables(lsenable_t *le)
164: {
1.7 ad 165: int i, per, slop, cpuno;
1.1 ad 166: CPU_INFO_ITERATOR cii;
167: struct cpu_info *ci;
168: lscpu_t *lc;
169: lsbuf_t *lb;
170:
1.22 christos 171: /* coverity[assert_side_effect] */
1.21 christos 172: KASSERT(!lockstat_dev_enabled);
1.1 ad 173:
174: for (CPU_INFO_FOREACH(cii, ci)) {
175: if (ci->ci_lockstat != NULL) {
1.11 ad 176: kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
1.1 ad 177: ci->ci_lockstat = NULL;
178: }
179: }
180:
181: if (le == NULL)
182: return;
183:
184: lb = lockstat_baseb;
185: per = le->le_nbufs / ncpu;
186: slop = le->le_nbufs - (per * ncpu);
187: cpuno = 0;
188: for (CPU_INFO_FOREACH(cii, ci)) {
1.11 ad 189: lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
1.1 ad 190: lc->lc_overflow = 0;
191: ci->ci_lockstat = lc;
192:
193: SLIST_INIT(&lc->lc_free);
194: for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
195: LIST_INIT(&lc->lc_hash[i]);
196:
197: for (i = per; i != 0; i--, lb++) {
198: lb->lb_cpu = (uint16_t)cpuno;
199: SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
200: }
201: if (--slop > 0) {
202: lb->lb_cpu = (uint16_t)cpuno;
203: SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
204: lb++;
205: }
206: cpuno++;
207: }
208: }
209:
210: /*
211: * Start collecting lock statistics.
212: */
213: void
214: lockstat_start(lsenable_t *le)
215: {
216:
1.22 christos 217: /* coverity[assert_side_effect] */
1.21 christos 218: KASSERT(!lockstat_dev_enabled);
1.1 ad 219:
220: lockstat_init_tables(le);
221:
222: if ((le->le_flags & LE_CALLSITE) != 0)
223: lockstat_csmask = (uintptr_t)-1LL;
224: else
225: lockstat_csmask = 0;
226:
1.10 ad 227: if ((le->le_flags & LE_LOCK) != 0)
228: lockstat_lamask = (uintptr_t)-1LL;
229: else
230: lockstat_lamask = 0;
231:
1.1 ad 232: lockstat_csstart = le->le_csstart;
233: lockstat_csend = le->le_csend;
1.5 ad 234: lockstat_lockstart = le->le_lockstart;
1.6 ad 235: lockstat_lockstart = le->le_lockstart;
1.5 ad 236: lockstat_lockend = le->le_lockend;
1.12 ad 237: membar_sync();
1.1 ad 238: getnanotime(&lockstat_stime);
1.21 christos 239: lockstat_dev_enabled = le->le_mask;
240: LOCKSTAT_ENABLED_UPDATE();
1.1 ad 241: }
242:
243: /*
244: * Stop collecting lock statistics.
245: */
246: int
247: lockstat_stop(lsdisable_t *ld)
248: {
249: CPU_INFO_ITERATOR cii;
250: struct cpu_info *ci;
251: u_int cpuno, overflow;
252: struct timespec ts;
253: int error;
1.14 ad 254: lwp_t *l;
1.1 ad 255:
1.22 christos 256: /* coverity[assert_side_effect] */
1.21 christos 257: KASSERT(lockstat_dev_enabled);
1.1 ad 258:
259: /*
260: * Set enabled false, force a write barrier, and wait for other CPUs
1.5 ad 261: * to exit lockstat_event().
1.1 ad 262: */
1.21 christos 263: lockstat_dev_enabled = 0;
264: LOCKSTAT_ENABLED_UPDATE();
1.1 ad 265: getnanotime(&ts);
266: tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
267:
268: /*
269: * Did we run out of buffers while tracing?
270: */
271: overflow = 0;
272: for (CPU_INFO_FOREACH(cii, ci))
273: overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
274:
275: if (overflow != 0) {
276: error = EOVERFLOW;
277: log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
278: overflow);
279: } else
280: error = 0;
281:
282: lockstat_init_tables(NULL);
283:
1.14 ad 284: /* Run through all LWPs and clear the slate for the next run. */
285: mutex_enter(proc_lock);
286: LIST_FOREACH(l, &alllwp, l_list) {
287: l->l_pfailaddr = 0;
288: l->l_pfailtime = 0;
289: l->l_pfaillock = 0;
290: }
291: mutex_exit(proc_lock);
292:
1.1 ad 293: if (ld == NULL)
1.11 ad 294: return error;
1.1 ad 295:
296: /*
297: * Fill out the disable struct for the caller.
298: */
299: timespecsub(&ts, &lockstat_stime, &ld->ld_time);
300: ld->ld_size = lockstat_sizeb;
301:
302: cpuno = 0;
303: for (CPU_INFO_FOREACH(cii, ci)) {
1.16 msaitoh 304: if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
1.1 ad 305: log(LOG_WARNING, "lockstat: too many CPUs\n");
306: break;
307: }
308: ld->ld_freq[cpuno++] = cpu_frequency(ci);
309: }
310:
1.11 ad 311: return error;
1.1 ad 312: }
313:
314: /*
315: * Allocate buffers for lockstat_start().
316: */
317: int
318: lockstat_alloc(lsenable_t *le)
319: {
320: lsbuf_t *lb;
321: size_t sz;
322:
1.22 christos 323: /* coverity[assert_side_effect] */
1.21 christos 324: KASSERT(!lockstat_dev_enabled);
1.1 ad 325: lockstat_free();
326:
327: sz = sizeof(*lb) * le->le_nbufs;
328:
1.11 ad 329: lb = kmem_zalloc(sz, KM_SLEEP);
1.1 ad 330:
1.22 christos 331: /* coverity[assert_side_effect] */
1.21 christos 332: KASSERT(!lockstat_dev_enabled);
1.1 ad 333: KASSERT(lockstat_baseb == NULL);
334: lockstat_sizeb = sz;
335: lockstat_baseb = lb;
336:
337: return (0);
338: }
339:
340: /*
341: * Free allocated buffers after tracing has stopped.
342: */
343: void
344: lockstat_free(void)
345: {
346:
1.22 christos 347: /* coverity[assert_side_effect] */
1.21 christos 348: KASSERT(!lockstat_dev_enabled);
1.1 ad 349:
350: if (lockstat_baseb != NULL) {
1.11 ad 351: kmem_free(lockstat_baseb, lockstat_sizeb);
1.1 ad 352: lockstat_baseb = NULL;
353: }
354: }
355:
356: /*
357: * Main entry point from lock primatives.
358: */
359: void
360: lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
1.6 ad 361: uint64_t cycles)
1.1 ad 362: {
363: lslist_t *ll;
364: lscpu_t *lc;
365: lsbuf_t *lb;
366: u_int event;
367: int s;
368:
1.20 christos 369: #ifdef KDTRACE_HOOKS
370: uint32_t id;
371: CTASSERT((LS_NPROBES & (LS_NPROBES - 1)) == 0);
372: if ((id = lockstat_probemap[LS_COMPRESS(flags)]) != 0)
373: (*lockstat_probe_func)(id, lock, callsite, flags, count,
374: cycles);
375: #endif
376:
1.21 christos 377: if ((flags & lockstat_dev_enabled) != flags || count == 0)
1.1 ad 378: return;
1.5 ad 379: if (lock < lockstat_lockstart || lock > lockstat_lockend)
1.1 ad 380: return;
381: if (callsite < lockstat_csstart || callsite > lockstat_csend)
382: return;
383:
384: callsite &= lockstat_csmask;
1.10 ad 385: lock &= lockstat_lamask;
1.1 ad 386:
387: /*
388: * Find the table for this lock+callsite pair, and try to locate a
389: * buffer with the same key.
390: */
1.11 ad 391: s = splhigh();
1.1 ad 392: lc = curcpu()->ci_lockstat;
393: ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
394: event = (flags & LB_EVENT_MASK) - 1;
395:
396: LIST_FOREACH(lb, ll, lb_chain.list) {
397: if (lb->lb_lock == lock && lb->lb_callsite == callsite)
398: break;
399: }
400:
401: if (lb != NULL) {
402: /*
403: * We found a record. Move it to the front of the list, as
404: * we're likely to hit it again soon.
405: */
406: if (lb != LIST_FIRST(ll)) {
407: LIST_REMOVE(lb, lb_chain.list);
408: LIST_INSERT_HEAD(ll, lb, lb_chain.list);
409: }
410: lb->lb_counts[event] += count;
1.6 ad 411: lb->lb_times[event] += cycles;
1.1 ad 412: } else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
413: /*
414: * Pinch a new buffer and fill it out.
415: */
416: SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
417: LIST_INSERT_HEAD(ll, lb, lb_chain.list);
418: lb->lb_flags = (uint16_t)flags;
419: lb->lb_lock = lock;
420: lb->lb_callsite = callsite;
421: lb->lb_counts[event] = count;
1.6 ad 422: lb->lb_times[event] = cycles;
1.1 ad 423: } else {
424: /*
425: * We didn't find a buffer and there were none free.
426: * lockstat_stop() will notice later on and report the
427: * error.
428: */
429: lc->lc_overflow++;
430: }
431:
432: splx(s);
433: }
434:
435: /*
436: * Accept an open() on /dev/lockstat.
437: */
438: int
1.11 ad 439: lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
1.1 ad 440: {
441:
1.11 ad 442: if (!__cpu_simple_lock_try(&lockstat_lock))
443: return EBUSY;
444: lockstat_lwp = curlwp;
445: return 0;
1.1 ad 446: }
447:
448: /*
449: * Accept the last close() on /dev/lockstat.
450: */
451: int
1.11 ad 452: lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
1.1 ad 453: {
454:
1.11 ad 455: lockstat_lwp = NULL;
456: __cpu_simple_unlock(&lockstat_lock);
457: return 0;
1.1 ad 458: }
459:
460: /*
461: * Handle control operations.
462: */
463: int
1.11 ad 464: lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1.1 ad 465: {
466: lsenable_t *le;
467: int error;
468:
1.11 ad 469: if (lockstat_lwp != curlwp)
470: return EBUSY;
1.1 ad 471:
472: switch (cmd) {
473: case IOC_LOCKSTAT_GVERSION:
474: *(int *)data = LS_VERSION;
475: error = 0;
476: break;
477:
478: case IOC_LOCKSTAT_ENABLE:
479: le = (lsenable_t *)data;
480:
481: if (!cpu_hascounter()) {
482: error = ENODEV;
483: break;
484: }
1.21 christos 485: if (lockstat_dev_enabled) {
1.1 ad 486: error = EBUSY;
487: break;
488: }
489:
490: /*
491: * Sanitize the arguments passed in and set up filtering.
492: */
493: if (le->le_nbufs == 0)
494: le->le_nbufs = LOCKSTAT_DEFBUFS;
495: else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
496: le->le_nbufs < LOCKSTAT_MINBUFS) {
497: error = EINVAL;
498: break;
499: }
500: if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
501: le->le_csstart = 0;
502: le->le_csend = le->le_csstart - 1;
503: }
1.5 ad 504: if ((le->le_flags & LE_ONE_LOCK) == 0) {
505: le->le_lockstart = 0;
506: le->le_lockend = le->le_lockstart - 1;
507: }
1.1 ad 508: if ((le->le_mask & LB_EVENT_MASK) == 0)
1.11 ad 509: return EINVAL;
1.1 ad 510: if ((le->le_mask & LB_LOCK_MASK) == 0)
1.11 ad 511: return EINVAL;
1.1 ad 512:
513: /*
514: * Start tracing.
515: */
516: if ((error = lockstat_alloc(le)) == 0)
517: lockstat_start(le);
518: break;
519:
520: case IOC_LOCKSTAT_DISABLE:
1.21 christos 521: if (!lockstat_dev_enabled)
1.1 ad 522: error = EINVAL;
523: else
524: error = lockstat_stop((lsdisable_t *)data);
525: break;
526:
527: default:
528: error = ENOTTY;
529: break;
530: }
531:
532: return error;
533: }
534:
535: /*
536: * Copy buffers out to user-space.
537: */
538: int
1.4 christos 539: lockstat_read(dev_t dev, struct uio *uio, int flag)
1.1 ad 540: {
541:
1.21 christos 542: if (curlwp != lockstat_lwp || lockstat_dev_enabled)
1.11 ad 543: return EBUSY;
544: return uiomove(lockstat_baseb, lockstat_sizeb, uio);
1.1 ad 545: }
CVSweb <webmaster@jp.NetBSD.org>