Annotation of src/sys/dev/lockstat.c, Revision 1.15.10.1
1.15.10.1! bouyer 1: /* $NetBSD$ */
1.1 ad 2:
3: /*-
1.9 ad 4: * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
1.1 ad 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Andrew Doran.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
33: * Lock statistics driver, providing kernel support for the lockstat(8)
34: * command.
1.5 ad 35: *
1.11 ad 36: * We use a global lock word (lockstat_lock) to track device opens.
37: * Only one thread can hold the device at a time, providing a global lock.
38: *
1.5 ad 39: * XXX Timings for contention on sleep locks are currently incorrect.
1.1 ad 40: */
41:
42: #include <sys/cdefs.h>
1.15.10.1! bouyer 43: __KERNEL_RCSID(0, "$NetBSD$");
1.1 ad 44:
45: #include <sys/types.h>
46: #include <sys/param.h>
47: #include <sys/proc.h>
48: #include <sys/resourcevar.h>
49: #include <sys/systm.h>
50: #include <sys/kernel.h>
1.11 ad 51: #include <sys/kmem.h>
1.1 ad 52: #include <sys/conf.h>
53: #include <sys/syslog.h>
1.12 ad 54: #include <sys/atomic.h>
1.1 ad 55:
56: #include <dev/lockstat.h>
57:
1.13 ad 58: #include <machine/lock.h>
59:
1.1 ad 60: #ifndef __HAVE_CPU_COUNTER
61: #error CPU counters not available
62: #endif
63:
64: #if LONG_BIT == 64
65: #define LOCKSTAT_HASH_SHIFT 3
66: #elif LONG_BIT == 32
67: #define LOCKSTAT_HASH_SHIFT 2
68: #endif
69:
1.10 ad 70: #define LOCKSTAT_MINBUFS 1000
71: #define LOCKSTAT_DEFBUFS 10000
72: #define LOCKSTAT_MAXBUFS 50000
1.1 ad 73:
1.11 ad 74: #define LOCKSTAT_HASH_SIZE 128
1.1 ad 75: #define LOCKSTAT_HASH_MASK (LOCKSTAT_HASH_SIZE - 1)
76: #define LOCKSTAT_HASH(key) \
77: ((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
78:
79: typedef struct lscpu {
80: SLIST_HEAD(, lsbuf) lc_free;
81: u_int lc_overflow;
82: LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
83: } lscpu_t;
84:
85: typedef struct lslist lslist_t;
86:
87: void lockstatattach(int);
88: void lockstat_start(lsenable_t *);
89: int lockstat_alloc(lsenable_t *);
90: void lockstat_init_tables(lsenable_t *);
91: int lockstat_stop(lsdisable_t *);
92: void lockstat_free(void);
93:
94: dev_type_open(lockstat_open);
95: dev_type_close(lockstat_close);
96: dev_type_read(lockstat_read);
97: dev_type_ioctl(lockstat_ioctl);
98:
99: volatile u_int lockstat_enabled;
100: uintptr_t lockstat_csstart;
101: uintptr_t lockstat_csend;
102: uintptr_t lockstat_csmask;
1.10 ad 103: uintptr_t lockstat_lamask;
1.5 ad 104: uintptr_t lockstat_lockstart;
105: uintptr_t lockstat_lockend;
1.11 ad 106: __cpu_simple_lock_t lockstat_lock;
107: lwp_t *lockstat_lwp;
1.1 ad 108: lsbuf_t *lockstat_baseb;
109: size_t lockstat_sizeb;
110: int lockstat_busy;
111: struct timespec lockstat_stime;
112:
113: const struct cdevsw lockstat_cdevsw = {
114: lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
1.11 ad 115: nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE
1.1 ad 116: };
117:
118: /*
119: * Called when the pseudo-driver is attached.
120: */
void
lockstatattach(int nunits)
{

	(void)nunits;	/* pseudo-device: the unit count is unused */

	/* Start with the device unowned; open() takes this lock. */
	__cpu_simple_lock_init(&lockstat_lock);
}
129:
130: /*
131: * Prepare the per-CPU tables for use, or clear down tables when tracing is
132: * stopped.
133: */
134: void
135: lockstat_init_tables(lsenable_t *le)
136: {
1.7 ad 137: int i, per, slop, cpuno;
1.1 ad 138: CPU_INFO_ITERATOR cii;
139: struct cpu_info *ci;
140: lscpu_t *lc;
141: lsbuf_t *lb;
142:
143: KASSERT(!lockstat_enabled);
144:
145: for (CPU_INFO_FOREACH(cii, ci)) {
146: if (ci->ci_lockstat != NULL) {
1.11 ad 147: kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
1.1 ad 148: ci->ci_lockstat = NULL;
149: }
150: }
151:
152: if (le == NULL)
153: return;
154:
155: lb = lockstat_baseb;
156: per = le->le_nbufs / ncpu;
157: slop = le->le_nbufs - (per * ncpu);
158: cpuno = 0;
159: for (CPU_INFO_FOREACH(cii, ci)) {
1.11 ad 160: lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
1.1 ad 161: lc->lc_overflow = 0;
162: ci->ci_lockstat = lc;
163:
164: SLIST_INIT(&lc->lc_free);
165: for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
166: LIST_INIT(&lc->lc_hash[i]);
167:
168: for (i = per; i != 0; i--, lb++) {
169: lb->lb_cpu = (uint16_t)cpuno;
170: SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
171: }
172: if (--slop > 0) {
173: lb->lb_cpu = (uint16_t)cpuno;
174: SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
175: lb++;
176: }
177: cpuno++;
178: }
179: }
180:
181: /*
182: * Start collecting lock statistics.
183: */
184: void
185: lockstat_start(lsenable_t *le)
186: {
187:
188: KASSERT(!lockstat_enabled);
189:
190: lockstat_init_tables(le);
191:
192: if ((le->le_flags & LE_CALLSITE) != 0)
193: lockstat_csmask = (uintptr_t)-1LL;
194: else
195: lockstat_csmask = 0;
196:
1.10 ad 197: if ((le->le_flags & LE_LOCK) != 0)
198: lockstat_lamask = (uintptr_t)-1LL;
199: else
200: lockstat_lamask = 0;
201:
1.1 ad 202: lockstat_csstart = le->le_csstart;
203: lockstat_csend = le->le_csend;
1.5 ad 204: lockstat_lockstart = le->le_lockstart;
1.6 ad 205: lockstat_lockstart = le->le_lockstart;
1.5 ad 206: lockstat_lockend = le->le_lockend;
1.12 ad 207: membar_sync();
1.1 ad 208: getnanotime(&lockstat_stime);
209: lockstat_enabled = le->le_mask;
1.12 ad 210: membar_producer();
1.1 ad 211: }
212:
213: /*
214: * Stop collecting lock statistics.
215: */
216: int
217: lockstat_stop(lsdisable_t *ld)
218: {
219: CPU_INFO_ITERATOR cii;
220: struct cpu_info *ci;
221: u_int cpuno, overflow;
222: struct timespec ts;
223: int error;
1.14 ad 224: lwp_t *l;
1.1 ad 225:
226: KASSERT(lockstat_enabled);
227:
228: /*
229: * Set enabled false, force a write barrier, and wait for other CPUs
1.5 ad 230: * to exit lockstat_event().
1.1 ad 231: */
232: lockstat_enabled = 0;
1.12 ad 233: membar_producer();
1.1 ad 234: getnanotime(&ts);
235: tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
236:
237: /*
238: * Did we run out of buffers while tracing?
239: */
240: overflow = 0;
241: for (CPU_INFO_FOREACH(cii, ci))
242: overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
243:
244: if (overflow != 0) {
245: error = EOVERFLOW;
246: log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
247: overflow);
248: } else
249: error = 0;
250:
251: lockstat_init_tables(NULL);
252:
1.14 ad 253: /* Run through all LWPs and clear the slate for the next run. */
254: mutex_enter(proc_lock);
255: LIST_FOREACH(l, &alllwp, l_list) {
256: l->l_pfailaddr = 0;
257: l->l_pfailtime = 0;
258: l->l_pfaillock = 0;
259: }
260: mutex_exit(proc_lock);
261:
1.1 ad 262: if (ld == NULL)
1.11 ad 263: return error;
1.1 ad 264:
265: /*
266: * Fill out the disable struct for the caller.
267: */
268: timespecsub(&ts, &lockstat_stime, &ld->ld_time);
269: ld->ld_size = lockstat_sizeb;
270:
271: cpuno = 0;
272: for (CPU_INFO_FOREACH(cii, ci)) {
1.15.10.1! bouyer 273: if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
1.1 ad 274: log(LOG_WARNING, "lockstat: too many CPUs\n");
275: break;
276: }
277: ld->ld_freq[cpuno++] = cpu_frequency(ci);
278: }
279:
1.11 ad 280: return error;
1.1 ad 281: }
282:
283: /*
284: * Allocate buffers for lockstat_start().
285: */
286: int
287: lockstat_alloc(lsenable_t *le)
288: {
289: lsbuf_t *lb;
290: size_t sz;
291:
292: KASSERT(!lockstat_enabled);
293: lockstat_free();
294:
295: sz = sizeof(*lb) * le->le_nbufs;
296:
1.11 ad 297: lb = kmem_zalloc(sz, KM_SLEEP);
1.1 ad 298: if (lb == NULL)
299: return (ENOMEM);
300:
301: KASSERT(!lockstat_enabled);
302: KASSERT(lockstat_baseb == NULL);
303: lockstat_sizeb = sz;
304: lockstat_baseb = lb;
305:
306: return (0);
307: }
308:
309: /*
310: * Free allocated buffers after tracing has stopped.
311: */
312: void
313: lockstat_free(void)
314: {
315:
316: KASSERT(!lockstat_enabled);
317:
318: if (lockstat_baseb != NULL) {
1.11 ad 319: kmem_free(lockstat_baseb, lockstat_sizeb);
1.1 ad 320: lockstat_baseb = NULL;
321: }
322: }
323:
324: /*
 325:  * Main entry point from lock primitives.
326: */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
    uint64_t cycles)
{
	lslist_t *ll;
	lscpu_t *lc;
	lsbuf_t *lb;
	u_int event;
	int s;

	/* Discard unless every requested event/lock bit is enabled. */
	if ((flags & lockstat_enabled) != flags || count == 0)
		return;
	/* Apply the optional single-lock / single-callsite windows. */
	if (lock < lockstat_lockstart || lock > lockstat_lockend)
		return;
	if (callsite < lockstat_csstart || callsite > lockstat_csend)
		return;

	/* Masks are all-ones or zero: zero out fields not being traced. */
	callsite &= lockstat_csmask;
	lock &= lockstat_lamask;

	/*
	 * Find the table for this lock+callsite pair, and try to locate a
	 * buffer with the same key.
	 */
	s = splhigh();	/* per-CPU state: block interrupts, no lock needed */
	lc = curcpu()->ci_lockstat;
	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
	event = (flags & LB_EVENT_MASK) - 1;

	LIST_FOREACH(lb, ll, lb_chain.list) {
		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
			break;
	}

	if (lb != NULL) {
		/*
		 * We found a record.  Move it to the front of the list, as
		 * we're likely to hit it again soon.
		 */
		if (lb != LIST_FIRST(ll)) {
			LIST_REMOVE(lb, lb_chain.list);
			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		}
		lb->lb_counts[event] += count;
		lb->lb_times[event] += cycles;
	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
		/*
		 * Pinch a new buffer and fill it out.
		 */
		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		lb->lb_flags = (uint16_t)flags;
		lb->lb_lock = lock;
		lb->lb_callsite = callsite;
		lb->lb_counts[event] = count;
		lb->lb_times[event] = cycles;
	} else {
		/*
		 * We didn't find a buffer and there were none free.
		 * lockstat_stop() will notice later on and report the
		 * error.
		 */
		lc->lc_overflow++;
	}

	splx(s);
}
394:
395: /*
396: * Accept an open() on /dev/lockstat.
397: */
398: int
1.11 ad 399: lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
1.1 ad 400: {
401:
1.11 ad 402: if (!__cpu_simple_lock_try(&lockstat_lock))
403: return EBUSY;
404: lockstat_lwp = curlwp;
405: return 0;
1.1 ad 406: }
407:
408: /*
409: * Accept the last close() on /dev/lockstat.
410: */
411: int
1.11 ad 412: lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
1.1 ad 413: {
414:
1.11 ad 415: lockstat_lwp = NULL;
416: __cpu_simple_unlock(&lockstat_lock);
417: return 0;
1.1 ad 418: }
419:
420: /*
421: * Handle control operations.
422: */
423: int
1.11 ad 424: lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1.1 ad 425: {
426: lsenable_t *le;
427: int error;
428:
1.11 ad 429: if (lockstat_lwp != curlwp)
430: return EBUSY;
1.1 ad 431:
432: switch (cmd) {
433: case IOC_LOCKSTAT_GVERSION:
434: *(int *)data = LS_VERSION;
435: error = 0;
436: break;
437:
438: case IOC_LOCKSTAT_ENABLE:
439: le = (lsenable_t *)data;
440:
441: if (!cpu_hascounter()) {
442: error = ENODEV;
443: break;
444: }
445: if (lockstat_enabled) {
446: error = EBUSY;
447: break;
448: }
449:
450: /*
451: * Sanitize the arguments passed in and set up filtering.
452: */
453: if (le->le_nbufs == 0)
454: le->le_nbufs = LOCKSTAT_DEFBUFS;
455: else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
456: le->le_nbufs < LOCKSTAT_MINBUFS) {
457: error = EINVAL;
458: break;
459: }
460: if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
461: le->le_csstart = 0;
462: le->le_csend = le->le_csstart - 1;
463: }
1.5 ad 464: if ((le->le_flags & LE_ONE_LOCK) == 0) {
465: le->le_lockstart = 0;
466: le->le_lockend = le->le_lockstart - 1;
467: }
1.1 ad 468: if ((le->le_mask & LB_EVENT_MASK) == 0)
1.11 ad 469: return EINVAL;
1.1 ad 470: if ((le->le_mask & LB_LOCK_MASK) == 0)
1.11 ad 471: return EINVAL;
1.1 ad 472:
473: /*
474: * Start tracing.
475: */
476: if ((error = lockstat_alloc(le)) == 0)
477: lockstat_start(le);
478: break;
479:
480: case IOC_LOCKSTAT_DISABLE:
481: if (!lockstat_enabled)
482: error = EINVAL;
483: else
484: error = lockstat_stop((lsdisable_t *)data);
485: break;
486:
487: default:
488: error = ENOTTY;
489: break;
490: }
491:
492: return error;
493: }
494:
495: /*
496: * Copy buffers out to user-space.
497: */
498: int
1.4 christos 499: lockstat_read(dev_t dev, struct uio *uio, int flag)
1.1 ad 500: {
501:
1.11 ad 502: if (curlwp != lockstat_lwp || lockstat_enabled)
503: return EBUSY;
504: return uiomove(lockstat_baseb, lockstat_sizeb, uio);
1.1 ad 505: }
CVSweb <webmaster@jp.NetBSD.org>