
Annotation of src/sys/dev/lockstat.c, Revision 1.15.10.1

1.15.10.1! bouyer      1: /*     $NetBSD$        */
1.1       ad          2:
                      3: /*-
1.9       ad          4:  * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
1.1       ad          5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Andrew Doran.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     21:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     22:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     23:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     24:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     25:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     26:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     27:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     28:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     29:  * POSSIBILITY OF SUCH DAMAGE.
                     30:  */
                     31:
                     32: /*
                     33:  * Lock statistics driver, providing kernel support for the lockstat(8)
                     34:  * command.
1.5       ad         35:  *
1.11      ad         36:  * We use a global lock word (lockstat_lock) to track device opens.
                      37:  * Only one thread can hold the device open at a time.
                     38:  *
1.5       ad         39:  * XXX Timings for contention on sleep locks are currently incorrect.
1.1       ad         40:  */
                     41:
                     42: #include <sys/cdefs.h>
1.15.10.1! bouyer     43: __KERNEL_RCSID(0, "$NetBSD$");
1.1       ad         44:
                     45: #include <sys/types.h>
                     46: #include <sys/param.h>
                     47: #include <sys/proc.h>
                     48: #include <sys/resourcevar.h>
                     49: #include <sys/systm.h>
                     50: #include <sys/kernel.h>
1.11      ad         51: #include <sys/kmem.h>
1.1       ad         52: #include <sys/conf.h>
                     53: #include <sys/syslog.h>
1.12      ad         54: #include <sys/atomic.h>
1.1       ad         55:
                     56: #include <dev/lockstat.h>
                     57:
1.13      ad         58: #include <machine/lock.h>
                     59:
1.1       ad         60: #ifndef __HAVE_CPU_COUNTER
                     61: #error CPU counters not available
                     62: #endif
                     63:
                     64: #if LONG_BIT == 64
                     65: #define        LOCKSTAT_HASH_SHIFT     3
                     66: #elif LONG_BIT == 32
                     67: #define        LOCKSTAT_HASH_SHIFT     2
                     68: #endif
                     69:
1.10      ad         70: #define        LOCKSTAT_MINBUFS        1000
                     71: #define        LOCKSTAT_DEFBUFS        10000
                     72: #define        LOCKSTAT_MAXBUFS        50000
1.1       ad         73:
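                         /*
                          * Active buffers live in small per-CPU hash tables keyed on
                          * (lock address ^ call site).  LOCKSTAT_HASH_SHIFT discards the
                          * low bits of the key, which carry little information because of
                          * pointer alignment.
                          */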
1.11      ad         74: #define        LOCKSTAT_HASH_SIZE      128
1.1       ad         75: #define        LOCKSTAT_HASH_MASK      (LOCKSTAT_HASH_SIZE - 1)
                     76: #define        LOCKSTAT_HASH(key)      \
                     77:        ((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
                     78:
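                         /*
                          * Per-CPU state: a free list of buffers, a count of events dropped
                          * because no buffer was free, and a hash table of buffers currently
                          * in use.
                          */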
                     79: typedef struct lscpu {
                     80:        SLIST_HEAD(, lsbuf)     lc_free;
                     81:        u_int                   lc_overflow;
                     82:        LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
                     83: } lscpu_t;
                     84:
                     85: typedef struct lslist lslist_t;
                     86:
                     87: void   lockstatattach(int);
                     88: void   lockstat_start(lsenable_t *);
                     89: int    lockstat_alloc(lsenable_t *);
                     90: void   lockstat_init_tables(lsenable_t *);
                     91: int    lockstat_stop(lsdisable_t *);
                     92: void   lockstat_free(void);
                     93:
                     94: dev_type_open(lockstat_open);
                     95: dev_type_close(lockstat_close);
                     96: dev_type_read(lockstat_read);
                     97: dev_type_ioctl(lockstat_ioctl);
                     98:
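                         /*
                          * Global state.  lockstat_enabled gates event recording; the
                          * csstart/csend and lockstart/lockend pairs restrict tracing to a
                          * range of call sites and lock addresses, and csmask/lamask zero
                          * out the call site or lock address when it is not being recorded.
                          */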
                     99: volatile u_int lockstat_enabled;
                    100: uintptr_t      lockstat_csstart;
                    101: uintptr_t      lockstat_csend;
                    102: uintptr_t      lockstat_csmask;
1.10      ad        103: uintptr_t      lockstat_lamask;
1.5       ad        104: uintptr_t      lockstat_lockstart;
                    105: uintptr_t      lockstat_lockend;
1.11      ad        106: __cpu_simple_lock_t lockstat_lock;
                    107: lwp_t          *lockstat_lwp;
1.1       ad        108: lsbuf_t                *lockstat_baseb;
                    109: size_t         lockstat_sizeb;
                    110: int            lockstat_busy;
                    111: struct timespec        lockstat_stime;
                    112:
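                         /*
                          * D_MPSAFE: the driver does its own locking (lockstat_lock), so its
                          * entry points may be called without holding the big kernel lock.
                          */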
                    113: const struct cdevsw lockstat_cdevsw = {
                    114:        lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
1.11      ad        115:        nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE
1.1       ad        116: };
                    117:
                    118: /*
                    119:  * Called when the pseudo-driver is attached.
                    120:  */
                    121: void
                    122: lockstatattach(int nunits)
                    123: {
                    124:
                    125:        (void)nunits;
                    126:
1.11      ad        127:        __cpu_simple_lock_init(&lockstat_lock);
1.1       ad        128: }
                    129:
                    130: /*
                    131:  * Prepare the per-CPU tables for use, or clear down tables when tracing is
                    132:  * stopped.
                    133:  */
                    134: void
                    135: lockstat_init_tables(lsenable_t *le)
                    136: {
1.7       ad        137:        int i, per, slop, cpuno;
1.1       ad        138:        CPU_INFO_ITERATOR cii;
                    139:        struct cpu_info *ci;
                    140:        lscpu_t *lc;
                    141:        lsbuf_t *lb;
                    142:
                    143:        KASSERT(!lockstat_enabled);
                    144:
                    145:        for (CPU_INFO_FOREACH(cii, ci)) {
                    146:                if (ci->ci_lockstat != NULL) {
1.11      ad        147:                        kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
1.1       ad        148:                        ci->ci_lockstat = NULL;
                    149:                }
                    150:        }
                    151:
                    152:        if (le == NULL)
                    153:                return;
                    154:
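                                 /*
                                  * Divide the buffers among the CPUs: each CPU gets an equal
                                  * share ("per") and the remainder ("slop") is handed out
                                  * one buffer at a time.
                                  */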
                    155:        lb = lockstat_baseb;
                    156:        per = le->le_nbufs / ncpu;
                    157:        slop = le->le_nbufs - (per * ncpu);
                    158:        cpuno = 0;
                    159:        for (CPU_INFO_FOREACH(cii, ci)) {
1.11      ad        160:                lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
1.1       ad        161:                lc->lc_overflow = 0;
                    162:                ci->ci_lockstat = lc;
                    163:
                    164:                SLIST_INIT(&lc->lc_free);
                    165:                for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
                    166:                        LIST_INIT(&lc->lc_hash[i]);
                    167:
                    168:                for (i = per; i != 0; i--, lb++) {
                    169:                        lb->lb_cpu = (uint16_t)cpuno;
                    170:                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                    171:                }
                     172:                if (slop-- > 0) {
                    173:                        lb->lb_cpu = (uint16_t)cpuno;
                    174:                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                    175:                        lb++;
                    176:                }
                    177:                cpuno++;
                    178:        }
                    179: }
                    180:
                    181: /*
                    182:  * Start collecting lock statistics.
                    183:  */
                    184: void
                    185: lockstat_start(lsenable_t *le)
                    186: {
                    187:
                    188:        KASSERT(!lockstat_enabled);
                    189:
                    190:        lockstat_init_tables(le);
                    191:
                    192:        if ((le->le_flags & LE_CALLSITE) != 0)
                    193:                lockstat_csmask = (uintptr_t)-1LL;
                    194:        else
                    195:                lockstat_csmask = 0;
                    196:
1.10      ad        197:        if ((le->le_flags & LE_LOCK) != 0)
                    198:                lockstat_lamask = (uintptr_t)-1LL;
                    199:        else
                    200:                lockstat_lamask = 0;
                    201:
1.1       ad        202:        lockstat_csstart = le->le_csstart;
                    203:        lockstat_csend = le->le_csend;
1.5       ad        204:        lockstat_lockstart = le->le_lockstart;
1.5       ad        206:        lockstat_lockend = le->le_lockend;
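                                 /*
                                  * Make the filter settings above visible to all CPUs before
                                  * lockstat_enabled is set; once it is non-zero, other CPUs
                                  * may enter lockstat_event() at any time.
                                  */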
1.12      ad        207:        membar_sync();
1.1       ad        208:        getnanotime(&lockstat_stime);
                    209:        lockstat_enabled = le->le_mask;
1.12      ad        210:        membar_producer();
1.1       ad        211: }
                    212:
                    213: /*
                    214:  * Stop collecting lock statistics.
                    215:  */
                    216: int
                    217: lockstat_stop(lsdisable_t *ld)
                    218: {
                    219:        CPU_INFO_ITERATOR cii;
                    220:        struct cpu_info *ci;
                    221:        u_int cpuno, overflow;
                    222:        struct timespec ts;
                    223:        int error;
1.14      ad        224:        lwp_t *l;
1.1       ad        225:
                    226:        KASSERT(lockstat_enabled);
                    227:
                    228:        /*
                    229:         * Set enabled false, force a write barrier, and wait for other CPUs
1.5       ad        230:         * to exit lockstat_event().
1.1       ad        231:         */
                    232:        lockstat_enabled = 0;
1.12      ad        233:        membar_producer();
1.1       ad        234:        getnanotime(&ts);
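                                 /*
                                  * Nothing issues a wakeup on this channel; the timed sleep
                                  * is just a short grace period for CPUs that may still be
                                  * executing lockstat_event().
                                  */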
                    235:        tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
                    236:
                    237:        /*
                    238:         * Did we run out of buffers while tracing?
                    239:         */
                    240:        overflow = 0;
                    241:        for (CPU_INFO_FOREACH(cii, ci))
                    242:                overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
                    243:
                    244:        if (overflow != 0) {
                    245:                error = EOVERFLOW;
                    246:                log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
                    247:                    overflow);
                    248:        } else
                    249:                error = 0;
                    250:
                    251:        lockstat_init_tables(NULL);
                    252:
1.14      ad        253:        /* Run through all LWPs and clear the slate for the next run. */
                    254:        mutex_enter(proc_lock);
                    255:        LIST_FOREACH(l, &alllwp, l_list) {
                    256:                l->l_pfailaddr = 0;
                    257:                l->l_pfailtime = 0;
                    258:                l->l_pfaillock = 0;
                    259:        }
                    260:        mutex_exit(proc_lock);
                    261:
1.1       ad        262:        if (ld == NULL)
1.11      ad        263:                return error;
1.1       ad        264:
                    265:        /*
                    266:         * Fill out the disable struct for the caller.
                    267:         */
                    268:        timespecsub(&ts, &lockstat_stime, &ld->ld_time);
                    269:        ld->ld_size = lockstat_sizeb;
                    270:
                    271:        cpuno = 0;
                    272:        for (CPU_INFO_FOREACH(cii, ci)) {
1.15.10.1! bouyer    273:                if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
1.1       ad        274:                        log(LOG_WARNING, "lockstat: too many CPUs\n");
                    275:                        break;
                    276:                }
                    277:                ld->ld_freq[cpuno++] = cpu_frequency(ci);
                    278:        }
                    279:
1.11      ad        280:        return error;
1.1       ad        281: }
                    282:
                    283: /*
                    284:  * Allocate buffers for lockstat_start().
                    285:  */
                    286: int
                    287: lockstat_alloc(lsenable_t *le)
                    288: {
                    289:        lsbuf_t *lb;
                    290:        size_t sz;
                    291:
                    292:        KASSERT(!lockstat_enabled);
                    293:        lockstat_free();
                    294:
                    295:        sz = sizeof(*lb) * le->le_nbufs;
                    296:
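                                 /*
                                  * A KM_SLEEP allocation does not return NULL, so the check
                                  * below is purely defensive.
                                  */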
1.11      ad        297:        lb = kmem_zalloc(sz, KM_SLEEP);
1.1       ad        298:        if (lb == NULL)
                    299:                return (ENOMEM);
                    300:
                    301:        KASSERT(!lockstat_enabled);
                    302:        KASSERT(lockstat_baseb == NULL);
                    303:        lockstat_sizeb = sz;
                    304:        lockstat_baseb = lb;
                    305:
                    306:        return (0);
                    307: }
                    308:
                    309: /*
                    310:  * Free allocated buffers after tracing has stopped.
                    311:  */
                    312: void
                    313: lockstat_free(void)
                    314: {
                    315:
                    316:        KASSERT(!lockstat_enabled);
                    317:
                    318:        if (lockstat_baseb != NULL) {
1.11      ad        319:                kmem_free(lockstat_baseb, lockstat_sizeb);
1.1       ad        320:                lockstat_baseb = NULL;
                    321:        }
                    322: }
                    323:
                    324: /*
                     325:  * Main entry point from lock primitives.
                    326:  */
                    327: void
                    328: lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
1.6       ad        329:               uint64_t cycles)
1.1       ad        330: {
                    331:        lslist_t *ll;
                    332:        lscpu_t *lc;
                    333:        lsbuf_t *lb;
                    334:        u_int event;
                    335:        int s;
                    336:
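                                 /*
                                  * Record the event only if every requested event/lock class
                                  * is currently enabled and both the lock and the call site
                                  * fall within the configured ranges.
                                  */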
                    337:        if ((flags & lockstat_enabled) != flags || count == 0)
                    338:                return;
1.5       ad        339:        if (lock < lockstat_lockstart || lock > lockstat_lockend)
1.1       ad        340:                return;
                    341:        if (callsite < lockstat_csstart || callsite > lockstat_csend)
                    342:                return;
                    343:
                    344:        callsite &= lockstat_csmask;
1.10      ad        345:        lock &= lockstat_lamask;
1.1       ad        346:
                    347:        /*
                    348:         * Find the table for this lock+callsite pair, and try to locate a
                    349:         * buffer with the same key.
                    350:         */
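                                 /* splhigh() keeps interrupt-context events off this CPU's lists. */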
1.11      ad        351:        s = splhigh();
1.1       ad        352:        lc = curcpu()->ci_lockstat;
                    353:        ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
                    354:        event = (flags & LB_EVENT_MASK) - 1;
                    355:
                    356:        LIST_FOREACH(lb, ll, lb_chain.list) {
                    357:                if (lb->lb_lock == lock && lb->lb_callsite == callsite)
                    358:                        break;
                    359:        }
                    360:
                    361:        if (lb != NULL) {
                    362:                /*
                    363:                 * We found a record.  Move it to the front of the list, as
                    364:                 * we're likely to hit it again soon.
                    365:                 */
                    366:                if (lb != LIST_FIRST(ll)) {
                    367:                        LIST_REMOVE(lb, lb_chain.list);
                    368:                        LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                    369:                }
                    370:                lb->lb_counts[event] += count;
1.6       ad        371:                lb->lb_times[event] += cycles;
1.1       ad        372:        } else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
                    373:                /*
                    374:                 * Pinch a new buffer and fill it out.
                    375:                 */
                    376:                SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
                    377:                LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                    378:                lb->lb_flags = (uint16_t)flags;
                    379:                lb->lb_lock = lock;
                    380:                lb->lb_callsite = callsite;
                    381:                lb->lb_counts[event] = count;
1.6       ad        382:                lb->lb_times[event] = cycles;
1.1       ad        383:        } else {
                    384:                /*
                    385:                 * We didn't find a buffer and there were none free.
                    386:                 * lockstat_stop() will notice later on and report the
                    387:                 * error.
                    388:                 */
                    389:                 lc->lc_overflow++;
                    390:        }
                    391:
                    392:        splx(s);
                    393: }
                    394:
                    395: /*
                    396:  * Accept an open() on /dev/lockstat.
                    397:  */
                    398: int
1.11      ad        399: lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
1.1       ad        400: {
                    401:
1.11      ad        402:        if (!__cpu_simple_lock_try(&lockstat_lock))
                    403:                return EBUSY;
                    404:        lockstat_lwp = curlwp;
                    405:        return 0;
1.1       ad        406: }
                    407:
                    408: /*
                    409:  * Accept the last close() on /dev/lockstat.
                    410:  */
                    411: int
1.11      ad        412: lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
1.1       ad        413: {
                    414:
1.11      ad        415:        lockstat_lwp = NULL;
                    416:        __cpu_simple_unlock(&lockstat_lock);
                    417:        return 0;
1.1       ad        418: }
                    419:
                    420: /*
                    421:  * Handle control operations.
                    422:  */
                    423: int
1.11      ad        424: lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
1.1       ad        425: {
                    426:        lsenable_t *le;
                    427:        int error;
                    428:
1.11      ad        429:        if (lockstat_lwp != curlwp)
                    430:                return EBUSY;
1.1       ad        431:
                    432:        switch (cmd) {
                    433:        case IOC_LOCKSTAT_GVERSION:
                    434:                *(int *)data = LS_VERSION;
                    435:                error = 0;
                    436:                break;
                    437:
                    438:        case IOC_LOCKSTAT_ENABLE:
                    439:                le = (lsenable_t *)data;
                    440:
                    441:                if (!cpu_hascounter()) {
                    442:                        error = ENODEV;
                    443:                        break;
                    444:                }
                    445:                if (lockstat_enabled) {
                    446:                        error = EBUSY;
                    447:                        break;
                    448:                }
                    449:
                    450:                /*
                    451:                 * Sanitize the arguments passed in and set up filtering.
                    452:                 */
                    453:                if (le->le_nbufs == 0)
                    454:                        le->le_nbufs = LOCKSTAT_DEFBUFS;
                    455:                else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
                    456:                    le->le_nbufs < LOCKSTAT_MINBUFS) {
                    457:                        error = EINVAL;
                    458:                        break;
                    459:                }
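                                         /*
                                          * With no single call site or lock requested, widen
                                          * the ranges to cover everything (the end values
                                          * wrap around to ~0).
                                          */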
                    460:                if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
                    461:                        le->le_csstart = 0;
                    462:                        le->le_csend = le->le_csstart - 1;
                    463:                }
1.5       ad        464:                if ((le->le_flags & LE_ONE_LOCK) == 0) {
                    465:                        le->le_lockstart = 0;
                    466:                        le->le_lockend = le->le_lockstart - 1;
                    467:                }
1.1       ad        468:                if ((le->le_mask & LB_EVENT_MASK) == 0)
1.11      ad        469:                        return EINVAL;
1.1       ad        470:                if ((le->le_mask & LB_LOCK_MASK) == 0)
1.11      ad        471:                        return EINVAL;
1.1       ad        472:
                    473:                /*
                    474:                 * Start tracing.
                    475:                 */
                    476:                if ((error = lockstat_alloc(le)) == 0)
                    477:                        lockstat_start(le);
                    478:                break;
                    479:
                    480:        case IOC_LOCKSTAT_DISABLE:
                    481:                if (!lockstat_enabled)
                    482:                        error = EINVAL;
                    483:                else
                    484:                        error = lockstat_stop((lsdisable_t *)data);
                    485:                break;
                    486:
                    487:        default:
                    488:                error = ENOTTY;
                    489:                break;
                    490:        }
                    491:
                    492:        return error;
                    493: }
                    494:
                    495: /*
                    496:  * Copy buffers out to user-space.
                    497:  */
                    498: int
1.4       christos  499: lockstat_read(dev_t dev, struct uio *uio, int flag)
1.1       ad        500: {
                    501:
1.11      ad        502:        if (curlwp != lockstat_lwp || lockstat_enabled)
                    503:                return EBUSY;
                    504:        return uiomove(lockstat_baseb, lockstat_sizeb, uio);
1.1       ad        505: }
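
The ioctl protocol above is small enough to show end to end.  The sketch below is
not part of lockstat.c: it is a rough, lightly error-checked illustration of how a
userland consumer such as lockstat(8) might drive this driver.  It uses only the
IOC_LOCKSTAT_* commands and the lsenable_t/lsdisable_t/lsbuf_t fields referenced in
this file; the all-bits le_mask is an illustrative "trace everything" choice, not
necessarily what lockstat(8) itself does.

/* Hypothetical userland sketch -- illustrative only, not part of the driver. */
#include <sys/ioctl.h>

#include <dev/lockstat.h>

#include <err.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        lsenable_t le;
        lsdisable_t ld;
        lsbuf_t *buf;
        int fd, version;

        /* Only one process may hold the device open; a second open fails with EBUSY. */
        fd = open("/dev/lockstat", O_RDONLY);
        if (fd == -1)
                err(1, "open");
        if (ioctl(fd, IOC_LOCKSTAT_GVERSION, &version) == -1 || version != LS_VERSION)
                errx(1, "lockstat interface version mismatch");

        memset(&le, 0, sizeof(le));
        le.le_nbufs = 0;                                /* 0 means LOCKSTAT_DEFBUFS */
        le.le_flags = 0;                                /* no LE_ONE_LOCK / LE_ONE_CALLSITE filter */
        le.le_mask = LB_EVENT_MASK | LB_LOCK_MASK;      /* trace every event and lock class */
        if (ioctl(fd, IOC_LOCKSTAT_ENABLE, &le) == -1)
                err(1, "enable");

        sleep(10);                                      /* let the kernel gather statistics */

        if (ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld) == -1)
                err(1, "disable");

        /* ld.ld_size bytes of lsbuf_t records can now be read out. */
        buf = malloc(ld.ld_size);
        if (buf == NULL || read(fd, buf, ld.ld_size) == -1)
                err(1, "read");
        /* ... decode lb_lock, lb_callsite, lb_counts[] and lb_times[] here ... */

        free(buf);
        close(fd);
        return 0;
}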
