
src/sys/kern/subr_percpu.c, revision 1.16

/*	$NetBSD: subr_percpu.c,v 1.15 2011/09/02 22:25:08 dyoung Exp $	*/

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.15 2011/09/02 22:25:08 dyoung Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#define	PERCPU_QUANTUM_SIZE	(ALIGNBYTES + 1)
#define	PERCPU_QCACHE_MAX	0
#define	PERCPU_IMPORT_SIZE	2048

#if defined(DIAGNOSTIC)
#define	MAGIC	0x50435055	/* "PCPU" */
#define	percpu_encrypt(pc)	((pc) ^ MAGIC)
#define	percpu_decrypt(pc)	((pc) ^ MAGIC)
#else /* defined(DIAGNOSTIC) */
#define	percpu_encrypt(pc)	(pc)
#define	percpu_decrypt(pc)	(pc)
#endif /* defined(DIAGNOSTIC) */
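
/*
 * Under DIAGNOSTIC, a percpu_t handle is simply the storage offset XORed
 * with MAGIC, which helps catch code that misuses a percpu_t as a raw
 * pointer or offset.  For example, with an illustrative offset of 0x40:
 *
 *	percpu_encrypt(0x40) == 0x50435055 ^ 0x40 == 0x50435015
 *	percpu_decrypt(0x50435015) == 0x50435015 ^ 0x50435055 == 0x40
 *
 * XORing twice with the same constant is an involution, so decrypt
 * exactly undoes encrypt.
 */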

static krwlock_t	percpu_swap_lock	__cacheline_aligned;
static kmutex_t		percpu_allocation_lock	__cacheline_aligned;
static vmem_t *		percpu_offset_arena	__cacheline_aligned;
static unsigned int	percpu_nextoff		__cacheline_aligned;

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

	return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{
	const unsigned int off = percpu_decrypt((uintptr_t)pc);

	KASSERT(off < percpu_nextoff);
	return off;
}

/*
 * percpu_cpu_swap: cross-call handler for percpu_cpu_enlarge
 */

static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	KASSERT(ci == curcpu() || !mp_online);

	/*
	 * swap *pcc and *newpcc unless someone has beaten us to it.
	 */
	rw_enter(&percpu_swap_lock, RW_WRITER);
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		tmp = *pcc;

		/*
		 * block interrupts so that we don't lose their modifications.
		 */

		s = splhigh();

		/*
		 * copy data to the new storage.
		 */

		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

		/*
		 * this assignment needs to be atomic for percpu_getptr_remote.
		 */

		pcc->pcc_data = newpcc->pcc_data;

		splx(s);

		pcc->pcc_size = newpcc->pcc_size;
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that the percpu_cpu_t of each cpu has enough
 * space
 */

static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
		pcc.pcc_size = size;
		if (!mp_online) {
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
		}
		KASSERT(pcc.pcc_size < size);
		if (pcc.pcc_data != NULL) {
			kmem_free(pcc.pcc_data, pcc.pcc_size);
		}
	}
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */

static int
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags, vmem_addr_t *addrp)
{
	unsigned int offset;
	unsigned int nextoff;

	ASSERT_SLEEPABLE();
	KASSERT(dummy == NULL);

	if ((vmflags & VM_NOSLEEP) != 0)
		return ENOMEM;

	size = roundup(size, PERCPU_IMPORT_SIZE);
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	*addrp = (vmem_addr_t)offset;
	return 0;
}

static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
	size_t sz = (uintptr_t)vp2;

	memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zeros.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);
	percpu_nextoff = PERCPU_QUANTUM_SIZE;

	percpu_offset_arena = vmem_xcreate("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for
 *    CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	size_t size = percpu_nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();
	pcc->pcc_size = size;
	if (size) {
		pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
	}
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{
	vmem_addr_t offset;
	percpu_t *pc;

	ASSERT_SLEEPABLE();
	if (vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
	    &offset) != 0)
		return NULL;
	pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
	percpu_zero(pc, size);
	return pc;
}
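
/*
 * typical usage of percpu_alloc/percpu_free would be (a hypothetical
 * "frobstat" counter; the name is illustrative and not part of this file):
 *
 *	static percpu_t *frobstat_percpu;
 *
 *	frobstat_percpu = percpu_alloc(sizeof(uint64_t));
 *	...use the storage via percpu_getref etc...
 *	percpu_free(frobstat_percpu, sizeof(uint64_t));
 *
 * note that a percpu_t does not record its own size; the caller must pass
 * the same size to percpu_free that it passed to percpu_alloc.
 */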

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

	ASSERT_SLEEPABLE();
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

	KPREEMPT_DISABLE(curlwp);
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after caller is done with per-cpu
 *    data
 */

void
percpu_putref(percpu_t *pc)
{

	KPREEMPT_ENABLE(curlwp);
}
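
/*
 * typical usage of percpu_getref/percpu_putref would be (continuing the
 * hypothetical "frobstat" counter; illustrative only):
 *
 *	uint64_t *p;
 *
 *	p = percpu_getref(frobstat_percpu);
 *	(*p)++;
 *	percpu_putref(frobstat_percpu);
 *
 * preemption is disabled between the two calls, so the pointer refers to
 * the current cpu's private copy for the whole critical section.  the
 * pointer must not be cached beyond percpu_putref: the lwp can migrate
 * afterwards, and percpu_cpu_swap can move the underlying storage.
 */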

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => called in thread context.
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it is executed while
 *    holding a global lock, which can block low-priority xcalls.
 *    e.g. it's illegal for a callback function to sleep for memory
 *    allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}
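
/*
 * typical usage of percpu_foreach would be summing the hypothetical
 * per-cpu "frobstat" counter (illustrative only):
 *
 *	static void
 *	frobstat_sum_cb(void *p, void *arg, struct cpu_info *ci)
 *	{
 *		uint64_t *sump = arg;
 *
 *		*sump += *(uint64_t *)p;
 *	}
 *
 *	uint64_t sum = 0;
 *	percpu_foreach(frobstat_percpu, frobstat_sum_cb, &sum);
 *
 * the callback is invoked on the calling cpu once for each cpu's data;
 * per the rules above, it must not sleep.
 */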
