/*	$NetBSD: subr_percpu.c,v 1.15 2011/09/02 22:25:08 dyoung Exp $	*/

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.15 2011/09/02 22:25:08 dyoung Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#define	PERCPU_QUANTUM_SIZE	(ALIGNBYTES + 1)
#define	PERCPU_QCACHE_MAX	0
#define	PERCPU_IMPORT_SIZE	2048

#if defined(DIAGNOSTIC)
#define	MAGIC	0x50435055	/* "PCPU" */
#define	percpu_encrypt(pc)	((pc) ^ MAGIC)
#define	percpu_decrypt(pc)	((pc) ^ MAGIC)
#else /* defined(DIAGNOSTIC) */
#define	percpu_encrypt(pc)	(pc)
#define	percpu_decrypt(pc)	(pc)
#endif /* defined(DIAGNOSTIC) */
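
/*
 * A percpu_t is not a pointer to storage: it encodes an object's offset
 * within each cpu's percpu area (see percpu_offset below).  Under
 * DIAGNOSTIC the offset is additionally XORed with MAGIC, so code that
 * mistakenly dereferences a percpu_t as a plain pointer is likely to
 * fault early rather than silently read one cpu's data.
 */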

static krwlock_t	percpu_swap_lock	__cacheline_aligned;
static kmutex_t		percpu_allocation_lock	__cacheline_aligned;
static vmem_t *		percpu_offset_arena	__cacheline_aligned;
static unsigned int	percpu_nextoff		__cacheline_aligned;

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

	return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{
	const unsigned int off = percpu_decrypt((uintptr_t)pc);

	KASSERT(off < percpu_nextoff);
	return off;
}

/*
 * percpu_cpu_swap: cross-call handler for percpu_cpu_enlarge
 */

static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	KASSERT(ci == curcpu() || !mp_online);

	/*
	 * swap *pcc and *newpcc unless someone has beaten us to it.
	 */
	rw_enter(&percpu_swap_lock, RW_WRITER);
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		tmp = *pcc;

		/*
		 * block interrupts so that we don't lose their modifications.
		 */

		s = splhigh();

		/*
		 * copy data to the new storage.
		 */

		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

		/*
		 * this assignment needs to be atomic for percpu_getptr_remote.
		 */

		pcc->pcc_data = newpcc->pcc_data;

		splx(s);

		pcc->pcc_size = newpcc->pcc_size;
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that each cpu's percpu_cpu_t has enough space
 */

static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
		pcc.pcc_size = size;
		if (!mp_online) {
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
		}
		KASSERT(pcc.pcc_size < size);
		if (pcc.pcc_data != NULL) {
			kmem_free(pcc.pcc_data, pcc.pcc_size);
		}
	}
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */
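/*
 * Offsets are handed out monotonically under percpu_allocation_lock and
 * never returned to this backend; freed percpu allocations are recycled
 * by the vmem arena sitting on top.  Each import first grows every cpu's
 * area to the new high-water mark, so any offset below percpu_nextoff is
 * backed with storage on all cpus.
 */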

static int
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags, vmem_addr_t *addrp)
{
	unsigned int offset;
	unsigned int nextoff;

	ASSERT_SLEEPABLE();
	KASSERT(dummy == NULL);

	if ((vmflags & VM_NOSLEEP) != 0)
		return ENOMEM;

	size = roundup(size, PERCPU_IMPORT_SIZE);
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	*addrp = (vmem_addr_t)offset;
	return 0;
}

static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
	size_t sz = (uintptr_t)vp2;

	memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zero.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);
	percpu_nextoff = PERCPU_QUANTUM_SIZE;

	percpu_offset_arena = vmem_xcreate("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for
 *    CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	size_t size = percpu_nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();
	pcc->pcc_size = size;
	if (size) {
		pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
	}
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{
	vmem_addr_t offset;
	percpu_t *pc;

	ASSERT_SLEEPABLE();
	if (vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
	    &offset) != 0)
		return NULL;
	pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
	percpu_zero(pc, size);
	return pc;
}
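
/*
 * Illustrative lifecycle sketch (not from the original source; "frobcnt"
 * is a hypothetical per-cpu counter):
 *
 *	static percpu_t *frobcnt_percpu;
 *
 *	frobcnt_percpu = percpu_alloc(sizeof(uint64_t));
 *	...
 *	percpu_free(frobcnt_percpu, sizeof(uint64_t));
 *
 * note that the caller must remember the allocation size and pass the
 * same value to percpu_free.
 */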

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

	ASSERT_SLEEPABLE();
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}

/*
 * percpu_getref:
 *
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

	KPREEMPT_DISABLE(curlwp);
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 *
 * => drops the preemption-disabled count after caller is done with per-cpu
 *    data
 */

void
percpu_putref(percpu_t *pc)
{

	KPREEMPT_ENABLE(curlwp);
}
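
/*
 * Typical access pattern (illustrative; reuses the hypothetical
 * frobcnt_percpu from the sketch above).  Preemption stays disabled
 * between the two calls, so the caller must not block in between:
 *
 *	uint64_t *p;
 *
 *	p = percpu_getref(frobcnt_percpu);
 *	(*p)++;
 *	percpu_putref(frobcnt_percpu);
 */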

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access a remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => called in thread context.
 * => the caller should not rely on the cpu iteration order.
 * => the callback function should be kept minimal because it is executed
 *    while holding a global lock, which can block low-priority xcalls.
 *    e.g. it's illegal for a callback function to sleep for memory
 *    allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}
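
/*
 * Illustrative callback sketch (hypothetical names): summing a per-cpu
 * uint64_t counter across all cpus with percpu_foreach.
 *
 *	static void
 *	sum_cb(void *p, void *arg, struct cpu_info *ci)
 *	{
 *		uint64_t *sum = arg;
 *
 *		*sum += *(uint64_t *)p;
 *	}
 *
 *	uint64_t sum = 0;
 *	percpu_foreach(frobcnt_percpu, sum_cb, &sum);
 */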