Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/kern/subr_pool.c,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/subr_pool.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.128.2.10
retrieving revision 1.133.4.1
diff -u -p -r1.128.2.10 -r1.133.4.1
--- src/sys/kern/subr_pool.c	2007/09/25 01:36:19	1.128.2.10
+++ src/sys/kern/subr_pool.c	2007/11/19 00:48:50	1.133.4.1
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_pool.c,v 1.128.2.10 2007/09/25 01:36:19 ad Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.133.4.1 2007/11/19 00:48:50 mjf Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1999, 2000, 2002, 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.128.2.10 2007/09/25 01:36:19 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.133.4.1 2007/11/19 00:48:50 mjf Exp $");
 
 #include "opt_pool.h"
 #include "opt_poollog.h"
@@ -46,6 +46,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -55,6 +56,8 @@ __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,
 #include
 #include
 #include
+#include
+#include
 
 #include
 
@@ -81,7 +84,8 @@ LIST_HEAD(,pool_cache) pool_cache_head =
 /* Private pool for page header structures */
 #define PHPOOL_MAX      8
 static struct pool phpool[PHPOOL_MAX];
-#define PHPOOL_FREELIST_NELEM(idx)      (((idx) == 0) ? 0 : (1 << (idx)))
+#define PHPOOL_FREELIST_NELEM(idx) \
+        (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx)))
 
 #ifdef POOL_SUBPAGE
 /* Pool of subpages for use by normal pools. */
@@ -110,7 +114,9 @@ static struct pool *drainpp;
 static kmutex_t pool_head_lock;
 static kcondvar_t pool_busy;
 
-typedef uint8_t pool_item_freelist_t;
+typedef uint32_t pool_item_bitmap_t;
+#define BITMAP_SIZE     (CHAR_BIT * sizeof(pool_item_bitmap_t))
+#define BITMAP_MASK     (BITMAP_SIZE - 1)
 
 struct pool_item_header {
         /* Page headers */
@@ -120,6 +126,7 @@ struct pool_item_header {
                                 ph_node;        /* Off-page page headers */
         void *                  ph_page;        /* this page's address */
         struct timeval          ph_time;        /* last referenced */
+        uint16_t                ph_nmissing;    /* # of chunks in use */
         union {
                 /* !PR_NOTOUCH */
                 struct {
@@ -128,27 +135,20 @@ struct pool_item_header {
                 } phu_normal;
                 /* PR_NOTOUCH */
                 struct {
-                        uint16_t
-                                phu_off;        /* start offset in page */
-                        pool_item_freelist_t
-                                phu_firstfree;  /* first free item */
-                        /*
-                         * XXX it might be better to use
-                         * a simple bitmap and ffs(3)
-                         */
+                        uint16_t phu_off;       /* start offset in page */
+                        pool_item_bitmap_t phu_bitmap[];
                 } phu_notouch;
         } ph_u;
-        uint16_t                ph_nmissing;    /* # of chunks in use */
 };
 #define ph_itemlist     ph_u.phu_normal.phu_itemlist
 #define ph_off          ph_u.phu_notouch.phu_off
-#define ph_firstfree    ph_u.phu_notouch.phu_firstfree
+#define ph_bitmap       ph_u.phu_notouch.phu_bitmap
 
 struct pool_item {
 #ifdef DIAGNOSTIC
         u_int pi_magic;
 #endif
-#define PI_MAGIC 0xdeadbeefU
+#define PI_MAGIC 0xdeaddeadU
         /* Other entries use only this list entry */
         LIST_ENTRY(pool_item)   pi_list;
 };
@@ -191,6 +191,7 @@ static pool_cache_cpu_t *pool_cache_get_
                                              void **, paddr_t *, int);
 static void     pool_cache_cpu_init1(struct cpu_info *, pool_cache_t);
 static void     pool_cache_invalidate_groups(pool_cache_t, pcg_t *);
+static void     pool_cache_xcall(pool_cache_t);
 
 static int      pool_catchup(struct pool *);
 static void     pool_prime_page(struct pool *, void *,
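The struct pool_item_header change above replaces the old one-byte-per-item free list with a trailing pool_item_bitmap_t array, sized in words of BITMAP_SIZE bits. The small userland sketch below (illustrative names only, not the kernel code) shows how such a header with a flexible array member can be sized, mirroring the offsetof(..., ph_bitmap[howmany(nelem, BITMAP_SIZE)]) calculation that appears later in this diff; it assumes a GCC-compatible compiler, which accepts a variable array index inside offsetof().

#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t item_bitmap_t;
#define BITMAP_WORD_BITS (CHAR_BIT * sizeof(item_bitmap_t))
#define HOWMANY(x, y)    (((x) + ((y) - 1)) / (y))

/* Hypothetical stand-in for struct pool_item_header. */
struct page_header {
        uint16_t        off;            /* start offset of items in the page */
        uint16_t        nmissing;       /* items currently allocated */
        item_bitmap_t   bitmap[];       /* one bit per item, 1 = free */
};

int
main(void)
{
        unsigned int nelem = 100;       /* items per page, for example */

        /* Header size including enough bitmap words to cover nelem items. */
        size_t sz = offsetof(struct page_header,
            bitmap[HOWMANY(nelem, BITMAP_WORD_BITS)]);

        printf("%u items need %zu bitmap words; header is %zu bytes\n",
            nelem, HOWMANY(nelem, BITMAP_WORD_BITS), sz);
        return 0;
}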
@@ -327,12 +328,12 @@ pr_enter_check(struct pool *pp, void (*p
 #define pr_enter_check(pp, pr)
 #endif /* POOL_DIAGNOSTIC */
 
-static inline int
+static inline unsigned int
 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph,
     const void *v)
 {
         const char *cp = v;
-        int idx;
+        unsigned int idx;
 
         KASSERT(pp->pr_roflags & PR_NOTOUCH);
         idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size;
@@ -340,37 +341,57 @@ pr_item_notouch_index(const struct pool
         return idx;
 }
 
-#define PR_FREELIST_ALIGN(p) \
-        roundup((uintptr_t)(p), sizeof(pool_item_freelist_t))
-#define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1))
-#define PR_INDEX_USED   ((pool_item_freelist_t)-1)
-#define PR_INDEX_EOL    ((pool_item_freelist_t)-2)
-
 static inline void
 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph,
     void *obj)
 {
-        int idx = pr_item_notouch_index(pp, ph, obj);
-        pool_item_freelist_t *freelist = PR_FREELIST(ph);
+        unsigned int idx = pr_item_notouch_index(pp, ph, obj);
+        pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE);
+        pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK);
 
-        KASSERT(freelist[idx] == PR_INDEX_USED);
-        freelist[idx] = ph->ph_firstfree;
-        ph->ph_firstfree = idx;
+        KASSERT((*bitmap & mask) == 0);
+        *bitmap |= mask;
 }
 
 static inline void *
 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph)
 {
-        int idx = ph->ph_firstfree;
-        pool_item_freelist_t *freelist = PR_FREELIST(ph);
+        pool_item_bitmap_t *bitmap = ph->ph_bitmap;
+        unsigned int idx;
+        int i;
 
-        KASSERT(freelist[idx] != PR_INDEX_USED);
-        ph->ph_firstfree = freelist[idx];
-        freelist[idx] = PR_INDEX_USED;
+        for (i = 0; ; i++) {
+                int bit;
+                KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage);
+                bit = ffs32(bitmap[i]);
+                if (bit) {
+                        pool_item_bitmap_t mask;
+
+                        bit--;
+                        idx = (i * BITMAP_SIZE) + bit;
+                        mask = 1 << bit;
+                        KASSERT((bitmap[i] & mask) != 0);
+                        bitmap[i] &= ~mask;
+                        break;
+                }
+        }
+        KASSERT(idx < pp->pr_itemsperpage);
         return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size;
 }
 
+static inline void
+pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph)
+{
+        pool_item_bitmap_t *bitmap = ph->ph_bitmap;
+        const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE);
+        int i;
+
+        for (i = 0; i < n; i++) {
+                bitmap[i] = (pool_item_bitmap_t)-1;
+        }
+}
+
 static inline int
 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
 {
@@ -600,9 +621,6 @@ pool_init(struct pool *pp, size_t size,
         size_t trysize, phsize;
         int off, slack;
 
-        KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
-            PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));
-
 #ifdef DEBUG
         /*
          * Check that the pool hasn't already been initialised and
@@ -805,8 +823,8 @@ pool_init(struct pool *pp, size_t size,
                             "phpool-%d", nelem);
                         sz = sizeof(struct pool_item_header);
                         if (nelem) {
-                                sz = PR_FREELIST_ALIGN(sz)
-                                    + nelem * sizeof(pool_item_freelist_t);
+                                sz = offsetof(struct pool_item_header,
+                                    ph_bitmap[howmany(nelem, BITMAP_SIZE)]);
                         }
                         pool_init(&phpool[idx], sz, 0, 0, 0,
                             phpool_names[idx], &pool_allocator_meta, IPL_VM);
@@ -921,7 +939,7 @@ pool_alloc_item_header(struct pool *pp,
 }
 
 /*
- * Grab an item from the pool; must be called at appropriate spl level
+ * Grab an item from the pool.
  */
 void *
 #ifdef POOL_DIAGNOSTIC
@@ -1269,7 +1287,7 @@ pool_do_put(struct pool *pp, void *v, st
 }
 
 /*
- * Return resource to the pool; must be called at appropriate spl level
+ * Return resource to the pool.
  */
 #ifdef POOL_DIAGNOSTIC
 void
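pr_item_notouch_get() above scans the per-page bitmap with ffs32() for a set (free) bit, clears it, and turns the bit position into an item address; pr_item_notouch_put() sets the bit again. The standalone userland sketch below demonstrates the same bookkeeping on a plain index space, using the standard ffs(3) in place of the kernel's ffs32(); all other names are illustrative assumptions.

#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>                    /* ffs(3) */

#define NITEMS          75              /* items tracked, for example */
typedef uint32_t bitmap_t;
#define WORD_BITS       (CHAR_BIT * sizeof(bitmap_t))
#define NWORDS          ((NITEMS + WORD_BITS - 1) / WORD_BITS)

static bitmap_t bitmap[NWORDS];

static void
bitmap_init(void)
{
        size_t i;

        /* All bits set: every item starts out free (cf. pr_item_notouch_init). */
        for (i = 0; i < NWORDS; i++)
                bitmap[i] = (bitmap_t)-1;
        /*
         * Clear the unused bits of the last word so they are never handed
         * out; the kernel code instead bounds its scan by pr_itemsperpage.
         */
        if (NITEMS % WORD_BITS != 0)
                bitmap[NWORDS - 1] &= ((bitmap_t)1 << (NITEMS % WORD_BITS)) - 1;
}

static int
bitmap_get(void)
{
        size_t i;

        for (i = 0; i < NWORDS; i++) {
                int bit = ffs((int)bitmap[i]);  /* 1-based, 0 if no bit set */
                if (bit != 0) {
                        bit--;
                        bitmap[i] &= ~((bitmap_t)1 << bit);
                        return (int)(i * WORD_BITS + bit);
                }
        }
        return -1;                      /* nothing free */
}

static void
bitmap_put(int idx)
{
        bitmap_t mask = (bitmap_t)1 << (idx % WORD_BITS);

        assert((bitmap[idx / WORD_BITS] & mask) == 0);  /* catch double free */
        bitmap[idx / WORD_BITS] |= mask;
}

int
main(void)
{
        bitmap_init();
        int a = bitmap_get();
        int b = bitmap_get();
        printf("allocated items %d and %d\n", a, b);
        bitmap_put(a);
        printf("item %d is reused after being freed\n", bitmap_get());
        bitmap_put(b);
        return 0;
}

Because a freshly initialised page has every bit set, the first allocation after initialisation always succeeds in the first word; a double free trips the same kind of assertion that the KASSERT in pr_item_notouch_put() provides in the kernel.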
@@ -1430,14 +1448,7 @@ pool_prime_page(struct pool *pp, void *s
         pp->pr_nitems += n;
 
         if (pp->pr_roflags & PR_NOTOUCH) {
-                pool_item_freelist_t *freelist = PR_FREELIST(ph);
-                int i;
-
-                ph->ph_off = (char *)cp - (char *)storage;
-                ph->ph_firstfree = 0;
-                for (i = 0; i < n - 1; i++)
-                        freelist[i] = i + 1;
-                freelist[n - 1] = PR_INDEX_EOL;
+                pr_item_notouch_init(pp, ph);
         } else {
                 while (n--) {
                         pi = (struct pool_item *)cp;
@@ -1570,6 +1581,8 @@ pool_reclaim(struct pool *pp)
         struct pool_item_header *ph, *phnext;
         struct pool_pagelist pq;
         struct timeval curtime, diff;
+        bool klock;
+        int rv;
 
         if (pp->pr_drain_hook != NULL) {
                 /*
@@ -1578,12 +1591,28 @@ pool_reclaim(struct pool *pp)
                 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
         }
 
+        /*
+         * XXXSMP Because mutexes at IPL_SOFTXXX are still spinlocks,
+         * and we are called from the pagedaemon without kernel_lock.
+         * Does not apply to IPL_SOFTBIO.
+         */
+        if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK ||
+            pp->pr_ipl == IPL_SOFTSERIAL) {
+                KERNEL_LOCK(1, NULL);
+                klock = true;
+        } else
+                klock = false;
+
         /* Reclaim items from the pool's cache (if any). */
         if (pp->pr_cache != NULL)
                 pool_cache_invalidate(pp->pr_cache);
 
-        if (mutex_tryenter(&pp->pr_lock) == 0)
+        if (mutex_tryenter(&pp->pr_lock) == 0) {
+                if (klock) {
+                        KERNEL_UNLOCK_ONE(NULL);
+                }
                 return (0);
+        }
         pr_enter(pp, file, line);
 
         LIST_INIT(&pq);
@@ -1616,49 +1645,88 @@ pool_reclaim(struct pool *pp)
         pr_leave(pp);
         mutex_exit(&pp->pr_lock);
+
         if (LIST_EMPTY(&pq))
-                return 0;
+                rv = 0;
+        else {
+                pr_pagelist_free(pp, &pq);
+                rv = 1;
+        }
 
-        pr_pagelist_free(pp, &pq);
+        if (klock) {
+                KERNEL_UNLOCK_ONE(NULL);
+        }
 
-        return (1);
+        return (rv);
 }
 
 /*
- * Drain pools, one at a time.
+ * Drain pools, one at a time.  This is a two stage process;
+ * drain_start kicks off a cross call to drain CPU-level caches
+ * if the pool has an associated pool_cache.  drain_end waits
+ * for those cross calls to finish, and then drains the cache
+ * (if any) and pool.
  *
- * Note, we must never be called from an interrupt context.
+ * Note, must never be called from interrupt context.
  */
 void
-pool_drain(void *arg)
+pool_drain_start(struct pool **ppp, uint64_t *wp)
 {
         struct pool *pp;
 
+        KASSERT(!LIST_EMPTY(&pool_head));
+
         pp = NULL;
 
         /* Find next pool to drain, and add a reference. */
         mutex_enter(&pool_head_lock);
-        if (drainpp == NULL) {
-                drainpp = LIST_FIRST(&pool_head);
-        }
-        if (drainpp != NULL) {
-                pp = drainpp;
-                drainpp = LIST_NEXT(pp, pr_poollist);
-        }
-        if (pp != NULL)
-                pp->pr_refcnt++;
+        do {
+                if (drainpp == NULL) {
+                        drainpp = LIST_FIRST(&pool_head);
+                }
                if (drainpp != NULL) {
+                        pp = drainpp;
+                        drainpp = LIST_NEXT(pp, pr_poollist);
+                }
+                /*
+                 * Skip completely idle pools.  We depend on at least
+                 * one pool in the system being active.
+                 */
+        } while (pp == NULL || pp->pr_npages == 0);
+        pp->pr_refcnt++;
         mutex_exit(&pool_head_lock);
 
-        /* If we have a candidate, drain it and unlock. */
-        if (pp != NULL) {
-                pool_reclaim(pp);
-                mutex_enter(&pool_head_lock);
-                pp->pr_refcnt--;
-                cv_broadcast(&pool_busy);
-                mutex_exit(&pool_head_lock);
+        /* If there is a pool_cache, drain CPU level caches. */
+        *ppp = pp;
+        if (pp->pr_cache != NULL) {
+                *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall,
+                    pp->pr_cache, NULL);
         }
 }
 
+void
+pool_drain_end(struct pool *pp, uint64_t where)
+{
+
+        if (pp == NULL)
+                return;
+
+        KASSERT(pp->pr_refcnt > 0);
+
+        /* Wait for remote draining to complete. */
+        if (pp->pr_cache != NULL)
+                xc_wait(where);
+
+        /* Drain the cache (if any) and pool.. */
+        pool_reclaim(pp);
+
+        /* Finally, unlock the pool. */
+        mutex_enter(&pool_head_lock);
+        pp->pr_refcnt--;
+        cv_broadcast(&pool_busy);
+        mutex_exit(&pool_head_lock);
+}
+
 /*
  * Diagnostic helpers.
  */
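To show how the split interface above is meant to be used, here is a minimal caller-side sketch. Only pool_drain_start(), pool_drain_end() and their argument types come from this diff; the surrounding function, its name, and the idea of overlapping other work between the two calls are illustrative assumptions (per the comments above, the pagedaemon is the expected caller).

#include <sys/types.h>
#include <sys/pool.h>

/* Hypothetical caller, e.g. part of a pagedaemon-style reclaim pass. */
static void
reclaim_pass(void)
{
        struct pool *pp;
        uint64_t where;

        /*
         * Stage 1: choose the next pool and, if it has a pool_cache,
         * start a cross call (pool_cache_xcall) that moves per-CPU
         * cached objects back to the global cache.
         */
        pool_drain_start(&pp, &where);

        /* Other reclamation work could overlap with the cross call here. */

        /*
         * Stage 2: wait for the remote CPUs to finish, drain the cache
         * and the pool itself, then drop the reference taken above.
         */
        pool_drain_end(pp, where);
}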
@@ -1983,21 +2051,29 @@ pool_cache_bootstrap(pool_cache_t pc, si
 
         pc->pc_emptygroups = NULL;
         pc->pc_fullgroups = NULL;
+        pc->pc_partgroups = NULL;
         pc->pc_ctor = ctor;
         pc->pc_dtor = dtor;
         pc->pc_arg = arg;
         pc->pc_hits = 0;
         pc->pc_misses = 0;
         pc->pc_nempty = 0;
+        pc->pc_npart = 0;
         pc->pc_nfull = 0;
         pc->pc_contended = 0;
         pc->pc_refcnt = 0;
+        pc->pc_freecheck = NULL;
 
         /* Allocate per-CPU caches. */
         memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus));
         pc->pc_ncpu = 0;
-        for (CPU_INFO_FOREACH(cii, ci)) {
-                pool_cache_cpu_init1(ci, pc);
+        if (ncpu == 0) {
+                /* XXX For sparc: boot CPU is not attached yet. */
+                pool_cache_cpu_init1(curcpu(), pc);
+        } else {
+                for (CPU_INFO_FOREACH(cii, ci)) {
+                        pool_cache_cpu_init1(ci, pc);
+                }
         }
 
         if (__predict_true(!cold)) {
@@ -2072,11 +2148,15 @@ static void
 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc)
 {
         pool_cache_cpu_t *cc;
+        int index;
+
+        index = ci->ci_index;
+        KASSERT(index < MAXCPUS);
 
         KASSERT(((uintptr_t)pc->pc_cpus & (CACHE_LINE_SIZE - 1)) == 0);
 
-        if ((cc = pc->pc_cpus[ci->ci_index]) != NULL) {
-                KASSERT(cc->cc_cpu = ci);
+        if ((cc = pc->pc_cpus[index]) != NULL) {
+                KASSERT(cc->cc_cpuindex == index);
                 return;
         }
 
@@ -2097,13 +2177,13 @@ pool_cache_cpu_init1(struct cpu_info *ci
         cc->cc_ipl = pc->pc_pool.pr_ipl;
         cc->cc_iplcookie = makeiplcookie(cc->cc_ipl);
         cc->cc_cache = pc;
-        cc->cc_cpu = ci;
+        cc->cc_cpuindex = index;
         cc->cc_hits = 0;
         cc->cc_misses = 0;
         cc->cc_current = NULL;
         cc->cc_previous = NULL;
 
-        pc->pc_cpus[ci->ci_index] = cc;
+        pc->pc_cpus[index] = cc;
 }
 
 /*
@@ -2142,6 +2222,14 @@ pool_cache_reclaim(pool_cache_t pc)
         return pool_reclaim(&pc->pc_pool);
 }
 
+static void
+pool_cache_destruct_object1(pool_cache_t pc, void *object)
+{
+
+        (*pc->pc_dtor)(pc->pc_arg, object);
+        pool_put(&pc->pc_pool, object);
+}
+
 /*
  * pool_cache_destruct_object:
  *
@@ -2152,8 +2240,9 @@ void
 pool_cache_destruct_object(pool_cache_t pc, void *object)
 {
 
-        (*pc->pc_dtor)(pc->pc_arg, object);
-        pool_put(&pc->pc_pool, object);
+        FREECHECK_IN(&pc->pc_freecheck, object);
+
+        pool_cache_destruct_object1(pc, object);
 }
 
 /*
@@ -2173,7 +2262,7 @@ pool_cache_invalidate_groups(pool_cache_
 
                 for (i = 0; i < pcg->pcg_avail; i++) {
                         object = pcg->pcg_objects[i].pcgo_va;
-                        pool_cache_destruct_object(pc, object);
+                        pool_cache_destruct_object1(pc, object);
                 }
 
                 pool_put(&pcgpool, pcg);
@@ -2189,19 +2278,23 @@ pool_cache_invalidate_groups(pool_cache_
 void
 pool_cache_invalidate(pool_cache_t pc)
 {
-        pcg_t *full, *empty;
+        pcg_t *full, *empty, *part;
 
         mutex_enter(&pc->pc_lock);
         full = pc->pc_fullgroups;
         empty = pc->pc_emptygroups;
+        part = pc->pc_partgroups;
         pc->pc_fullgroups = NULL;
         pc->pc_emptygroups = NULL;
+        pc->pc_partgroups = NULL;
         pc->pc_nfull = 0;
         pc->pc_nempty = 0;
+        pc->pc_npart = 0;
         mutex_exit(&pc->pc_lock);
 
         pool_cache_invalidate_groups(pc, full);
         pool_cache_invalidate_groups(pc, empty);
+        pool_cache_invalidate_groups(pc, part);
 }
 
 void
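The next hunk changes pool_cache_cpu_enter() to disable preemption with crit_enter() before it reads curcpu(), so a thread can no longer migrate between looking up its CPU index and touching the per-CPU cache. The fragment below is a generic sketch of that pattern, not the pool_cache code: the structure, array, function name and header choices are assumptions, while crit_enter()/crit_exit(), curcpu(), ci_index and MAXCPUS are the kernel interfaces already used in this diff.

#include <sys/param.h>
#include <sys/cpu.h>

/* Hypothetical per-CPU statistic, one slot per CPU. */
struct pcpu_slot {
        uint64_t        ps_count;
};

static struct pcpu_slot pcpu_slots[MAXCPUS];

static void
pcpu_bump(void)
{
        struct pcpu_slot *ps;

        crit_enter();                   /* no preemption, so no migration */
        ps = &pcpu_slots[curcpu()->ci_index];
        ps->ps_count++;
        crit_exit();
}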
@@ -2236,33 +2329,19 @@ static inline pool_cache_cpu_t *
 pool_cache_cpu_enter(pool_cache_t pc, int *s)
 {
         pool_cache_cpu_t *cc;
-        struct cpu_info *ci;
 
         /*
          * Prevent other users of the cache from accessing our
          * CPU-local data.  To avoid touching shared state, we
          * pull the neccessary information from CPU local data.
          */
-        ci = curcpu();
-        KASSERT(ci->ci_data.cpu_index < MAXCPUS);
-        cc = pc->pc_cpus[ci->ci_data.cpu_index];
+        crit_enter();
+        cc = pc->pc_cpus[curcpu()->ci_index];
         KASSERT(cc->cc_cache == pc);
-        if (cc->cc_ipl == IPL_NONE) {
-                crit_enter();
-        } else {
+        if (cc->cc_ipl != IPL_NONE) {
                 *s = splraiseipl(cc->cc_iplcookie);
         }
-
-        /* Moved to another CPU before disabling preemption? */
-        if (__predict_false(ci != curcpu())) {
-                ci = curcpu();
-                cc = pc->pc_cpus[ci->ci_data.cpu_index];
-        }
-
-#ifdef DIAGNOSTIC
-        KASSERT(cc->cc_cpu == ci);
         KASSERT(((uintptr_t)cc & (CACHE_LINE_SIZE - 1)) == 0);
-#endif
 
         return cc;
 }
@@ -2272,11 +2351,10 @@ pool_cache_cpu_exit(pool_cache_cpu_t *cc
 {
 
         /* No longer need exclusive access to the per-CPU data. */
-        if (cc->cc_ipl == IPL_NONE) {
-                crit_exit();
-        } else {
+        if (cc->cc_ipl != IPL_NONE) {
                 splx(*s);
         }
+        crit_exit();
 }
 
 #if __GNUC_PREREQ__(3, 0)
@@ -2572,6 +2650,66 @@ pool_cache_put_paddr(pool_cache_t pc, vo
 }
 
 /*
+ * pool_cache_xcall:
+ *
+ *      Transfer objects from the per-CPU cache to the global cache.
+ *      Run within a cross-call thread.
+ */
+static void
+pool_cache_xcall(pool_cache_t pc)
+{
+        pool_cache_cpu_t *cc;
+        pcg_t *prev, *cur, **list;
+        int s = 0; /* XXXgcc */
+
+        cc = pool_cache_cpu_enter(pc, &s);
+        cur = cc->cc_current;
+        cc->cc_current = NULL;
+        prev = cc->cc_previous;
+        cc->cc_previous = NULL;
+        pool_cache_cpu_exit(cc, &s);
+
+        /*
+         * XXXSMP Go to splvm to prevent kernel_lock from being taken,
+         * because locks at IPL_SOFTXXX are still spinlocks.  Does not
+         * apply to IPL_SOFTBIO.  Cross-call threads do not take the
+         * kernel_lock.
+         */
+        s = splvm();
+        mutex_enter(&pc->pc_lock);
+        if (cur != NULL) {
+                if (cur->pcg_avail == PCG_NOBJECTS) {
+                        list = &pc->pc_fullgroups;
+                        pc->pc_nfull++;
+                } else if (cur->pcg_avail == 0) {
+                        list = &pc->pc_emptygroups;
+                        pc->pc_nempty++;
+                } else {
+                        list = &pc->pc_partgroups;
+                        pc->pc_npart++;
+                }
+                cur->pcg_next = *list;
+                *list = cur;
+        }
+        if (prev != NULL) {
+                if (prev->pcg_avail == PCG_NOBJECTS) {
+                        list = &pc->pc_fullgroups;
+                        pc->pc_nfull++;
+                } else if (prev->pcg_avail == 0) {
+                        list = &pc->pc_emptygroups;
+                        pc->pc_nempty++;
+                } else {
+                        list = &pc->pc_partgroups;
+                        pc->pc_npart++;
+                }
+                prev->pcg_next = *list;
+                *list = prev;
+        }
+        mutex_exit(&pc->pc_lock);
+        splx(s);
+}
+
+/*
  * Pool backend allocators.
  *
  * Each pool has a backend allocator that handles allocation, deallocation,