
Annotation of src/sys/kern/vfs_cache.c, Revision 1.123

1.123   ! maya        1: /*     $NetBSD: vfs_cache.c,v 1.122 2019/09/15 17:36:43 maya Exp $     */
1.73      ad          2:
                      3: /*-
                      4:  * Copyright (c) 2008 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
1.6       cgd        28:
1.1       cgd        29: /*
1.5       mycroft    30:  * Copyright (c) 1989, 1993
                     31:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        32:  *
                     33:  * Redistribution and use in source and binary forms, with or without
                     34:  * modification, are permitted provided that the following conditions
                     35:  * are met:
                     36:  * 1. Redistributions of source code must retain the above copyright
                     37:  *    notice, this list of conditions and the following disclaimer.
                     38:  * 2. Redistributions in binary form must reproduce the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer in the
                     40:  *    documentation and/or other materials provided with the distribution.
1.51      agc        41:  * 3. Neither the name of the University nor the names of its contributors
1.1       cgd        42:  *    may be used to endorse or promote products derived from this software
                     43:  *    without specific prior written permission.
                     44:  *
                     45:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     46:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     47:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     48:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     49:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     50:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     51:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     52:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     53:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     54:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     55:  * SUCH DAMAGE.
                     56:  *
1.10      mycroft    57:  *     @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94
1.1       cgd        58:  */
1.32      lukem      59:
                     60: #include <sys/cdefs.h>
1.123   ! maya       61: __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.122 2019/09/15 17:36:43 maya Exp $");
1.1       cgd        62:
1.121     christos   63: #define __NAMECACHE_PRIVATE
1.107     pooka      64: #ifdef _KERNEL_OPT
1.28      chs        65: #include "opt_ddb.h"
1.115     riastrad   66: #include "opt_dtrace.h"
1.29      fvdl       67: #include "opt_revcache.h"
1.107     pooka      68: #endif
1.28      chs        69:
1.4       mycroft    70: #include <sys/param.h>
1.115     riastrad   71: #include <sys/atomic.h>
                     72: #include <sys/cpu.h>
                     73: #include <sys/errno.h>
                     74: #include <sys/evcnt.h>
                     75: #include <sys/kernel.h>
                     76: #include <sys/kthread.h>
1.4       mycroft    77: #include <sys/mount.h>
1.115     riastrad   78: #include <sys/mutex.h>
1.4       mycroft    79: #include <sys/namei.h>
1.18      thorpej    80: #include <sys/pool.h>
1.108     christos   81: #include <sys/sdt.h>
1.115     riastrad   82: #include <sys/sysctl.h>
                     83: #include <sys/systm.h>
                     84: #include <sys/time.h>
                     85: #include <sys/vnode_impl.h>
1.1       cgd        86:
1.66      christos   87: #define NAMECACHE_ENTER_REVERSE
1.1       cgd        88: /*
                     89:  * Name caching works as follows:
                     90:  *
                     91:  * Names found by directory scans are retained in a cache
                     92:  * for future reference.  It is managed LRU, so frequently
                      93:  * used names will hang around.  The cache is indexed by a hash value
1.20      jdolecek   94:  * obtained from (dvp, name) where dvp refers to the directory
1.1       cgd        95:  * containing name.
                     96:  *
                     97:  * Upon reaching the last segment of a path, if the reference
                     98:  * is for DELETE, or NOCACHE is set (rewrite), and the
                     99:  * name is located in the cache, it will be dropped.
                    100:  */
                    101:
                    102: /*
1.120     riastrad  103:  * Cache entry lifetime:
                    104:  *
                    105:  *     nonexistent
                    106:  *     ---create---> active
                    107:  *     ---invalidate---> queued
                    108:  *     ---reclaim---> nonexistent.
                    109:  *
                    110:  * States:
                    111:  * - Nonexistent.  Cache entry does not exist.
                    112:  *
                    113:  * - Active.  cache_lookup, cache_lookup_raw, cache_revlookup can look
                    114:  *   up, acquire references, and hand off references to vnodes,
                    115:  *   e.g. via v_interlock.  Marked by nonnull ncp->nc_dvp.
                    116:  *
                     117:  * - Queued.  Pending destruction by cache_reclaim.  Cannot be used by
                    118:  *   cache_lookup, cache_lookup_raw, or cache_revlookup.  May still be
                    119:  *   on lists.  Marked by null ncp->nc_dvp.
                    120:  *
                    121:  * Transitions:
                    122:  *
                    123:  * - Create: nonexistent--->active
                    124:  *
                    125:  *   Done by cache_enter(dvp, vp, name, namelen, cnflags), called by
                    126:  *   VOP_LOOKUP after the answer is found.  Allocates a struct
                    127:  *   namecache object, initializes it with the above fields, and
                    128:  *   activates it by inserting it into the forward and reverse tables.
                    129:  *
                    130:  * - Invalidate: active--->queued
                    131:  *
                    132:  *   Done by cache_invalidate.  If not already invalidated, nullify
                    133:  *   ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue.  Called,
                    134:  *   among various other places, in cache_lookup(dvp, name, namelen,
                    135:  *   nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from
                    136:  *   cnflags.
                    137:  *
                    138:  * - Reclaim: queued--->nonexistent
                    139:  *
                    140:  *   Done by cache_reclaim.  Disassociate ncp from any lists it is on
                    141:  *   and free memory.
                    142:  */
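/*
 * A minimal sketch of one entry's lifecycle.  The names dvp, vp, ncp
 * and cnflags are hypothetical; cache_invalidate() and cache_reclaim()
 * are internal and appear here only to mark the transitions.
 */
#if 0	/* illustration only */
	cache_enter(dvp, vp, "name", 4, cnflags); /* nonexistent -> active,
						     if cnflags has MAKEENTRY */
	cache_invalidate(ncp);	/* active -> queued; done internally, e.g. by
				   cache_lookup() when MAKEENTRY is absent */
	cache_reclaim();	/* queued -> nonexistent; run periodically by
				   cache_thread() */
#endif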
                    143:
                    144: /*
1.117     riastrad  145:  * Locking.
1.102     dennis    146:  *
1.117     riastrad  147:  * L namecache_lock            Global lock for namecache table and queues.
                    148:  * C struct nchcpu::cpu_lock   Per-CPU lock to reduce read contention.
                    149:  * N struct namecache::nc_lock Per-entry lock.
                    150:  * V struct vnode::v_interlock Vnode interlock.
                    151:  *
1.118     riastrad  152:  * Lock order: L -> C -> N -> V
                    153:  *
                    154:  *     Examples:
                    155:  *     . L->C: cache_reclaim
                    156:  *     . C->N->V: cache_lookup
                    157:  *     . L->N->V: cache_purge1, cache_revlookup
1.117     riastrad  158:  *
                    159:  * All use serialized by namecache_lock:
                    160:  *
                    161:  *     nclruhead / struct namecache::nc_lru
                    162:  *     ncvhashtbl / struct namecache::nc_vhash
                    163:  *     struct vnode_impl::vi_dnclist / struct namecache::nc_dvlist
                    164:  *     struct vnode_impl::vi_nclist / struct namecache::nc_vlist
                    165:  *     nchstats
                    166:  *
                    167:  * - Insertion serialized by namecache_lock,
                    168:  * - read protected by per-CPU lock,
                    169:  * - insert/read ordering guaranteed by memory barriers, and
                    170:  * - deletion allowed only under namecache_lock and *all* per-CPU locks
                    171:  *   in CPU_INFO_FOREACH order:
                    172:  *
                    173:  *     nchashtbl / struct namecache::nc_hash
                    174:  *
                    175:  *   The per-CPU locks exist only to reduce the probability of
                    176:  *   contention between readers.  We do not bind to a CPU, so
                    177:  *   contention is still possible.
                    178:  *
                    179:  * All use serialized by struct namecache::nc_lock:
                    180:  *
                    181:  *     struct namecache::nc_dvp
                    182:  *     struct namecache::nc_vp
                    183:  *     struct namecache::nc_gcqueue (*)
                    184:  *     struct namecache::nc_hittime (**)
                    185:  *
                     186:  * (*) Once on the queue, only cache_thread uses it, unlocked.
                    187:  * (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
                    188:  *
                    189:  * Unlocked because stable after initialization:
                    190:  *
                    191:  *     struct namecache::nc_dvp
                    192:  *     struct namecache::nc_vp
                    193:  *     struct namecache::nc_flags
                    194:  *     struct namecache::nc_nlen
                    195:  *     struct namecache::nc_name
                    196:  *
                    197:  * Unlocked because approximation is OK:
                    198:  *
                    199:  *     struct nchcpu::cpu_stats
                    200:  *     struct nchcpu::cpu_stats_last
                    201:  *
                    202:  * Updates under namecache_lock or any per-CPU lock are marked with
                    203:  * COUNT, while updates outside those locks are marked with COUNT_UNL.
                    204:  *
                    205:  * - The theory seems to have been that you could replace COUNT_UNL by
                    206:  *   atomic operations -- except that doesn't help unless you also
                    207:  *   replace COUNT by atomic operations, because mixing atomics and
                    208:  *   nonatomics is a recipe for failure.
                    209:  * - We use 32-bit per-CPU counters and 64-bit global counters under
                    210:  *   the theory that 32-bit counters are less likely to be hosed by
                    211:  *   nonatomic increment.
                    212:  */
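/*
 * A minimal sketch of the C -> N -> V leg of the order above, modeled
 * on cache_lookup(); cpup and ncp are assumed to be valid here.
 */
#if 0	/* illustration only */
	mutex_enter(&cpup->cpu_lock);		/* C: per-CPU lock */
	mutex_enter(&ncp->nc_lock);		/* N: per-entry lock */
	mutex_enter(ncp->nc_vp->v_interlock);	/* V: vnode interlock */
	/* ... each lock is dropped as soon as its level's work is done ... */
#endif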
                    213:
                    214: /*
                    215:  * The comment below is preserved for posterity in case it is
                    216:  * important, but it is clear that everywhere the namecache_count_*()
                    217:  * functions are called, other cache_*() functions that take the same
                    218:  * locks are also called, so I can't imagine how this could be a
                    219:  * problem:
1.103     dennis    220:  *
                    221:  * N.B.: Attempting to protect COUNT_UNL() increments by taking
                    222:  * a per-cpu lock in the namecache_count_*() functions causes
                    223:  * a deadlock.  Don't do that, use atomic increments instead if
                    224:  * the imperfections here bug you.
1.117     riastrad  225:  */
                    226:
                    227: /*
                    228:  * struct nchstats_percpu:
1.103     dennis    229:  *
1.117     riastrad  230:  *     Per-CPU counters.
1.77      ad        231:  */
1.103     dennis    232: struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);
                    233:
1.117     riastrad  234: /*
                    235:  * struct nchcpu:
                    236:  *
                    237:  *     Per-CPU namecache state: lock and per-CPU counters.
                    238:  */
1.77      ad        239: struct nchcpu {
1.103     dennis    240:        kmutex_t                cpu_lock;
                    241:        struct nchstats_percpu  cpu_stats;
                    242:        /* XXX maybe __cacheline_aligned would improve this? */
                    243:        struct nchstats_percpu  cpu_stats_last; /* from last sample */
1.77      ad        244: };
                    245:
                    246: /*
1.90      dholland  247:  * The type for the hash code. While the hash function generates a
                    248:  * u32, the hash code has historically been passed around as a u_long,
                    249:  * and the value is modified by xor'ing a uintptr_t, so it's not
                    250:  * entirely clear what the best type is. For now I'll leave it
                    251:  * unchanged as u_long.
                    252:  */
                    253:
                    254: typedef u_long nchash_t;
                    255:
                    256: /*
 1.1       cgd       257:  * Structures associated with name caching.
                    258:  */
1.89      rmind     259:
                    260: static kmutex_t *namecache_lock __read_mostly;
                    261: static pool_cache_t namecache_cache __read_mostly;
                    262: static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;
                    263:
                    264: static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
                    265: static u_long  nchash __read_mostly;
                    266:
1.90      dholland  267: #define        NCHASH2(hash, dvp)      \
                    268:        (((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)
1.19      sommerfe  269:
1.89      rmind     270: static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
                    271: static u_long  ncvhash __read_mostly;
                    272:
1.48      yamt      273: #define        NCVHASH(vp)             (((uintptr_t)(vp) >> 3) & ncvhash)
1.19      sommerfe  274:
1.89      rmind     275: /* Number of cache entries allocated. */
                    276: static long    numcache __cacheline_aligned;
1.73      ad        277:
1.89      rmind     278: /* Garbage collection queue and number of entries pending in it. */
                    279: static void    *cache_gcqueue;
                    280: static u_int   cache_gcpend;
                    281:
 1.103     dennis    282: /* Cache effectiveness statistics.  This holds the totals from the per-cpu stats. */
1.89      rmind     283: struct nchstats        nchstats __cacheline_aligned;
1.103     dennis    284:
                    285: /*
                    286:  * Macros to count an event, update the central stats with per-cpu
                    287:  * values and add current per-cpu increments to the subsystem total
                    288:  * last collected by cache_reclaim().
                    289:  */
                    290: #define        CACHE_STATS_CURRENT     /* nothing */
                    291:
                    292: #define        COUNT(cpup, f)  ((cpup)->cpu_stats.f++)
                    293:
                    294: #define        UPDATE(cpup, f) do { \
                    295:        struct nchcpu *Xcpup = (cpup); \
                    296:        uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \
                    297:        nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \
                    298:        Xcpup->cpu_stats_last.f = Xcnt; \
                    299: } while (/* CONSTCOND */ 0)
                    300:
                    301: #define        ADD(stats, cpup, f) do { \
                    302:        struct nchcpu *Xcpup = (cpup); \
                    303:        stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
                    304: } while (/* CONSTCOND */ 0)
                    305:
                    306: /* Do unlocked stats the same way. Use a different name to allow mind changes */
                    307: #define        COUNT_UNL(cpup, f)      COUNT((cpup), f)
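/*
 * How the macros are intended to pair up, assuming a per-CPU pointer
 * cpup: the hot path bumps the local counter with COUNT(), and the
 * harvest path folds it into the global nchstats with UPDATE() while
 * holding that CPU's lock (see cache_lock_cpus()).
 */
#if 0	/* illustration only */
	COUNT(cpup, ncs_goodhits);	/* in a lookup hit path */
	UPDATE(cpup, ncs_goodhits);	/* later, under cpup->cpu_lock */
#endif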
1.38      thorpej   308:
1.89      rmind     309: static const int cache_lowat = 95;
                    310: static const int cache_hiwat = 98;
                    311: static const int cache_hottime = 5;    /* number of seconds */
                    312: static int doingcache = 1;             /* 1 => enable the cache */
1.1       cgd       313:
1.73      ad        314: static struct evcnt cache_ev_scan;
                    315: static struct evcnt cache_ev_gc;
                    316: static struct evcnt cache_ev_over;
                    317: static struct evcnt cache_ev_under;
                    318: static struct evcnt cache_ev_forced;
                    319:
1.89      rmind     320: static struct namecache *cache_lookup_entry(
1.91      dholland  321:     const struct vnode *, const char *, size_t);
1.73      ad        322: static void cache_thread(void *);
                    323: static void cache_invalidate(struct namecache *);
                    324: static void cache_disassociate(struct namecache *);
                    325: static void cache_reclaim(void);
                    326: static int cache_ctor(void *, void *, int);
                    327: static void cache_dtor(void *, void *);
1.46      yamt      328:
1.104     pooka     329: static struct sysctllog *sysctllog;
                    330: static void sysctl_cache_stat_setup(void);
                    331:
1.108     christos  332: SDT_PROVIDER_DEFINE(vfs);
                    333:
                    334: SDT_PROBE_DEFINE1(vfs, namecache, invalidate, done, "struct vnode *");
                    335: SDT_PROBE_DEFINE1(vfs, namecache, purge, parents, "struct vnode *");
                    336: SDT_PROBE_DEFINE1(vfs, namecache, purge, children, "struct vnode *");
                    337: SDT_PROBE_DEFINE2(vfs, namecache, purge, name, "char *", "size_t");
                    338: SDT_PROBE_DEFINE1(vfs, namecache, purge, vfs, "struct mount *");
                    339: SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *",
                    340:     "char *", "size_t");
                    341: SDT_PROBE_DEFINE3(vfs, namecache, lookup, miss, "struct vnode *",
                    342:     "char *", "size_t");
                    343: SDT_PROBE_DEFINE3(vfs, namecache, lookup, toolong, "struct vnode *",
                    344:     "char *", "size_t");
                    345: SDT_PROBE_DEFINE2(vfs, namecache, revlookup, success, "struct vnode *",
                    346:      "struct vnode *");
                    347: SDT_PROBE_DEFINE2(vfs, namecache, revlookup, fail, "struct vnode *",
                    348:      "int");
                    349: SDT_PROBE_DEFINE2(vfs, namecache, prune, done, "int", "int");
                    350: SDT_PROBE_DEFINE3(vfs, namecache, enter, toolong, "struct vnode *",
                    351:     "char *", "size_t");
                    352: SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *",
                    353:     "char *", "size_t");
                    354:
1.73      ad        355: /*
1.90      dholland  356:  * Compute the hash for an entry.
                    357:  *
                    358:  * (This is for now a wrapper around namei_hash, whose interface is
                    359:  * for the time being slightly inconvenient.)
                    360:  */
                    361: static nchash_t
1.91      dholland  362: cache_hash(const char *name, size_t namelen)
1.90      dholland  363: {
                    364:        const char *endptr;
                    365:
1.91      dholland  366:        endptr = name + namelen;
                    367:        return namei_hash(name, &endptr);
1.90      dholland  368: }
                    369:
                    370: /*
1.73      ad        371:  * Invalidate a cache entry and enqueue it for garbage collection.
1.103     dennis    372:  * The caller needs to hold namecache_lock or a per-cpu lock to hold
                    373:  * off cache_reclaim().
1.73      ad        374:  */
1.46      yamt      375: static void
1.73      ad        376: cache_invalidate(struct namecache *ncp)
1.46      yamt      377: {
1.73      ad        378:        void *head;
1.46      yamt      379:
1.73      ad        380:        KASSERT(mutex_owned(&ncp->nc_lock));
1.46      yamt      381:
1.73      ad        382:        if (ncp->nc_dvp != NULL) {
1.108     christos  383:                SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp,
                    384:                    0, 0, 0, 0);
                    385:
1.73      ad        386:                ncp->nc_vp = NULL;
                    387:                ncp->nc_dvp = NULL;
                    388:                do {
                    389:                        head = cache_gcqueue;
                    390:                        ncp->nc_gcqueue = head;
                    391:                } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
                    392:                atomic_inc_uint(&cache_gcpend);
                    393:        }
                    394: }
1.46      yamt      395:
1.73      ad        396: /*
                    397:  * Disassociate a namecache entry from any vnodes it is attached to,
                    398:  * and remove from the global LRU list.
                    399:  */
                    400: static void
                    401: cache_disassociate(struct namecache *ncp)
                    402: {
                    403:
                    404:        KASSERT(mutex_owned(namecache_lock));
                    405:        KASSERT(ncp->nc_dvp == NULL);
                    406:
                    407:        if (ncp->nc_lru.tqe_prev != NULL) {
                    408:                TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
                    409:                ncp->nc_lru.tqe_prev = NULL;
1.46      yamt      410:        }
                    411:        if (ncp->nc_vhash.le_prev != NULL) {
                    412:                LIST_REMOVE(ncp, nc_vhash);
                    413:                ncp->nc_vhash.le_prev = NULL;
                    414:        }
                    415:        if (ncp->nc_vlist.le_prev != NULL) {
                    416:                LIST_REMOVE(ncp, nc_vlist);
                    417:                ncp->nc_vlist.le_prev = NULL;
                    418:        }
                    419:        if (ncp->nc_dvlist.le_prev != NULL) {
                    420:                LIST_REMOVE(ncp, nc_dvlist);
                    421:                ncp->nc_dvlist.le_prev = NULL;
                    422:        }
                    423: }
                    424:
1.73      ad        425: /*
                    426:  * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
                    427:  * this locks out all "readers".
                    428:  */
1.46      yamt      429: static void
1.73      ad        430: cache_lock_cpus(void)
1.46      yamt      431: {
1.73      ad        432:        CPU_INFO_ITERATOR cii;
                    433:        struct cpu_info *ci;
1.77      ad        434:        struct nchcpu *cpup;
1.46      yamt      435:
1.103     dennis    436:        /*
                    437:         * Lock out all CPUs first, then harvest per-cpu stats.  This
                    438:         * is probably not quite as cache-efficient as doing the lock
                    439:         * and harvest at the same time, but allows cache_stat_sysctl()
                    440:         * to make do with a per-cpu lock.
                    441:         */
1.73      ad        442:        for (CPU_INFO_FOREACH(cii, ci)) {
1.77      ad        443:                cpup = ci->ci_data.cpu_nch;
                    444:                mutex_enter(&cpup->cpu_lock);
1.103     dennis    445:        }
                    446:        for (CPU_INFO_FOREACH(cii, ci)) {
                    447:                cpup = ci->ci_data.cpu_nch;
                    448:                UPDATE(cpup, ncs_goodhits);
                    449:                UPDATE(cpup, ncs_neghits);
                    450:                UPDATE(cpup, ncs_badhits);
                    451:                UPDATE(cpup, ncs_falsehits);
                    452:                UPDATE(cpup, ncs_miss);
                    453:                UPDATE(cpup, ncs_long);
                    454:                UPDATE(cpup, ncs_pass2);
                    455:                UPDATE(cpup, ncs_2passes);
                    456:                UPDATE(cpup, ncs_revhits);
                    457:                UPDATE(cpup, ncs_revmiss);
1.73      ad        458:        }
1.46      yamt      459: }
                    460:
1.73      ad        461: /*
                    462:  * Release all CPU locks.
                    463:  */
                    464: static void
                    465: cache_unlock_cpus(void)
                    466: {
                    467:        CPU_INFO_ITERATOR cii;
                    468:        struct cpu_info *ci;
1.77      ad        469:        struct nchcpu *cpup;
1.73      ad        470:
                    471:        for (CPU_INFO_FOREACH(cii, ci)) {
1.77      ad        472:                cpup = ci->ci_data.cpu_nch;
                    473:                mutex_exit(&cpup->cpu_lock);
1.73      ad        474:        }
                    475: }
                    476:
                    477: /*
1.103     dennis    478:  * Find a single cache entry and return it locked.
                    479:  * The caller needs to hold namecache_lock or a per-cpu lock to hold
                    480:  * off cache_reclaim().
1.73      ad        481:  */
                    482: static struct namecache *
1.91      dholland  483: cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
1.55      yamt      484: {
                    485:        struct nchashhead *ncpp;
                    486:        struct namecache *ncp;
1.90      dholland  487:        nchash_t hash;
1.55      yamt      488:
1.84      yamt      489:        KASSERT(dvp != NULL);
1.91      dholland  490:        hash = cache_hash(name, namelen);
1.90      dholland  491:        ncpp = &nchashtbl[NCHASH2(hash, dvp)];
1.55      yamt      492:
                    493:        LIST_FOREACH(ncp, ncpp, nc_hash) {
1.105     dennis    494:                membar_datadep_consumer();      /* for Alpha... */
1.73      ad        495:                if (ncp->nc_dvp != dvp ||
1.91      dholland  496:                    ncp->nc_nlen != namelen ||
                    497:                    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
1.73      ad        498:                        continue;
                    499:                mutex_enter(&ncp->nc_lock);
1.77      ad        500:                if (__predict_true(ncp->nc_dvp == dvp)) {
1.73      ad        501:                        ncp->nc_hittime = hardclock_ticks;
1.108     christos  502:                        SDT_PROBE(vfs, namecache, lookup, hit, dvp,
                    503:                            name, namelen, 0, 0);
1.73      ad        504:                        return ncp;
                    505:                }
                    506:                /* Raced: entry has been nullified. */
                    507:                mutex_exit(&ncp->nc_lock);
1.55      yamt      508:        }
                    509:
1.108     christos  510:        SDT_PROBE(vfs, namecache, lookup, miss, dvp,
                    511:            name, namelen, 0, 0);
1.73      ad        512:        return NULL;
1.55      yamt      513: }
                    514:
1.1       cgd       515: /*
                    516:  * Look for the name in the cache. We don't do this
                    517:  * if the segment name is long, simply so the cache can avoid
                    518:  * holding long names (which would either waste space, or
                    519:  * add greatly to the complexity).
                    520:  *
1.90      dholland  521:  * Lookup is called with DVP pointing to the directory to search,
                    522:  * and CNP providing the name of the entry being sought: cn_nameptr
                    523:  * is the name, cn_namelen is its length, and cn_flags is the flags
                    524:  * word from the namei operation.
                    525:  *
                    526:  * DVP must be locked.
                    527:  *
                    528:  * There are three possible non-error return states:
                    529:  *    1. Nothing was found in the cache. Nothing is known about
                    530:  *       the requested name.
                    531:  *    2. A negative entry was found in the cache, meaning that the
                    532:  *       requested name definitely does not exist.
                    533:  *    3. A positive entry was found in the cache, meaning that the
                    534:  *       requested name does exist and that we are providing the
                    535:  *       vnode.
                    536:  * In these cases the results are:
                    537:  *    1. 0 returned; VN is set to NULL.
                    538:  *    2. 1 returned; VN is set to NULL.
                    539:  *    3. 1 returned; VN is set to the vnode found.
                    540:  *
                    541:  * The additional result argument ISWHT is set to zero, unless a
                    542:  * negative entry is found that was entered as a whiteout, in which
                    543:  * case ISWHT is set to one.
                    544:  *
                    545:  * The ISWHT_RET argument pointer may be null. In this case an
                    546:  * assertion is made that the whiteout flag is not set. File systems
                    547:  * that do not support whiteouts can/should do this.
                    548:  *
                    549:  * Filesystems that do support whiteouts should add ISWHITEOUT to
                    550:  * cnp->cn_flags if ISWHT comes back nonzero.
                    551:  *
                    552:  * When a vnode is returned, it is locked, as per the vnode lookup
                    553:  * locking protocol.
                    554:  *
                    555:  * There is no way for this function to fail, in the sense of
                    556:  * generating an error that requires aborting the namei operation.
                    557:  *
                    558:  * (Prior to October 2012, this function returned an integer status,
                    559:  * and a vnode, and mucked with the flags word in CNP for whiteouts.
                    560:  * The integer status was -1 for "nothing found", ENOENT for "a
                    561:  * negative entry found", 0 for "a positive entry found", and possibly
                    562:  * other errors, and the value of VN might or might not have been set
                    563:  * depending on what error occurred.)
1.1       cgd       564:  */
1.113     riastrad  565: bool
1.91      dholland  566: cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
                    567:             uint32_t nameiop, uint32_t cnflags,
1.90      dholland  568:             int *iswht_ret, struct vnode **vn_ret)
1.1       cgd       569: {
1.23      augustss  570:        struct namecache *ncp;
1.20      jdolecek  571:        struct vnode *vp;
1.77      ad        572:        struct nchcpu *cpup;
1.113     riastrad  573:        int error;
                    574:        bool hit;
1.103     dennis    575:
1.1       cgd       576:
1.90      dholland  577:        /* Establish default result values */
                    578:        if (iswht_ret != NULL) {
                    579:                *iswht_ret = 0;
                    580:        }
                    581:        *vn_ret = NULL;
                    582:
1.77      ad        583:        if (__predict_false(!doingcache)) {
1.113     riastrad  584:                return false;
1.8       cgd       585:        }
1.39      pk        586:
1.77      ad        587:        cpup = curcpu()->ci_data.cpu_nch;
1.102     dennis    588:        mutex_enter(&cpup->cpu_lock);
1.121     christos  589:        if (__predict_false(namelen > USHRT_MAX)) {
1.108     christos  590:                SDT_PROBE(vfs, namecache, lookup, toolong, dvp,
                    591:                    name, namelen, 0, 0);
1.103     dennis    592:                COUNT(cpup, ncs_long);
1.77      ad        593:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  594:                /* found nothing */
1.113     riastrad  595:                return false;
1.1       cgd       596:        }
1.103     dennis    597:
1.91      dholland  598:        ncp = cache_lookup_entry(dvp, name, namelen);
1.77      ad        599:        if (__predict_false(ncp == NULL)) {
1.103     dennis    600:                COUNT(cpup, ncs_miss);
1.77      ad        601:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  602:                /* found nothing */
1.113     riastrad  603:                return false;
1.1       cgd       604:        }
1.91      dholland  605:        if ((cnflags & MAKEENTRY) == 0) {
1.103     dennis    606:                COUNT(cpup, ncs_badhits);
1.77      ad        607:                /*
                    608:                 * Last component and we are renaming or deleting,
                    609:                 * the cache entry is invalid, or we otherwise don't
                    610:                 * want the cache entry to exist.
                    611:                 */
                    612:                cache_invalidate(ncp);
                    613:                mutex_exit(&ncp->nc_lock);
1.102     dennis    614:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  615:                /* found nothing */
1.113     riastrad  616:                return false;
1.90      dholland  617:        }
                    618:        if (ncp->nc_vp == NULL) {
                    619:                if (iswht_ret != NULL) {
                    620:                        /*
                    621:                         * Restore the ISWHITEOUT flag saved earlier.
                    622:                         */
                    623:                        KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
                    624:                        *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
                    625:                } else {
                    626:                        KASSERT(ncp->nc_flags == 0);
                    627:                }
                    628:
1.91      dholland  629:                if (__predict_true(nameiop != CREATE ||
                    630:                    (cnflags & ISLASTCN) == 0)) {
1.103     dennis    631:                        COUNT(cpup, ncs_neghits);
1.90      dholland  632:                        /* found neg entry; vn is already null from above */
1.113     riastrad  633:                        hit = true;
1.20      jdolecek  634:                } else {
1.103     dennis    635:                        COUNT(cpup, ncs_badhits);
1.77      ad        636:                        /*
1.109     dholland  637:                         * Last component and we are preparing to create
                    638:                         * the named object, so flush the negative cache
                    639:                         * entry.
1.77      ad        640:                         */
                    641:                        cache_invalidate(ncp);
1.90      dholland  642:                        /* found nothing */
1.113     riastrad  643:                        hit = false;
1.20      jdolecek  644:                }
1.103     dennis    645:                mutex_exit(&ncp->nc_lock);
                    646:                mutex_exit(&cpup->cpu_lock);
1.113     riastrad  647:                return hit;
1.20      jdolecek  648:        }
                    649:
                    650:        vp = ncp->nc_vp;
1.92      hannken   651:        mutex_enter(vp->v_interlock);
                    652:        mutex_exit(&ncp->nc_lock);
1.102     dennis    653:        mutex_exit(&cpup->cpu_lock);
1.103     dennis    654:
                    655:        /*
1.111     hannken   656:         * Unlocked except for the vnode interlock.  Call vcache_tryvget().
1.103     dennis    657:         */
1.111     hannken   658:        error = vcache_tryvget(vp);
1.92      hannken   659:        if (error) {
                    660:                KASSERT(error == EBUSY);
                    661:                /*
                    662:                 * This vnode is being cleaned out.
                    663:                 * XXX badhits?
                    664:                 */
1.103     dennis    665:                COUNT_UNL(cpup, ncs_falsehits);
1.92      hannken   666:                /* found nothing */
1.113     riastrad  667:                return false;
1.77      ad        668:        }
1.101     christos  669:
1.103     dennis    670:        COUNT_UNL(cpup, ncs_goodhits);
1.101     christos  671:        /* found it */
                    672:        *vn_ret = vp;
1.113     riastrad  673:        return true;
1.1       cgd       674: }
                    675:
1.103     dennis    676:
                    677: /*
                    678:  * Cut-'n-pasted version of the above without the nameiop argument.
                    679:  */
1.113     riastrad  680: bool
1.91      dholland  681: cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
                    682:                 uint32_t cnflags,
1.90      dholland  683:                 int *iswht_ret, struct vnode **vn_ret)
1.61      yamt      684: {
                    685:        struct namecache *ncp;
                    686:        struct vnode *vp;
1.77      ad        687:        struct nchcpu *cpup;
1.101     christos  688:        int error;
1.61      yamt      689:
1.90      dholland  690:        /* Establish default results. */
                    691:        if (iswht_ret != NULL) {
                    692:                *iswht_ret = 0;
                    693:        }
                    694:        *vn_ret = NULL;
                    695:
1.77      ad        696:        if (__predict_false(!doingcache)) {
1.90      dholland  697:                /* found nothing */
1.113     riastrad  698:                return false;
1.61      yamt      699:        }
                    700:
1.77      ad        701:        cpup = curcpu()->ci_data.cpu_nch;
1.102     dennis    702:        mutex_enter(&cpup->cpu_lock);
1.121     christos  703:        if (__predict_false(namelen > USHRT_MAX)) {
1.103     dennis    704:                COUNT(cpup, ncs_long);
1.77      ad        705:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  706:                /* found nothing */
1.113     riastrad  707:                return false;
1.61      yamt      708:        }
1.91      dholland  709:        ncp = cache_lookup_entry(dvp, name, namelen);
1.77      ad        710:        if (__predict_false(ncp == NULL)) {
1.103     dennis    711:                COUNT(cpup, ncs_miss);
1.77      ad        712:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  713:                /* found nothing */
1.113     riastrad  714:                return false;
1.61      yamt      715:        }
                    716:        vp = ncp->nc_vp;
                    717:        if (vp == NULL) {
                    718:                /*
                    719:                 * Restore the ISWHITEOUT flag saved earlier.
                    720:                 */
1.90      dholland  721:                if (iswht_ret != NULL) {
                    722:                        KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
                    723:                        /*cnp->cn_flags |= ncp->nc_flags;*/
                    724:                        *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
                    725:                }
1.103     dennis    726:                COUNT(cpup, ncs_neghits);
1.102     dennis    727:                mutex_exit(&ncp->nc_lock);
1.101     christos  728:                mutex_exit(&cpup->cpu_lock);
1.90      dholland  729:                /* found negative entry; vn is already null from above */
1.113     riastrad  730:                return true;
1.61      yamt      731:        }
1.92      hannken   732:        mutex_enter(vp->v_interlock);
                    733:        mutex_exit(&ncp->nc_lock);
1.102     dennis    734:        mutex_exit(&cpup->cpu_lock);
1.103     dennis    735:
                    736:        /*
1.111     hannken   737:         * Unlocked except for the vnode interlock.  Call vcache_tryvget().
1.103     dennis    738:         */
1.111     hannken   739:        error = vcache_tryvget(vp);
1.92      hannken   740:        if (error) {
                    741:                KASSERT(error == EBUSY);
                    742:                /*
                    743:                 * This vnode is being cleaned out.
                    744:                 * XXX badhits?
                    745:                 */
1.103     dennis    746:                COUNT_UNL(cpup, ncs_falsehits);
1.92      hannken   747:                /* found nothing */
1.113     riastrad  748:                return false;
1.61      yamt      749:        }
1.101     christos  750:
1.103     dennis    751:        COUNT_UNL(cpup, ncs_goodhits); /* XXX can be "badhits" */
1.101     christos  752:        /* found it */
                    753:        *vn_ret = vp;
1.113     riastrad  754:        return true;
1.61      yamt      755: }
                    756:
1.1       cgd       757: /*
1.19      sommerfe  758:  * Scan cache looking for name of directory entry pointing at vp.
                    759:  *
1.86      hannken   760:  * If the lookup succeeds the vnode is referenced and stored in dvpp.
1.19      sommerfe  761:  *
                    762:  * If bufp is non-NULL, also place the name in the buffer which starts
                    763:  * at bufp, immediately before *bpp, and move bpp backwards to point
                    764:  * at the start of it.  (Yes, this is a little baroque, but it's done
                    765:  * this way to cater to the whims of getcwd).
                    766:  *
                    767:  * Returns 0 on success, -1 on cache miss, positive errno on failure.
                    768:  */
                    769: int
1.34      enami     770: cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
1.19      sommerfe  771: {
                    772:        struct namecache *ncp;
                    773:        struct vnode *dvp;
1.103     dennis    774:        struct ncvhashhead *nvcpp;
1.95      joerg     775:        struct nchcpu *cpup;
1.34      enami     776:        char *bp;
1.86      hannken   777:        int error, nlen;
1.34      enami     778:
1.19      sommerfe  779:        if (!doingcache)
                    780:                goto out;
                    781:
1.30      chs       782:        nvcpp = &ncvhashtbl[NCVHASH(vp)];
1.103     dennis    783:
                    784:        /*
                    785:         * We increment counters in the local CPU's per-cpu stats.
                    786:         * We don't take the per-cpu lock, however, since this function
                    787:         * is the only place these counters are incremented so no one
                    788:         * will be racing with us to increment them.
                    789:         */
1.95      joerg     790:        cpup = curcpu()->ci_data.cpu_nch;
1.73      ad        791:        mutex_enter(namecache_lock);
1.27      chs       792:        LIST_FOREACH(ncp, nvcpp, nc_vhash) {
1.73      ad        793:                mutex_enter(&ncp->nc_lock);
1.34      enami     794:                if (ncp->nc_vp == vp &&
                    795:                    (dvp = ncp->nc_dvp) != NULL &&
1.47      yamt      796:                    dvp != vp) {                /* avoid pesky . entries.. */
1.34      enami     797:
1.19      sommerfe  798: #ifdef DIAGNOSTIC
1.34      enami     799:                        if (ncp->nc_nlen == 1 &&
                    800:                            ncp->nc_name[0] == '.')
1.19      sommerfe  801:                                panic("cache_revlookup: found entry for .");
                    802:
1.34      enami     803:                        if (ncp->nc_nlen == 2 &&
                    804:                            ncp->nc_name[0] == '.' &&
                    805:                            ncp->nc_name[1] == '.')
1.19      sommerfe  806:                                panic("cache_revlookup: found entry for ..");
                    807: #endif
1.103     dennis    808:                        COUNT(cpup, ncs_revhits);
1.86      hannken   809:                        nlen = ncp->nc_nlen;
1.19      sommerfe  810:
                    811:                        if (bufp) {
                    812:                                bp = *bpp;
1.86      hannken   813:                                bp -= nlen;
1.19      sommerfe  814:                                if (bp <= bufp) {
1.34      enami     815:                                        *dvpp = NULL;
1.73      ad        816:                                        mutex_exit(&ncp->nc_lock);
                    817:                                        mutex_exit(namecache_lock);
1.108     christos  818:                                        SDT_PROBE(vfs, namecache, revlookup,
                    819:                                            fail, vp, ERANGE, 0, 0, 0);
1.34      enami     820:                                        return (ERANGE);
1.19      sommerfe  821:                                }
1.86      hannken   822:                                memcpy(bp, ncp->nc_name, nlen);
1.19      sommerfe  823:                                *bpp = bp;
                    824:                        }
1.34      enami     825:
1.92      hannken   826:                        mutex_enter(dvp->v_interlock);
1.110     msaitoh   827:                        mutex_exit(&ncp->nc_lock);
1.92      hannken   828:                        mutex_exit(namecache_lock);
1.111     hannken   829:                        error = vcache_tryvget(dvp);
1.92      hannken   830:                        if (error) {
                    831:                                KASSERT(error == EBUSY);
                    832:                                if (bufp)
                    833:                                        (*bpp) += nlen;
                    834:                                *dvpp = NULL;
1.108     christos  835:                                SDT_PROBE(vfs, namecache, revlookup, fail, vp,
                    836:                                    error, 0, 0, 0);
1.92      hannken   837:                                return -1;
1.86      hannken   838:                        }
1.19      sommerfe  839:                        *dvpp = dvp;
1.108     christos  840:                        SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp,
                    841:                            0, 0, 0);
1.34      enami     842:                        return (0);
1.19      sommerfe  843:                }
1.73      ad        844:                mutex_exit(&ncp->nc_lock);
1.19      sommerfe  845:        }
1.103     dennis    846:        COUNT(cpup, ncs_revmiss);
1.73      ad        847:        mutex_exit(namecache_lock);
1.19      sommerfe  848:  out:
1.34      enami     849:        *dvpp = NULL;
                    850:        return (-1);
1.19      sommerfe  851: }
                    852:
                    853: /*
1.1       cgd       854:  * Add an entry to the cache
                    855:  */
1.13      christos  856: void
1.91      dholland  857: cache_enter(struct vnode *dvp, struct vnode *vp,
                    858:            const char *name, size_t namelen, uint32_t cnflags)
1.1       cgd       859: {
1.23      augustss  860:        struct namecache *ncp;
1.59      yamt      861:        struct namecache *oncp;
1.23      augustss  862:        struct nchashhead *ncpp;
                    863:        struct ncvhashhead *nvcpp;
1.90      dholland  864:        nchash_t hash;
1.1       cgd       865:
1.89      rmind     866:        /* First, check whether we can/should add a cache entry. */
1.91      dholland  867:        if ((cnflags & MAKEENTRY) == 0 ||
1.121     christos  868:            __predict_false(namelen > USHRT_MAX || !doingcache)) {
1.108     christos  869:                SDT_PROBE(vfs, namecache, enter, toolong, vp, name, namelen,
                    870:                    0, 0);
1.1       cgd       871:                return;
1.89      rmind     872:        }
1.58      yamt      873:
1.108     christos  874:        SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0);
1.73      ad        875:        if (numcache > desiredvnodes) {
                    876:                mutex_enter(namecache_lock);
                    877:                cache_ev_forced.ev_count++;
                    878:                cache_reclaim();
                    879:                mutex_exit(namecache_lock);
1.39      pk        880:        }
1.57      pk        881:
1.121     christos  882:        if (namelen > NCHNAMLEN) {
                    883:                ncp = kmem_alloc(sizeof(*ncp) + namelen, KM_SLEEP);
                    884:                cache_ctor(NULL, ncp, 0);
                    885:        } else
1.122     maya      886:                ncp = pool_cache_get(namecache_cache, PR_WAITOK);
                    887:
1.73      ad        888:        mutex_enter(namecache_lock);
                    889:        numcache++;
                    890:
1.59      yamt      891:        /*
                    892:         * Concurrent lookups in the same directory may race for a
                    893:         * cache entry.  If there's a duplicated entry, free it.
                    894:         */
1.91      dholland  895:        oncp = cache_lookup_entry(dvp, name, namelen);
1.59      yamt      896:        if (oncp) {
1.73      ad        897:                cache_invalidate(oncp);
                    898:                mutex_exit(&oncp->nc_lock);
1.59      yamt      899:        }
                    900:
1.34      enami     901:        /* Grab the vnode we just found. */
1.73      ad        902:        mutex_enter(&ncp->nc_lock);
1.5       mycroft   903:        ncp->nc_vp = vp;
1.73      ad        904:        ncp->nc_flags = 0;
                    905:        ncp->nc_hittime = 0;
                    906:        ncp->nc_gcqueue = NULL;
1.47      yamt      907:        if (vp == NULL) {
1.11      mycroft   908:                /*
                    909:                 * For negative hits, save the ISWHITEOUT flag so we can
                    910:                 * restore it later when the cache entry is used again.
                    911:                 */
1.91      dholland  912:                ncp->nc_flags = cnflags & ISWHITEOUT;
1.11      mycroft   913:        }
1.89      rmind     914:
1.34      enami     915:        /* Fill in cache info. */
1.5       mycroft   916:        ncp->nc_dvp = dvp;
1.112     hannken   917:        LIST_INSERT_HEAD(&VNODE_TO_VIMPL(dvp)->vi_dnclist, ncp, nc_dvlist);
1.46      yamt      918:        if (vp)
1.112     hannken   919:                LIST_INSERT_HEAD(&VNODE_TO_VIMPL(vp)->vi_nclist, ncp, nc_vlist);
1.73      ad        920:        else {
                    921:                ncp->nc_vlist.le_prev = NULL;
                    922:                ncp->nc_vlist.le_next = NULL;
                    923:        }
1.121     christos  924:        KASSERT(namelen <= USHRT_MAX);
1.91      dholland  925:        ncp->nc_nlen = namelen;
                    926:        memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
1.73      ad        927:        TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
1.91      dholland  928:        hash = cache_hash(name, namelen);
1.90      dholland  929:        ncpp = &nchashtbl[NCHASH2(hash, dvp)];
1.73      ad        930:
                    931:        /*
                    932:         * Flush updates before making visible in table.  No need for a
                    933:         * memory barrier on the other side: to see modifications the
                    934:         * list must be followed, meaning a dependent pointer load.
1.74      ad        935:         * The below is LIST_INSERT_HEAD() inlined, with the memory
                    936:         * barrier included in the correct place.
1.73      ad        937:         */
1.74      ad        938:        if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
                    939:                ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
                    940:        ncp->nc_hash.le_prev = &ncpp->lh_first;
1.73      ad        941:        membar_producer();
1.74      ad        942:        ncpp->lh_first = ncp;
1.19      sommerfe  943:
1.34      enami     944:        ncp->nc_vhash.le_prev = NULL;
                    945:        ncp->nc_vhash.le_next = NULL;
                    946:
1.19      sommerfe  947:        /*
                    948:         * Create reverse-cache entries (used in getcwd) for directories.
 1.66      christos  949:  * (and in the Linux procfs exe node)
1.19      sommerfe  950:         */
1.33      enami     951:        if (vp != NULL &&
                    952:            vp != dvp &&
1.29      fvdl      953: #ifndef NAMECACHE_ENTER_REVERSE
1.33      enami     954:            vp->v_type == VDIR &&
1.29      fvdl      955: #endif
1.33      enami     956:            (ncp->nc_nlen > 2 ||
                    957:            (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
                    958:            (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
1.30      chs       959:                nvcpp = &ncvhashtbl[NCVHASH(vp)];
1.19      sommerfe  960:                LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
                    961:        }
1.73      ad        962:        mutex_exit(&ncp->nc_lock);
                    963:        mutex_exit(namecache_lock);
1.1       cgd       964: }
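
                         /*
                          * (Added illustration, not from any specific caller: a file
                          * system's lookup path typically consults the cache first and
                          * enters the result of a real directory scan afterwards,
                          * roughly like this, with hypothetical variable names:
                          *
                          *        if (cache_lookup(dvp, name, namelen, nameiop, cnflags,
                          *            &iswhiteout, &vp))
                          *                ... serve the hit (vp, or a negative entry) ...
                          *        error = ... scan the directory on disk ...;
                          *        cache_enter(dvp, vp, name, namelen, cnflags);
                          *
                          * cache_lookup() is defined earlier in this file.)
                          */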
                    965:
                    966: /*
                    967:  * Name cache initialization, called from vfs_init() when the system boots.
                    968:  */
1.13      christos  969: void
1.34      enami     970: nchinit(void)
1.1       cgd       971: {
1.73      ad        972:        int error;
1.1       cgd       973:
1.89      rmind     974:        TAILQ_INIT(&nclruhead);
1.121     christos  975:        namecache_cache = pool_cache_init(sizeof(struct namecache) + NCHNAMLEN,
1.73      ad        976:            coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
                    977:            cache_dtor, NULL);
1.71      ad        978:        KASSERT(namecache_cache != NULL);
                    979:
1.73      ad        980:        namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                    981:
1.76      ad        982:        nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
1.26      ad        983:        ncvhashtbl =
1.29      fvdl      984: #ifdef NAMECACHE_ENTER_REVERSE
1.76      ad        985:            hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
1.29      fvdl      986: #else
1.76      ad        987:            hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
1.29      fvdl      988: #endif
1.73      ad        989:
                    990:        error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
                    991:            NULL, NULL, "cachegc");
                    992:        if (error != 0)
                    993:                panic("nchinit %d", error);
                    994:
                    995:        evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
                    996:           "namecache", "entries scanned");
                    997:        evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
                    998:           "namecache", "entries collected");
                    999:        evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
                   1000:           "namecache", "over scan target");
                   1001:        evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
                   1002:           "namecache", "under scan target");
                   1003:        evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
                   1004:           "namecache", "forced reclaims");
1.104     pooka    1005:
                   1006:        sysctl_cache_stat_setup();
1.73      ad       1007: }
                   1008:
                   1009: static int
                   1010: cache_ctor(void *arg, void *obj, int flag)
                   1011: {
                   1012:        struct namecache *ncp;
                   1013:
                   1014:        ncp = obj;
                   1015:        mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);
                   1016:
                   1017:        return 0;
                   1018: }
                   1019:
                   1020: static void
                   1021: cache_dtor(void *arg, void *obj)
                   1022: {
                   1023:        struct namecache *ncp;
                   1024:
                   1025:        ncp = obj;
                   1026:        mutex_destroy(&ncp->nc_lock);
                   1027: }
                   1028:
                   1029: /*
                   1030:  * Called once for each CPU in the system as attached.
                   1031:  */
                   1032: void
                   1033: cache_cpu_init(struct cpu_info *ci)
                   1034: {
1.77      ad       1035:        struct nchcpu *cpup;
                   1036:        size_t sz;
1.73      ad       1037:
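                                 /*
                                  * (Added note: kmem_zalloc() does not guarantee
                                  * coherency_unit alignment, so one extra cache line is
                                  * allocated and the pointer rounded up; the per-CPU
                                  * structure then shares no cache line with unrelated data,
                                  * avoiding false sharing of cpu_lock.)
                                  */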
1.77      ad       1038:        sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
                   1039:        cpup = kmem_zalloc(sz, KM_SLEEP);
                   1040:        cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
                   1041:        mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
                   1042:        ci->ci_data.cpu_nch = cpup;
1.30      chs      1043: }
                   1044:
                   1045: /*
                   1046:  * Name cache reinitialization, for when the maximum number of vnodes increases.
                   1047:  */
                   1048: void
1.34      enami    1049: nchreinit(void)
1.30      chs      1050: {
                   1051:        struct namecache *ncp;
                   1052:        struct nchashhead *oldhash1, *hash1;
                   1053:        struct ncvhashhead *oldhash2, *hash2;
1.36      thorpej  1054:        u_long i, oldmask1, oldmask2, mask1, mask2;
1.30      chs      1055:
1.76      ad       1056:        hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
1.30      chs      1057:        hash2 =
                   1058: #ifdef NAMECACHE_ENTER_REVERSE
1.76      ad       1059:            hashinit(desiredvnodes, HASH_LIST, true, &mask2);
1.30      chs      1060: #else
1.76      ad       1061:            hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
1.30      chs      1062: #endif
1.73      ad       1063:        mutex_enter(namecache_lock);
                   1064:        cache_lock_cpus();
1.30      chs      1065:        oldhash1 = nchashtbl;
                   1066:        oldmask1 = nchash;
                   1067:        nchashtbl = hash1;
                   1068:        nchash = mask1;
                   1069:        oldhash2 = ncvhashtbl;
                   1070:        oldmask2 = ncvhash;
                   1071:        ncvhashtbl = hash2;
                   1072:        ncvhash = mask2;
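                                 /*
                                  * (Added note: old entries are unhashed rather than
                                  * rehashed; they remain on the LRU and vnode lists, miss in
                                  * the new tables, and are re-entered by cache_enter() after
                                  * the next lookup of each name.)
                                  */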
                   1073:        for (i = 0; i <= oldmask1; i++) {
                   1074:                while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
                   1075:                        LIST_REMOVE(ncp, nc_hash);
                   1076:                        ncp->nc_hash.le_prev = NULL;
                   1077:                }
                   1078:        }
                   1079:        for (i = 0; i <= oldmask2; i++) {
                   1080:                while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
                   1081:                        LIST_REMOVE(ncp, nc_vhash);
                   1082:                        ncp->nc_vhash.le_prev = NULL;
                   1083:                }
                   1084:        }
1.73      ad       1085:        cache_unlock_cpus();
                   1086:        mutex_exit(namecache_lock);
1.76      ad       1087:        hashdone(oldhash1, HASH_LIST, oldmask1);
                   1088:        hashdone(oldhash2, HASH_LIST, oldmask2);
1.1       cgd      1089: }
                   1090:
                   1091: /*
                   1092:  * Cache flush, a particular vnode; called when a vnode is renamed to
                    1093:  * hide entries that would now be invalid.
                   1094:  */
1.13      christos 1095: void
1.91      dholland 1096: cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
1.1       cgd      1097: {
1.46      yamt     1098:        struct namecache *ncp, *ncnext;
1.1       cgd      1099:
1.73      ad       1100:        mutex_enter(namecache_lock);
1.55      yamt     1101:        if (flags & PURGE_PARENTS) {
1.108     christos 1102:                SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0);
                   1103:
1.112     hannken  1104:                for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist);
                   1105:                    ncp != NULL; ncp = ncnext) {
1.55      yamt     1106:                        ncnext = LIST_NEXT(ncp, nc_vlist);
1.73      ad       1107:                        mutex_enter(&ncp->nc_lock);
                   1108:                        cache_invalidate(ncp);
                   1109:                        mutex_exit(&ncp->nc_lock);
                   1110:                        cache_disassociate(ncp);
1.55      yamt     1111:                }
                   1112:        }
                   1113:        if (flags & PURGE_CHILDREN) {
1.108     christos 1114:                SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0);
1.112     hannken  1115:                for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_dnclist);
                   1116:                    ncp != NULL; ncp = ncnext) {
1.55      yamt     1117:                        ncnext = LIST_NEXT(ncp, nc_dvlist);
1.73      ad       1118:                        mutex_enter(&ncp->nc_lock);
                   1119:                        cache_invalidate(ncp);
                   1120:                        mutex_exit(&ncp->nc_lock);
                   1121:                        cache_disassociate(ncp);
1.55      yamt     1122:                }
1.46      yamt     1123:        }
1.91      dholland 1124:        if (name != NULL) {
1.108     christos 1125:                SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0);
1.91      dholland 1126:                ncp = cache_lookup_entry(vp, name, namelen);
1.55      yamt     1127:                if (ncp) {
1.73      ad       1128:                        cache_invalidate(ncp);
1.83      yamt     1129:                        mutex_exit(&ncp->nc_lock);
1.73      ad       1130:                        cache_disassociate(ncp);
1.55      yamt     1131:                }
1.46      yamt     1132:        }
1.73      ad       1133:        mutex_exit(namecache_lock);
1.1       cgd      1134: }
                   1135:
                   1136: /*
                    1137:  * Cache flush, a whole filesystem; called when a filesystem is unmounted
1.27      chs      1138:  * to remove entries that would now be invalid.
1.1       cgd      1139:  */
1.13      christos 1140: void
1.34      enami    1141: cache_purgevfs(struct mount *mp)
1.1       cgd      1142: {
1.23      augustss 1143:        struct namecache *ncp, *nxtcp;
1.1       cgd      1144:
1.108     christos 1145:        SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0);
1.73      ad       1146:        mutex_enter(namecache_lock);
                   1147:        for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
                   1148:                nxtcp = TAILQ_NEXT(ncp, nc_lru);
                   1149:                mutex_enter(&ncp->nc_lock);
                   1150:                if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
                   1151:                        /* Free the resources we had. */
                   1152:                        cache_invalidate(ncp);
                   1153:                        cache_disassociate(ncp);
                   1154:                }
                   1155:                mutex_exit(&ncp->nc_lock);
                   1156:        }
                   1157:        cache_reclaim();
                   1158:        mutex_exit(namecache_lock);
                   1159: }
                   1160:
                   1161: /*
1.116     riastrad 1162:  * Scan the global list, invalidating entries until we meet a preset target.
1.73      ad       1163:  * Prefer to invalidate entries that have not scored a hit within
                   1164:  * cache_hottime seconds.  We sort the LRU list only for this routine's
                   1165:  * benefit.
                   1166:  */
                   1167: static void
                   1168: cache_prune(int incache, int target)
                   1169: {
                   1170:        struct namecache *ncp, *nxtcp, *sentinel;
                   1171:        int items, recent, tryharder;
                   1172:
                   1173:        KASSERT(mutex_owned(namecache_lock));
                   1174:
1.108     christos 1175:        SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0);
1.73      ad       1176:        items = 0;
                   1177:        tryharder = 0;
                   1178:        recent = hardclock_ticks - hz * cache_hottime;
                   1179:        sentinel = NULL;
1.27      chs      1180:        for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
1.73      ad       1181:                if (incache <= target)
                   1182:                        break;
                   1183:                items++;
1.27      chs      1184:                nxtcp = TAILQ_NEXT(ncp, nc_lru);
1.73      ad       1185:                if (ncp == sentinel) {
                   1186:                        /*
                   1187:                         * If we looped back on ourself, then ignore
                   1188:                         * recent entries and purge whatever we find.
                   1189:                         */
                   1190:                        tryharder = 1;
1.5       mycroft  1191:                }
1.93      hannken  1192:                if (ncp->nc_dvp == NULL)
                   1193:                        continue;
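                                         /*
                                          * (Added note: the subtraction keeps this comparison
                                          * correct even when hardclock_ticks wraps around.)
                                          */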
1.81      yamt     1194:                if (!tryharder && (ncp->nc_hittime - recent) > 0) {
1.73      ad       1195:                        if (sentinel == NULL)
                   1196:                                sentinel = ncp;
                   1197:                        TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
                   1198:                        TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
                   1199:                        continue;
                   1200:                }
                   1201:                mutex_enter(&ncp->nc_lock);
                   1202:                if (ncp->nc_dvp != NULL) {
                   1203:                        cache_invalidate(ncp);
                   1204:                        cache_disassociate(ncp);
                   1205:                        incache--;
                   1206:                }
                   1207:                mutex_exit(&ncp->nc_lock);
                   1208:        }
                   1209:        cache_ev_scan.ev_count += items;
                   1210: }
                   1211:
                   1212: /*
                   1213:  * Collect dead cache entries from all CPUs and garbage collect.
                   1214:  */
                   1215: static void
                   1216: cache_reclaim(void)
                   1217: {
                   1218:        struct namecache *ncp, *next;
                   1219:        int items;
                   1220:
                   1221:        KASSERT(mutex_owned(namecache_lock));
                   1222:
                   1223:        /*
                   1224:         * If the number of extant entries not awaiting garbage collection
                   1225:         * exceeds the high water mark, then reclaim stale entries until we
                   1226:         * reach our low water mark.
                   1227:         */
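                                 /*
                                  * (Added example, with assumed numbers: if desiredvnodes
                                  * were 100000 and cache_hiwat 95, pruning would begin once
                                  * more than 95000 entries exist, and aim for cache_lowat
                                  * percent of desiredvnodes.)
                                  */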
                   1228:        items = numcache - cache_gcpend;
                   1229:        if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
                   1230:                cache_prune(items, (int)((uint64_t)desiredvnodes *
                   1231:                    cache_lowat / 100));
                   1232:                cache_ev_over.ev_count++;
                   1233:        } else
                   1234:                cache_ev_under.ev_count++;
                   1235:
                   1236:        /*
                   1237:         * Stop forward lookup activity on all CPUs and garbage collect dead
                   1238:         * entries.
                   1239:         */
                   1240:        cache_lock_cpus();
                   1241:        ncp = cache_gcqueue;
                   1242:        cache_gcqueue = NULL;
                   1243:        items = cache_gcpend;
                   1244:        cache_gcpend = 0;
                   1245:        while (ncp != NULL) {
                   1246:                next = ncp->nc_gcqueue;
                   1247:                cache_disassociate(ncp);
                   1248:                KASSERT(ncp->nc_dvp == NULL);
                   1249:                if (ncp->nc_hash.le_prev != NULL) {
                   1250:                        LIST_REMOVE(ncp, nc_hash);
                   1251:                        ncp->nc_hash.le_prev = NULL;
                   1252:                }
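                                         /*
                                          * (Added note: entries whose name exceeds NCHNAMLEN
                                          * were allocated with kmem_alloc() rather than from
                                          * the fixed-size pool cache, so they are freed the
                                          * same way; cache_dtor() tears down the per-entry
                                          * lock first.)
                                          */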
1.121     christos 1253:                if (ncp->nc_nlen > NCHNAMLEN) {
                   1254:                        cache_dtor(NULL, ncp);
                   1255:                        kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen);
                   1256:                } else
1.123   ! maya     1257:                        pool_cache_put(namecache_cache, ncp);
1.73      ad       1258:                ncp = next;
                   1259:        }
                   1260:        cache_unlock_cpus();
                   1261:        numcache -= items;
                   1262:        cache_ev_gc.ev_count += items;
                   1263: }
                   1264:
                   1265: /*
                    1266:  * Cache maintenance thread, awakening once per second to:
                   1267:  *
                   1268:  * => keep number of entries below the high water mark
                   1269:  * => sort pseudo-LRU list
                   1270:  * => garbage collect dead entries
                   1271:  */
                   1272: static void
                   1273: cache_thread(void *arg)
                   1274: {
                   1275:
                   1276:        mutex_enter(namecache_lock);
                   1277:        for (;;) {
                   1278:                cache_reclaim();
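                                         /*
                                          * (Added note: kpause() drops namecache_lock while
                                          * sleeping and reacquires it before returning.)
                                          */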
                   1279:                kpause("cachegc", false, hz, namecache_lock);
1.1       cgd      1280:        }
                   1281: }
1.19      sommerfe 1282:
1.28      chs      1283: #ifdef DDB
                   1284: void
                   1285: namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
                   1286: {
                   1287:        struct vnode *dvp = NULL;
                   1288:        struct namecache *ncp;
                   1289:
                   1290:        TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
1.73      ad       1291:                if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
1.28      chs      1292:                        (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
                   1293:                        dvp = ncp->nc_dvp;
                   1294:                }
                   1295:        }
                   1296:        if (dvp == NULL) {
                   1297:                (*pr)("name not found\n");
                   1298:                return;
                   1299:        }
                   1300:        vp = dvp;
                   1301:        TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
1.47      yamt     1302:                if (ncp->nc_vp == vp) {
1.28      chs      1303:                        (*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
                   1304:                }
                   1305:        }
                   1306: }
                   1307: #endif
1.95      joerg    1308:
                   1309: void
                   1310: namecache_count_pass2(void)
                   1311: {
                   1312:        struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
                   1313:
1.103     dennis   1314:        COUNT_UNL(cpup, ncs_pass2);
1.95      joerg    1315: }
                   1316:
                   1317: void
                   1318: namecache_count_2passes(void)
                   1319: {
                   1320:        struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
                   1321:
1.103     dennis   1322:        COUNT_UNL(cpup, ncs_2passes);
1.95      joerg    1323: }
1.97      joerg    1324:
1.103     dennis   1325: /*
                   1326:  * Fetch the current values of the stats.  We return the most
                   1327:  * recent values harvested into nchstats by cache_reclaim(), which
                   1328:  * will be less than a second old.
                   1329:  */
1.97      joerg    1330: static int
                   1331: cache_stat_sysctl(SYSCTLFN_ARGS)
                   1332: {
1.103     dennis   1333:        struct nchstats stats;
                   1334:        struct nchcpu *my_cpup;
                   1335: #ifdef CACHE_STATS_CURRENT
                   1336:        CPU_INFO_ITERATOR cii;
                   1337:        struct cpu_info *ci;
                   1338: #endif /* CACHE_STATS_CURRENT */
1.97      joerg    1339:
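                                 /*
                                  * (Added note: a NULL oldp is the standard sysctl size
                                  * probe; report the space required and return success.)
                                  */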
                   1340:        if (oldp == NULL) {
                   1341:                *oldlenp = sizeof(stats);
                   1342:                return 0;
                   1343:        }
                   1344:
                   1345:        if (*oldlenp < sizeof(stats)) {
                   1346:                *oldlenp = 0;
                   1347:                return 0;
                   1348:        }
                   1349:
1.103     dennis   1350:        /*
                    1351:         * Take this CPU's per-CPU lock to hold off cache_reclaim()'s
                    1352:         * stats update, while doing minimal damage to concurrent
                    1353:         * operations.
                   1354:         */
                   1355:        sysctl_unlock();
                   1356:        my_cpup = curcpu()->ci_data.cpu_nch;
                   1357:        mutex_enter(&my_cpup->cpu_lock);
                   1358:        stats = nchstats;
                   1359: #ifdef CACHE_STATS_CURRENT
                   1360:        for (CPU_INFO_FOREACH(cii, ci)) {
                   1361:                struct nchcpu *cpup = ci->ci_data.cpu_nch;
1.97      joerg    1362:
1.103     dennis   1363:                ADD(stats, cpup, ncs_goodhits);
                   1364:                ADD(stats, cpup, ncs_neghits);
                   1365:                ADD(stats, cpup, ncs_badhits);
                   1366:                ADD(stats, cpup, ncs_falsehits);
                   1367:                ADD(stats, cpup, ncs_miss);
                   1368:                ADD(stats, cpup, ncs_long);
                   1369:                ADD(stats, cpup, ncs_pass2);
                   1370:                ADD(stats, cpup, ncs_2passes);
                   1371:                ADD(stats, cpup, ncs_revhits);
                   1372:                ADD(stats, cpup, ncs_revmiss);
                   1373:        }
                   1374: #endif /* CACHE_STATS_CURRENT */
                   1375:        mutex_exit(&my_cpup->cpu_lock);
1.97      joerg    1376:        sysctl_relock();
                   1377:
                   1378:        *oldlenp = sizeof(stats);
                   1379:        return sysctl_copyout(l, &stats, oldp, sizeof(stats));
                   1380: }
                   1381:
1.104     pooka    1382: static void
                   1383: sysctl_cache_stat_setup(void)
1.97      joerg    1384: {
1.104     pooka    1385:
                   1386:        KASSERT(sysctllog == NULL);
                   1387:        sysctl_createv(&sysctllog, 0, NULL, NULL,
1.97      joerg    1388:                       CTLFLAG_PERMANENT,
                   1389:                       CTLTYPE_STRUCT, "namecache_stats",
                   1390:                       SYSCTL_DESCR("namecache statistics"),
                   1391:                       cache_stat_sysctl, 0, NULL, 0,
                   1392:                       CTL_VFS, CTL_CREATE, CTL_EOL);
                   1393: }
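
                         /*
                          * (Added illustration, not part of the original file: given the
                          * vfs.namecache_stats node created above, a userland program
                          * could read the statistics roughly as follows; struct nchstats
                          * is assumed to come from <sys/namei.h>.
                          *
                          *        #include <sys/sysctl.h>
                          *        #include <sys/namei.h>
                          *        #include <err.h>
                          *        #include <stdio.h>
                          *
                          *        struct nchstats st;
                          *        size_t len = sizeof(st);
                          *
                          *        if (sysctlbyname("vfs.namecache_stats", &st, &len,
                          *            NULL, 0) == -1)
                          *                err(1, "sysctlbyname");
                          *        printf("good hits %ju\n", (uintmax_t)st.ncs_goodhits);
                          */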
