Annotation of src/sys/kern/vfs_cache.c, Revision 1.123
1.123 ! maya 1: /* $NetBSD: vfs_cache.c,v 1.122 2019/09/15 17:36:43 maya Exp $ */
1.73 ad 2:
3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
1.6 cgd 28:
1.1 cgd 29: /*
1.5 mycroft 30: * Copyright (c) 1989, 1993
31: * The Regents of the University of California. All rights reserved.
1.1 cgd 32: *
33: * Redistribution and use in source and binary forms, with or without
34: * modification, are permitted provided that the following conditions
35: * are met:
36: * 1. Redistributions of source code must retain the above copyright
37: * notice, this list of conditions and the following disclaimer.
38: * 2. Redistributions in binary form must reproduce the above copyright
39: * notice, this list of conditions and the following disclaimer in the
40: * documentation and/or other materials provided with the distribution.
1.51 agc 41: * 3. Neither the name of the University nor the names of its contributors
1.1 cgd 42: * may be used to endorse or promote products derived from this software
43: * without specific prior written permission.
44: *
45: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55: * SUCH DAMAGE.
56: *
1.10 mycroft 57: * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94
1.1 cgd 58: */
1.32 lukem 59:
60: #include <sys/cdefs.h>
1.123 ! maya 61: __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.122 2019/09/15 17:36:43 maya Exp $");
1.1 cgd 62:
1.121 christos 63: #define __NAMECACHE_PRIVATE
1.107 pooka 64: #ifdef _KERNEL_OPT
1.28 chs 65: #include "opt_ddb.h"
1.115 riastrad 66: #include "opt_dtrace.h"
1.29 fvdl 67: #include "opt_revcache.h"
1.107 pooka 68: #endif
1.28 chs 69:
1.4 mycroft 70: #include <sys/param.h>
1.115 riastrad 71: #include <sys/atomic.h>
72: #include <sys/cpu.h>
73: #include <sys/errno.h>
74: #include <sys/evcnt.h>
75: #include <sys/kernel.h>
76: #include <sys/kthread.h>
1.4 mycroft 77: #include <sys/mount.h>
1.115 riastrad 78: #include <sys/mutex.h>
1.4 mycroft 79: #include <sys/namei.h>
1.18 thorpej 80: #include <sys/pool.h>
1.108 christos 81: #include <sys/sdt.h>
1.115 riastrad 82: #include <sys/sysctl.h>
83: #include <sys/systm.h>
84: #include <sys/time.h>
85: #include <sys/vnode_impl.h>
1.1 cgd 86:
1.66 christos 87: #define NAMECACHE_ENTER_REVERSE
1.1 cgd 88: /*
89: * Name caching works as follows:
90: *
91: * Names found by directory scans are retained in a cache
92: * for future reference. It is managed LRU, so frequently
 93: * used names will hang around. The cache is indexed by a hash value
1.20 jdolecek 94: * obtained from (dvp, name) where dvp refers to the directory
1.1 cgd 95: * containing name.
96: *
97: * Upon reaching the last segment of a path, if the reference
98: * is for DELETE, or NOCACHE is set (rewrite), and the
99: * name is located in the cache, it will be dropped.
100: */
101:
102: /*
1.120 riastrad 103: * Cache entry lifetime:
104: *
105: * nonexistent
106: * ---create---> active
107: * ---invalidate---> queued
108: * ---reclaim---> nonexistent.
109: *
110: * States:
111: * - Nonexistent. Cache entry does not exist.
112: *
113: * - Active. cache_lookup, cache_lookup_raw, cache_revlookup can look
114: * up, acquire references, and hand off references to vnodes,
115: * e.g. via v_interlock. Marked by nonnull ncp->nc_dvp.
116: *
 117: * - Queued. Pending destruction by cache_reclaim. Cannot be used by
118: * cache_lookup, cache_lookup_raw, or cache_revlookup. May still be
119: * on lists. Marked by null ncp->nc_dvp.
120: *
121: * Transitions:
122: *
123: * - Create: nonexistent--->active
124: *
125: * Done by cache_enter(dvp, vp, name, namelen, cnflags), called by
126: * VOP_LOOKUP after the answer is found. Allocates a struct
127: * namecache object, initializes it with the above fields, and
128: * activates it by inserting it into the forward and reverse tables.
129: *
130: * - Invalidate: active--->queued
131: *
132: * Done by cache_invalidate. If not already invalidated, nullify
133: * ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue. Called,
134: * among various other places, in cache_lookup(dvp, name, namelen,
135: * nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from
136: * cnflags.
137: *
138: * - Reclaim: queued--->nonexistent
139: *
140: * Done by cache_reclaim. Disassociate ncp from any lists it is on
141: * and free memory.
142: */
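/*
 * Sketch (assumes only the fields described above): the state is
 * encoded in nc_dvp, so a test like
 *
 *	ncp->nc_dvp != NULL   =>  active
 *	ncp->nc_dvp == NULL   =>  queued, awaiting cache_reclaim
 *
 * is how the code below distinguishes live entries from dead ones.
 */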
143:
144: /*
1.117 riastrad 145: * Locking.
1.102 dennis 146: *
1.117 riastrad 147: * L namecache_lock Global lock for namecache table and queues.
148: * C struct nchcpu::cpu_lock Per-CPU lock to reduce read contention.
149: * N struct namecache::nc_lock Per-entry lock.
150: * V struct vnode::v_interlock Vnode interlock.
151: *
1.118 riastrad 152: * Lock order: L -> C -> N -> V
153: *
154: * Examples:
155: * . L->C: cache_reclaim
156: * . C->N->V: cache_lookup
157: * . L->N->V: cache_purge1, cache_revlookup
1.117 riastrad 158: *
159: * All use serialized by namecache_lock:
160: *
161: * nclruhead / struct namecache::nc_lru
162: * ncvhashtbl / struct namecache::nc_vhash
163: * struct vnode_impl::vi_dnclist / struct namecache::nc_dvlist
164: * struct vnode_impl::vi_nclist / struct namecache::nc_vlist
165: * nchstats
166: *
167: * - Insertion serialized by namecache_lock,
168: * - read protected by per-CPU lock,
169: * - insert/read ordering guaranteed by memory barriers, and
170: * - deletion allowed only under namecache_lock and *all* per-CPU locks
171: * in CPU_INFO_FOREACH order:
172: *
173: * nchashtbl / struct namecache::nc_hash
174: *
175: * The per-CPU locks exist only to reduce the probability of
176: * contention between readers. We do not bind to a CPU, so
177: * contention is still possible.
178: *
179: * All use serialized by struct namecache::nc_lock:
180: *
181: * struct namecache::nc_dvp
182: * struct namecache::nc_vp
183: * struct namecache::nc_gcqueue (*)
184: * struct namecache::nc_hittime (**)
185: *
 186: * (*) Once on the queue, nc_gcqueue is used only by cache_thread, unlocked.
187: * (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
188: *
189: * Unlocked because stable after initialization:
190: *
193: * struct namecache::nc_flags
194: * struct namecache::nc_nlen
195: * struct namecache::nc_name
196: *
197: * Unlocked because approximation is OK:
198: *
199: * struct nchcpu::cpu_stats
200: * struct nchcpu::cpu_stats_last
201: *
202: * Updates under namecache_lock or any per-CPU lock are marked with
203: * COUNT, while updates outside those locks are marked with COUNT_UNL.
204: *
205: * - The theory seems to have been that you could replace COUNT_UNL by
206: * atomic operations -- except that doesn't help unless you also
207: * replace COUNT by atomic operations, because mixing atomics and
208: * nonatomics is a recipe for failure.
209: * - We use 32-bit per-CPU counters and 64-bit global counters under
210: * the theory that 32-bit counters are less likely to be hosed by
211: * nonatomic increment.
212: */
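/*
 * Illustrative sketch (not part of the original file): the C -> N -> V
 * leg of the lock order above, as it plays out in cache_lookup() below.
 * Simplified, with error handling omitted.
 */
#if 0
	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
	struct namecache *ncp;
	struct vnode *vp;

	mutex_enter(&cpup->cpu_lock);			/* C */
	ncp = cache_lookup_entry(dvp, name, namelen);	/* returns with N held */
	vp = ncp->nc_vp;
	mutex_enter(vp->v_interlock);			/* V, under C and N */
	mutex_exit(&ncp->nc_lock);			/* drop N */
	mutex_exit(&cpup->cpu_lock);			/* drop C */
	error = vcache_tryvget(vp);			/* ref vp, or EBUSY */
#endif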
213:
214: /*
215: * The comment below is preserved for posterity in case it is
216: * important, but it is clear that everywhere the namecache_count_*()
217: * functions are called, other cache_*() functions that take the same
218: * locks are also called, so I can't imagine how this could be a
219: * problem:
1.103 dennis 220: *
221: * N.B.: Attempting to protect COUNT_UNL() increments by taking
222: * a per-cpu lock in the namecache_count_*() functions causes
223: * a deadlock. Don't do that, use atomic increments instead if
224: * the imperfections here bug you.
1.117 riastrad 225: */
226:
227: /*
228: * struct nchstats_percpu:
1.103 dennis 229: *
1.117 riastrad 230: * Per-CPU counters.
1.77 ad 231: */
1.103 dennis 232: struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);
233:
1.117 riastrad 234: /*
235: * struct nchcpu:
236: *
237: * Per-CPU namecache state: lock and per-CPU counters.
238: */
1.77 ad 239: struct nchcpu {
1.103 dennis 240: kmutex_t cpu_lock;
241: struct nchstats_percpu cpu_stats;
242: /* XXX maybe __cacheline_aligned would improve this? */
243: struct nchstats_percpu cpu_stats_last; /* from last sample */
1.77 ad 244: };
245:
246: /*
1.90 dholland 247: * The type for the hash code. While the hash function generates a
248: * u32, the hash code has historically been passed around as a u_long,
249: * and the value is modified by xor'ing a uintptr_t, so it's not
250: * entirely clear what the best type is. For now I'll leave it
251: * unchanged as u_long.
252: */
253:
254: typedef u_long nchash_t;
255:
256: /*
1.1 cgd 257: * Structures associated with name caching.
258: */
1.89 rmind 259:
260: static kmutex_t *namecache_lock __read_mostly;
261: static pool_cache_t namecache_cache __read_mostly;
262: static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;
263:
264: static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
265: static u_long nchash __read_mostly;
266:
1.90 dholland 267: #define NCHASH2(hash, dvp) \
268: (((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)
1.19 sommerfe 269:
1.89 rmind 270: static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
271: static u_long ncvhash __read_mostly;
272:
1.48 yamt 273: #define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash)
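/*
 * Example of the indexing (a sketch mirroring cache_lookup_entry()
 * and cache_revlookup() below): a (dvp, name) pair selects a forward
 * hash chain, while a vnode alone selects a reverse chain.
 */
#if 0
	nchash_t hash = cache_hash(name, namelen);
	struct nchashhead *ncpp = &nchashtbl[NCHASH2(hash, dvp)];
	struct ncvhashhead *nvcpp = &ncvhashtbl[NCVHASH(vp)];
#endif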
1.19 sommerfe 274:
1.89 rmind 275: /* Number of cache entries allocated. */
276: static long numcache __cacheline_aligned;
1.73 ad 277:
1.89 rmind 278: /* Garbage collection queue and number of entries pending in it. */
279: static void *cache_gcqueue;
280: static u_int cache_gcpend;
281:
1.103 dennis 282: /* Cache effectiveness statistics. This holds the totals from per-cpu stats. */
1.89 rmind 283: struct nchstats nchstats __cacheline_aligned;
1.103 dennis 284:
285: /*
286: * Macros to count an event, update the central stats with per-cpu
287: * values and add current per-cpu increments to the subsystem total
288: * last collected by cache_reclaim().
289: */
290: #define CACHE_STATS_CURRENT /* nothing */
291:
292: #define COUNT(cpup, f) ((cpup)->cpu_stats.f++)
293:
294: #define UPDATE(cpup, f) do { \
295: struct nchcpu *Xcpup = (cpup); \
296: uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \
297: nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \
298: Xcpup->cpu_stats_last.f = Xcnt; \
299: } while (/* CONSTCOND */ 0)
300:
301: #define ADD(stats, cpup, f) do { \
302: struct nchcpu *Xcpup = (cpup); \
303: stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
304: } while (/* CONSTCOND */ 0)
305:
306: /* Do unlocked stats the same way. Use a different name to allow mind changes */
307: #define COUNT_UNL(cpup, f) COUNT((cpup), f)
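/*
 * Worked example of the harvesting scheme: UPDATE(cpup, ncs_goodhits)
 * expands to roughly
 *
 *	uint32_t Xcnt = cpup->cpu_stats.ncs_goodhits;
 *	nchstats.ncs_goodhits += Xcnt - cpup->cpu_stats_last.ncs_goodhits;
 *	cpup->cpu_stats_last.ncs_goodhits = Xcnt;
 *
 * i.e. the 64-bit global counter absorbs only the per-CPU delta since
 * the previous harvest, so a 32-bit per-CPU counter may wrap harmlessly
 * as long as fewer than 2^32 events occur between harvests.
 */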
1.38 thorpej 308:
1.89 rmind 309: static const int cache_lowat = 95;
310: static const int cache_hiwat = 98;
311: static const int cache_hottime = 5; /* number of seconds */
312: static int doingcache = 1; /* 1 => enable the cache */
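/*
 * Worked example of the watermarks above: with desiredvnodes of, say,
 * 10000 (a hypothetical figure), cache_reclaim() starts pruning once
 * more than 9800 entries (98%, cache_hiwat) are extant and not yet
 * queued for collection, and cache_prune() stops at 9500 (95%,
 * cache_lowat).
 */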
1.1 cgd 313:
1.73 ad 314: static struct evcnt cache_ev_scan;
315: static struct evcnt cache_ev_gc;
316: static struct evcnt cache_ev_over;
317: static struct evcnt cache_ev_under;
318: static struct evcnt cache_ev_forced;
319:
1.89 rmind 320: static struct namecache *cache_lookup_entry(
1.91 dholland 321: const struct vnode *, const char *, size_t);
1.73 ad 322: static void cache_thread(void *);
323: static void cache_invalidate(struct namecache *);
324: static void cache_disassociate(struct namecache *);
325: static void cache_reclaim(void);
326: static int cache_ctor(void *, void *, int);
327: static void cache_dtor(void *, void *);
1.46 yamt 328:
1.104 pooka 329: static struct sysctllog *sysctllog;
330: static void sysctl_cache_stat_setup(void);
331:
1.108 christos 332: SDT_PROVIDER_DEFINE(vfs);
333:
334: SDT_PROBE_DEFINE1(vfs, namecache, invalidate, done, "struct vnode *");
335: SDT_PROBE_DEFINE1(vfs, namecache, purge, parents, "struct vnode *");
336: SDT_PROBE_DEFINE1(vfs, namecache, purge, children, "struct vnode *");
337: SDT_PROBE_DEFINE2(vfs, namecache, purge, name, "char *", "size_t");
338: SDT_PROBE_DEFINE1(vfs, namecache, purge, vfs, "struct mount *");
339: SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *",
340: "char *", "size_t");
341: SDT_PROBE_DEFINE3(vfs, namecache, lookup, miss, "struct vnode *",
342: "char *", "size_t");
343: SDT_PROBE_DEFINE3(vfs, namecache, lookup, toolong, "struct vnode *",
344: "char *", "size_t");
345: SDT_PROBE_DEFINE2(vfs, namecache, revlookup, success, "struct vnode *",
346: "struct vnode *");
347: SDT_PROBE_DEFINE2(vfs, namecache, revlookup, fail, "struct vnode *",
348: "int");
349: SDT_PROBE_DEFINE2(vfs, namecache, prune, done, "int", "int");
350: SDT_PROBE_DEFINE3(vfs, namecache, enter, toolong, "struct vnode *",
351: "char *", "size_t");
352: SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *",
353: "char *", "size_t");
354:
1.73 ad 355: /*
1.90 dholland 356: * Compute the hash for an entry.
357: *
358: * (This is for now a wrapper around namei_hash, whose interface is
359: * for the time being slightly inconvenient.)
360: */
361: static nchash_t
1.91 dholland 362: cache_hash(const char *name, size_t namelen)
1.90 dholland 363: {
364: const char *endptr;
365:
1.91 dholland 366: endptr = name + namelen;
367: return namei_hash(name, &endptr);
1.90 dholland 368: }
369:
370: /*
1.73 ad 371: * Invalidate a cache entry and enqueue it for garbage collection.
1.103 dennis 372: * The caller needs to hold namecache_lock or a per-cpu lock to hold
373: * off cache_reclaim().
1.73 ad 374: */
1.46 yamt 375: static void
1.73 ad 376: cache_invalidate(struct namecache *ncp)
1.46 yamt 377: {
1.73 ad 378: void *head;
1.46 yamt 379:
1.73 ad 380: KASSERT(mutex_owned(&ncp->nc_lock));
1.46 yamt 381:
1.73 ad 382: if (ncp->nc_dvp != NULL) {
1.108 christos 383: SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp,
384: 0, 0, 0, 0);
385:
1.73 ad 386: ncp->nc_vp = NULL;
387: ncp->nc_dvp = NULL;
388: do {
389: head = cache_gcqueue;
390: ncp->nc_gcqueue = head;
391: } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
392: atomic_inc_uint(&cache_gcpend);
393: }
394: }
1.46 yamt 395:
1.73 ad 396: /*
397: * Disassociate a namecache entry from any vnodes it is attached to,
398: * and remove from the global LRU list.
399: */
400: static void
401: cache_disassociate(struct namecache *ncp)
402: {
403:
404: KASSERT(mutex_owned(namecache_lock));
405: KASSERT(ncp->nc_dvp == NULL);
406:
407: if (ncp->nc_lru.tqe_prev != NULL) {
408: TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
409: ncp->nc_lru.tqe_prev = NULL;
1.46 yamt 410: }
411: if (ncp->nc_vhash.le_prev != NULL) {
412: LIST_REMOVE(ncp, nc_vhash);
413: ncp->nc_vhash.le_prev = NULL;
414: }
415: if (ncp->nc_vlist.le_prev != NULL) {
416: LIST_REMOVE(ncp, nc_vlist);
417: ncp->nc_vlist.le_prev = NULL;
418: }
419: if (ncp->nc_dvlist.le_prev != NULL) {
420: LIST_REMOVE(ncp, nc_dvlist);
421: ncp->nc_dvlist.le_prev = NULL;
422: }
423: }
424:
1.73 ad 425: /*
426: * Lock all CPUs to prevent any cache lookup activity. Conceptually,
427: * this locks out all "readers".
428: */
1.46 yamt 429: static void
1.73 ad 430: cache_lock_cpus(void)
1.46 yamt 431: {
1.73 ad 432: CPU_INFO_ITERATOR cii;
433: struct cpu_info *ci;
1.77 ad 434: struct nchcpu *cpup;
1.46 yamt 435:
1.103 dennis 436: /*
437: * Lock out all CPUs first, then harvest per-cpu stats. This
438: * is probably not quite as cache-efficient as doing the lock
439: * and harvest at the same time, but allows cache_stat_sysctl()
440: * to make do with a per-cpu lock.
441: */
1.73 ad 442: for (CPU_INFO_FOREACH(cii, ci)) {
1.77 ad 443: cpup = ci->ci_data.cpu_nch;
444: mutex_enter(&cpup->cpu_lock);
1.103 dennis 445: }
446: for (CPU_INFO_FOREACH(cii, ci)) {
447: cpup = ci->ci_data.cpu_nch;
448: UPDATE(cpup, ncs_goodhits);
449: UPDATE(cpup, ncs_neghits);
450: UPDATE(cpup, ncs_badhits);
451: UPDATE(cpup, ncs_falsehits);
452: UPDATE(cpup, ncs_miss);
453: UPDATE(cpup, ncs_long);
454: UPDATE(cpup, ncs_pass2);
455: UPDATE(cpup, ncs_2passes);
456: UPDATE(cpup, ncs_revhits);
457: UPDATE(cpup, ncs_revmiss);
1.73 ad 458: }
1.46 yamt 459: }
460:
1.73 ad 461: /*
462: * Release all CPU locks.
463: */
464: static void
465: cache_unlock_cpus(void)
466: {
467: CPU_INFO_ITERATOR cii;
468: struct cpu_info *ci;
1.77 ad 469: struct nchcpu *cpup;
1.73 ad 470:
471: for (CPU_INFO_FOREACH(cii, ci)) {
1.77 ad 472: cpup = ci->ci_data.cpu_nch;
473: mutex_exit(&cpup->cpu_lock);
1.73 ad 474: }
475: }
476:
477: /*
1.103 dennis 478: * Find a single cache entry and return it locked.
479: * The caller needs to hold namecache_lock or a per-cpu lock to hold
480: * off cache_reclaim().
1.73 ad 481: */
482: static struct namecache *
1.91 dholland 483: cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
1.55 yamt 484: {
485: struct nchashhead *ncpp;
486: struct namecache *ncp;
1.90 dholland 487: nchash_t hash;
1.55 yamt 488:
1.84 yamt 489: KASSERT(dvp != NULL);
1.91 dholland 490: hash = cache_hash(name, namelen);
1.90 dholland 491: ncpp = &nchashtbl[NCHASH2(hash, dvp)];
1.55 yamt 492:
493: LIST_FOREACH(ncp, ncpp, nc_hash) {
1.105 dennis 494: membar_datadep_consumer(); /* for Alpha... */
1.73 ad 495: if (ncp->nc_dvp != dvp ||
1.91 dholland 496: ncp->nc_nlen != namelen ||
497: memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
1.73 ad 498: continue;
499: mutex_enter(&ncp->nc_lock);
1.77 ad 500: if (__predict_true(ncp->nc_dvp == dvp)) {
1.73 ad 501: ncp->nc_hittime = hardclock_ticks;
1.108 christos 502: SDT_PROBE(vfs, namecache, lookup, hit, dvp,
503: name, namelen, 0, 0);
1.73 ad 504: return ncp;
505: }
506: /* Raced: entry has been nullified. */
507: mutex_exit(&ncp->nc_lock);
1.55 yamt 508: }
509:
1.108 christos 510: SDT_PROBE(vfs, namecache, lookup, miss, dvp,
511: name, namelen, 0, 0);
1.73 ad 512: return NULL;
1.55 yamt 513: }
514:
1.1 cgd 515: /*
 516: * Look for the name in the cache. We don't do this
517: * if the segment name is long, simply so the cache can avoid
518: * holding long names (which would either waste space, or
519: * add greatly to the complexity).
520: *
1.90 dholland 521: * Lookup is called with DVP pointing to the directory to search,
522: * and CNP providing the name of the entry being sought: cn_nameptr
523: * is the name, cn_namelen is its length, and cn_flags is the flags
524: * word from the namei operation.
525: *
526: * DVP must be locked.
527: *
528: * There are three possible non-error return states:
529: * 1. Nothing was found in the cache. Nothing is known about
530: * the requested name.
531: * 2. A negative entry was found in the cache, meaning that the
532: * requested name definitely does not exist.
533: * 3. A positive entry was found in the cache, meaning that the
534: * requested name does exist and that we are providing the
535: * vnode.
536: * In these cases the results are:
 537: * 1. false returned; VN is set to NULL.
 538: * 2. true returned; VN is set to NULL.
 539: * 3. true returned; VN is set to the vnode found.
540: *
541: * The additional result argument ISWHT is set to zero, unless a
542: * negative entry is found that was entered as a whiteout, in which
543: * case ISWHT is set to one.
544: *
545: * The ISWHT_RET argument pointer may be null. In this case an
546: * assertion is made that the whiteout flag is not set. File systems
547: * that do not support whiteouts can/should do this.
548: *
549: * Filesystems that do support whiteouts should add ISWHITEOUT to
550: * cnp->cn_flags if ISWHT comes back nonzero.
551: *
 552: * When a vnode is returned, it is referenced but unlocked, as per
 553: * the vnode lookup locking protocol.
554: *
555: * There is no way for this function to fail, in the sense of
556: * generating an error that requires aborting the namei operation.
557: *
558: * (Prior to October 2012, this function returned an integer status,
559: * and a vnode, and mucked with the flags word in CNP for whiteouts.
560: * The integer status was -1 for "nothing found", ENOENT for "a
561: * negative entry found", 0 for "a positive entry found", and possibly
562: * other errors, and the value of VN might or might not have been set
563: * depending on what error occurred.)
1.1 cgd 564: */
1.113 riastrad 565: bool
1.91 dholland 566: cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
567: uint32_t nameiop, uint32_t cnflags,
1.90 dholland 568: int *iswht_ret, struct vnode **vn_ret)
1.1 cgd 569: {
1.23 augustss 570: struct namecache *ncp;
1.20 jdolecek 571: struct vnode *vp;
1.77 ad 572: struct nchcpu *cpup;
1.113 riastrad 573: int error;
574: bool hit;
1.103 dennis 575:
1.1 cgd 576:
1.90 dholland 577: /* Establish default result values */
578: if (iswht_ret != NULL) {
579: *iswht_ret = 0;
580: }
581: *vn_ret = NULL;
582:
1.77 ad 583: if (__predict_false(!doingcache)) {
1.113 riastrad 584: return false;
1.8 cgd 585: }
1.39 pk 586:
1.77 ad 587: cpup = curcpu()->ci_data.cpu_nch;
1.102 dennis 588: mutex_enter(&cpup->cpu_lock);
1.121 christos 589: if (__predict_false(namelen > USHRT_MAX)) {
1.108 christos 590: SDT_PROBE(vfs, namecache, lookup, toolong, dvp,
591: name, namelen, 0, 0);
1.103 dennis 592: COUNT(cpup, ncs_long);
1.77 ad 593: mutex_exit(&cpup->cpu_lock);
1.90 dholland 594: /* found nothing */
1.113 riastrad 595: return false;
1.1 cgd 596: }
1.103 dennis 597:
1.91 dholland 598: ncp = cache_lookup_entry(dvp, name, namelen);
1.77 ad 599: if (__predict_false(ncp == NULL)) {
1.103 dennis 600: COUNT(cpup, ncs_miss);
1.77 ad 601: mutex_exit(&cpup->cpu_lock);
1.90 dholland 602: /* found nothing */
1.113 riastrad 603: return false;
1.1 cgd 604: }
1.91 dholland 605: if ((cnflags & MAKEENTRY) == 0) {
1.103 dennis 606: COUNT(cpup, ncs_badhits);
1.77 ad 607: /*
608: * Last component and we are renaming or deleting,
 609: * the cache entry is invalid, or we otherwise don't
 610: * want this cache entry to exist.
611: */
612: cache_invalidate(ncp);
613: mutex_exit(&ncp->nc_lock);
1.102 dennis 614: mutex_exit(&cpup->cpu_lock);
1.90 dholland 615: /* found nothing */
1.113 riastrad 616: return false;
1.90 dholland 617: }
618: if (ncp->nc_vp == NULL) {
619: if (iswht_ret != NULL) {
620: /*
621: * Restore the ISWHITEOUT flag saved earlier.
622: */
623: KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
624: *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
625: } else {
626: KASSERT(ncp->nc_flags == 0);
627: }
628:
1.91 dholland 629: if (__predict_true(nameiop != CREATE ||
630: (cnflags & ISLASTCN) == 0)) {
1.103 dennis 631: COUNT(cpup, ncs_neghits);
1.90 dholland 632: /* found neg entry; vn is already null from above */
1.113 riastrad 633: hit = true;
1.20 jdolecek 634: } else {
1.103 dennis 635: COUNT(cpup, ncs_badhits);
1.77 ad 636: /*
1.109 dholland 637: * Last component and we are preparing to create
638: * the named object, so flush the negative cache
639: * entry.
1.77 ad 640: */
641: cache_invalidate(ncp);
1.90 dholland 642: /* found nothing */
1.113 riastrad 643: hit = false;
1.20 jdolecek 644: }
1.103 dennis 645: mutex_exit(&ncp->nc_lock);
646: mutex_exit(&cpup->cpu_lock);
1.113 riastrad 647: return hit;
1.20 jdolecek 648: }
649:
650: vp = ncp->nc_vp;
1.92 hannken 651: mutex_enter(vp->v_interlock);
652: mutex_exit(&ncp->nc_lock);
1.102 dennis 653: mutex_exit(&cpup->cpu_lock);
1.103 dennis 654:
655: /*
1.111 hannken 656: * Unlocked except for the vnode interlock. Call vcache_tryvget().
1.103 dennis 657: */
1.111 hannken 658: error = vcache_tryvget(vp);
1.92 hannken 659: if (error) {
660: KASSERT(error == EBUSY);
661: /*
662: * This vnode is being cleaned out.
663: * XXX badhits?
664: */
1.103 dennis 665: COUNT_UNL(cpup, ncs_falsehits);
1.92 hannken 666: /* found nothing */
1.113 riastrad 667: return false;
1.77 ad 668: }
1.101 christos 669:
1.103 dennis 670: COUNT_UNL(cpup, ncs_goodhits);
1.101 christos 671: /* found it */
672: *vn_ret = vp;
1.113 riastrad 673: return true;
1.1 cgd 674: }
675:
1.103 dennis 676:
677: /*
678: * Cut-'n-pasted version of the above without the nameiop argument.
679: */
1.113 riastrad 680: bool
1.91 dholland 681: cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
682: uint32_t cnflags,
1.90 dholland 683: int *iswht_ret, struct vnode **vn_ret)
1.61 yamt 684: {
685: struct namecache *ncp;
686: struct vnode *vp;
1.77 ad 687: struct nchcpu *cpup;
1.101 christos 688: int error;
1.61 yamt 689:
1.90 dholland 690: /* Establish default results. */
691: if (iswht_ret != NULL) {
692: *iswht_ret = 0;
693: }
694: *vn_ret = NULL;
695:
1.77 ad 696: if (__predict_false(!doingcache)) {
1.90 dholland 697: /* found nothing */
1.113 riastrad 698: return false;
1.61 yamt 699: }
700:
1.77 ad 701: cpup = curcpu()->ci_data.cpu_nch;
1.102 dennis 702: mutex_enter(&cpup->cpu_lock);
1.121 christos 703: if (__predict_false(namelen > USHRT_MAX)) {
1.103 dennis 704: COUNT(cpup, ncs_long);
1.77 ad 705: mutex_exit(&cpup->cpu_lock);
1.90 dholland 706: /* found nothing */
1.113 riastrad 707: return false;
1.61 yamt 708: }
1.91 dholland 709: ncp = cache_lookup_entry(dvp, name, namelen);
1.77 ad 710: if (__predict_false(ncp == NULL)) {
1.103 dennis 711: COUNT(cpup, ncs_miss);
1.77 ad 712: mutex_exit(&cpup->cpu_lock);
1.90 dholland 713: /* found nothing */
1.113 riastrad 714: return false;
1.61 yamt 715: }
716: vp = ncp->nc_vp;
717: if (vp == NULL) {
718: /*
719: * Restore the ISWHITEOUT flag saved earlier.
720: */
1.90 dholland 721: if (iswht_ret != NULL) {
722: KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
723: /*cnp->cn_flags |= ncp->nc_flags;*/
724: *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
725: }
1.103 dennis 726: COUNT(cpup, ncs_neghits);
1.102 dennis 727: mutex_exit(&ncp->nc_lock);
1.101 christos 728: mutex_exit(&cpup->cpu_lock);
1.90 dholland 729: /* found negative entry; vn is already null from above */
1.113 riastrad 730: return true;
1.61 yamt 731: }
1.92 hannken 732: mutex_enter(vp->v_interlock);
733: mutex_exit(&ncp->nc_lock);
1.102 dennis 734: mutex_exit(&cpup->cpu_lock);
1.103 dennis 735:
736: /*
1.111 hannken 737: * Unlocked except for the vnode interlock. Call vcache_tryvget().
1.103 dennis 738: */
1.111 hannken 739: error = vcache_tryvget(vp);
1.92 hannken 740: if (error) {
741: KASSERT(error == EBUSY);
742: /*
743: * This vnode is being cleaned out.
744: * XXX badhits?
745: */
1.103 dennis 746: COUNT_UNL(cpup, ncs_falsehits);
1.92 hannken 747: /* found nothing */
1.113 riastrad 748: return false;
1.61 yamt 749: }
1.101 christos 750:
1.103 dennis 751: COUNT_UNL(cpup, ncs_goodhits); /* XXX can be "badhits" */
1.101 christos 752: /* found it */
753: *vn_ret = vp;
1.113 riastrad 754: return true;
1.61 yamt 755: }
756:
1.1 cgd 757: /*
1.19 sommerfe 758: * Scan cache looking for name of directory entry pointing at vp.
759: *
1.86 hannken 760: * If the lookup succeeds the vnode is referenced and stored in dvpp.
1.19 sommerfe 761: *
762: * If bufp is non-NULL, also place the name in the buffer which starts
763: * at bufp, immediately before *bpp, and move bpp backwards to point
764: * at the start of it. (Yes, this is a little baroque, but it's done
765: * this way to cater to the whims of getcwd).
766: *
767: * Returns 0 on success, -1 on cache miss, positive errno on failure.
768: */
769: int
1.34 enami 770: cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
1.19 sommerfe 771: {
772: struct namecache *ncp;
773: struct vnode *dvp;
1.103 dennis 774: struct ncvhashhead *nvcpp;
1.95 joerg 775: struct nchcpu *cpup;
1.34 enami 776: char *bp;
1.86 hannken 777: int error, nlen;
1.34 enami 778:
1.19 sommerfe 779: if (!doingcache)
780: goto out;
781:
1.30 chs 782: nvcpp = &ncvhashtbl[NCVHASH(vp)];
1.103 dennis 783:
784: /*
785: * We increment counters in the local CPU's per-cpu stats.
786: * We don't take the per-cpu lock, however, since this function
787: * is the only place these counters are incremented so no one
788: * will be racing with us to increment them.
789: */
1.95 joerg 790: cpup = curcpu()->ci_data.cpu_nch;
1.73 ad 791: mutex_enter(namecache_lock);
1.27 chs 792: LIST_FOREACH(ncp, nvcpp, nc_vhash) {
1.73 ad 793: mutex_enter(&ncp->nc_lock);
1.34 enami 794: if (ncp->nc_vp == vp &&
795: (dvp = ncp->nc_dvp) != NULL &&
1.47 yamt 796: dvp != vp) { /* avoid pesky . entries.. */
1.34 enami 797:
1.19 sommerfe 798: #ifdef DIAGNOSTIC
1.34 enami 799: if (ncp->nc_nlen == 1 &&
800: ncp->nc_name[0] == '.')
1.19 sommerfe 801: panic("cache_revlookup: found entry for .");
802:
1.34 enami 803: if (ncp->nc_nlen == 2 &&
804: ncp->nc_name[0] == '.' &&
805: ncp->nc_name[1] == '.')
1.19 sommerfe 806: panic("cache_revlookup: found entry for ..");
807: #endif
1.103 dennis 808: COUNT(cpup, ncs_revhits);
1.86 hannken 809: nlen = ncp->nc_nlen;
1.19 sommerfe 810:
811: if (bufp) {
812: bp = *bpp;
1.86 hannken 813: bp -= nlen;
1.19 sommerfe 814: if (bp <= bufp) {
1.34 enami 815: *dvpp = NULL;
1.73 ad 816: mutex_exit(&ncp->nc_lock);
817: mutex_exit(namecache_lock);
1.108 christos 818: SDT_PROBE(vfs, namecache, revlookup,
819: fail, vp, ERANGE, 0, 0, 0);
1.34 enami 820: return (ERANGE);
1.19 sommerfe 821: }
1.86 hannken 822: memcpy(bp, ncp->nc_name, nlen);
1.19 sommerfe 823: *bpp = bp;
824: }
1.34 enami 825:
1.92 hannken 826: mutex_enter(dvp->v_interlock);
1.110 msaitoh 827: mutex_exit(&ncp->nc_lock);
1.92 hannken 828: mutex_exit(namecache_lock);
1.111 hannken 829: error = vcache_tryvget(dvp);
1.92 hannken 830: if (error) {
831: KASSERT(error == EBUSY);
832: if (bufp)
833: (*bpp) += nlen;
834: *dvpp = NULL;
1.108 christos 835: SDT_PROBE(vfs, namecache, revlookup, fail, vp,
836: error, 0, 0, 0);
1.92 hannken 837: return -1;
1.86 hannken 838: }
1.19 sommerfe 839: *dvpp = dvp;
1.108 christos 840: SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp,
841: 0, 0, 0);
1.34 enami 842: return (0);
1.19 sommerfe 843: }
1.73 ad 844: mutex_exit(&ncp->nc_lock);
1.19 sommerfe 845: }
1.103 dennis 846: COUNT(cpup, ncs_revmiss);
1.73 ad 847: mutex_exit(namecache_lock);
1.19 sommerfe 848: out:
1.34 enami 849: *dvpp = NULL;
850: return (-1);
1.19 sommerfe 851: }
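/*
 * Usage sketch (hypothetical caller, in the style of getcwd): the
 * component name lands at the tail of the buffer, immediately before
 * the old *bpp, and the parent comes back referenced.
 */
#if 0
	char buf[MAXPATHLEN], *bp = buf + MAXPATHLEN;
	struct vnode *dvp;
	int error;

	error = cache_revlookup(vp, &dvp, &bp, buf);
	if (error == 0) {
		/* bp points at the name; its length is
		 * (buf + MAXPATHLEN) - bp.  Drop the reference. */
		vrele(dvp);
	}
	/* error == -1 is a cache miss; a positive errno is a failure. */
#endif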
852:
853: /*
1.1 cgd 854: * Add an entry to the cache
855: */
1.13 christos 856: void
1.91 dholland 857: cache_enter(struct vnode *dvp, struct vnode *vp,
858: const char *name, size_t namelen, uint32_t cnflags)
1.1 cgd 859: {
1.23 augustss 860: struct namecache *ncp;
1.59 yamt 861: struct namecache *oncp;
1.23 augustss 862: struct nchashhead *ncpp;
863: struct ncvhashhead *nvcpp;
1.90 dholland 864: nchash_t hash;
1.1 cgd 865:
1.89 rmind 866: /* First, check whether we can/should add a cache entry. */
1.91 dholland 867: if ((cnflags & MAKEENTRY) == 0 ||
1.121 christos 868: __predict_false(namelen > USHRT_MAX || !doingcache)) {
1.108 christos 869: SDT_PROBE(vfs, namecache, enter, toolong, vp, name, namelen,
870: 0, 0);
1.1 cgd 871: return;
1.89 rmind 872: }
1.58 yamt 873:
1.108 christos 874: SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0);
1.73 ad 875: if (numcache > desiredvnodes) {
876: mutex_enter(namecache_lock);
877: cache_ev_forced.ev_count++;
878: cache_reclaim();
879: mutex_exit(namecache_lock);
1.39 pk 880: }
1.57 pk 881:
1.121 christos 882: if (namelen > NCHNAMLEN) {
883: ncp = kmem_alloc(sizeof(*ncp) + namelen, KM_SLEEP);
884: cache_ctor(NULL, ncp, 0);
885: } else
1.122 maya 886: ncp = pool_cache_get(namecache_cache, PR_WAITOK);
887:
1.73 ad 888: mutex_enter(namecache_lock);
889: numcache++;
890:
1.59 yamt 891: /*
892: * Concurrent lookups in the same directory may race for a
 893: * cache entry. If there's a duplicate entry, free it.
894: */
1.91 dholland 895: oncp = cache_lookup_entry(dvp, name, namelen);
1.59 yamt 896: if (oncp) {
1.73 ad 897: cache_invalidate(oncp);
898: mutex_exit(&oncp->nc_lock);
1.59 yamt 899: }
900:
1.34 enami 901: /* Grab the vnode we just found. */
1.73 ad 902: mutex_enter(&ncp->nc_lock);
1.5 mycroft 903: ncp->nc_vp = vp;
1.73 ad 904: ncp->nc_flags = 0;
905: ncp->nc_hittime = 0;
906: ncp->nc_gcqueue = NULL;
1.47 yamt 907: if (vp == NULL) {
1.11 mycroft 908: /*
909: * For negative hits, save the ISWHITEOUT flag so we can
910: * restore it later when the cache entry is used again.
911: */
1.91 dholland 912: ncp->nc_flags = cnflags & ISWHITEOUT;
1.11 mycroft 913: }
1.89 rmind 914:
1.34 enami 915: /* Fill in cache info. */
1.5 mycroft 916: ncp->nc_dvp = dvp;
1.112 hannken 917: LIST_INSERT_HEAD(&VNODE_TO_VIMPL(dvp)->vi_dnclist, ncp, nc_dvlist);
1.46 yamt 918: if (vp)
1.112 hannken 919: LIST_INSERT_HEAD(&VNODE_TO_VIMPL(vp)->vi_nclist, ncp, nc_vlist);
1.73 ad 920: else {
921: ncp->nc_vlist.le_prev = NULL;
922: ncp->nc_vlist.le_next = NULL;
923: }
1.121 christos 924: KASSERT(namelen <= USHRT_MAX);
1.91 dholland 925: ncp->nc_nlen = namelen;
926: memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
1.73 ad 927: TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
1.91 dholland 928: hash = cache_hash(name, namelen);
1.90 dholland 929: ncpp = &nchashtbl[NCHASH2(hash, dvp)];
1.73 ad 930:
931: /*
932: * Flush updates before making visible in table. No need for a
933: * memory barrier on the other side: to see modifications the
934: * list must be followed, meaning a dependent pointer load.
1.74 ad 935: * The below is LIST_INSERT_HEAD() inlined, with the memory
936: * barrier included in the correct place.
1.73 ad 937: */
1.74 ad 938: if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
939: ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
940: ncp->nc_hash.le_prev = &ncpp->lh_first;
1.73 ad 941: membar_producer();
1.74 ad 942: ncpp->lh_first = ncp;
1.19 sommerfe 943:
1.34 enami 944: ncp->nc_vhash.le_prev = NULL;
945: ncp->nc_vhash.le_next = NULL;
946:
1.19 sommerfe 947: /*
948: * Create reverse-cache entries (used in getcwd) for directories.
1.66 christos 949: * (and in linux procfs exe node)
1.19 sommerfe 950: */
1.33 enami 951: if (vp != NULL &&
952: vp != dvp &&
1.29 fvdl 953: #ifndef NAMECACHE_ENTER_REVERSE
1.33 enami 954: vp->v_type == VDIR &&
1.29 fvdl 955: #endif
1.33 enami 956: (ncp->nc_nlen > 2 ||
957: (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
958: (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
1.30 chs 959: nvcpp = &ncvhashtbl[NCVHASH(vp)];
1.19 sommerfe 960: LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
961: }
1.73 ad 962: mutex_exit(&ncp->nc_lock);
963: mutex_exit(namecache_lock);
1.1 cgd 964: }
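/*
 * Consumer sketch (hypothetical file system, not from this file): the
 * usual pairing of cache_lookup() and cache_enter() in a VOP_LOOKUP
 * implementation.  xxx_scandir() is a made-up stand-in for the real
 * directory scan.
 */
#if 0
	/* True means the cache gave a definitive answer. */
	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_nameiop, cnp->cn_flags, NULL, vpp))
		return (*vpp == NULL) ? ENOENT : 0;

	/* Miss: do the real work, then prime the cache, including
	 * negative results (vp == NULL). */
	error = xxx_scandir(dvp, cnp, vpp);
	if (error == 0 || error == ENOENT)
		cache_enter(dvp, error ? NULL : *vpp,
		    cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	return error;
#endif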
965:
966: /*
967: * Name cache initialization, from vfs_init() when we are booting
968: */
1.13 christos 969: void
1.34 enami 970: nchinit(void)
1.1 cgd 971: {
1.73 ad 972: int error;
1.1 cgd 973:
1.89 rmind 974: TAILQ_INIT(&nclruhead);
1.121 christos 975: namecache_cache = pool_cache_init(sizeof(struct namecache) + NCHNAMLEN,
1.73 ad 976: coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
977: cache_dtor, NULL);
1.71 ad 978: KASSERT(namecache_cache != NULL);
979:
1.73 ad 980: namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
981:
1.76 ad 982: nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
1.26 ad 983: ncvhashtbl =
1.29 fvdl 984: #ifdef NAMECACHE_ENTER_REVERSE
1.76 ad 985: hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
1.29 fvdl 986: #else
1.76 ad 987: hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
1.29 fvdl 988: #endif
1.73 ad 989:
990: error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
991: NULL, NULL, "cachegc");
992: if (error != 0)
993: panic("nchinit %d", error);
994:
995: evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
996: "namecache", "entries scanned");
997: evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
998: "namecache", "entries collected");
999: evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
1000: "namecache", "over scan target");
1001: evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
1002: "namecache", "under scan target");
1003: evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
1004: "namecache", "forced reclaims");
1.104 pooka 1005:
1006: sysctl_cache_stat_setup();
1.73 ad 1007: }
1008:
1009: static int
1010: cache_ctor(void *arg, void *obj, int flag)
1011: {
1012: struct namecache *ncp;
1013:
1014: ncp = obj;
1015: mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);
1016:
1017: return 0;
1018: }
1019:
1020: static void
1021: cache_dtor(void *arg, void *obj)
1022: {
1023: struct namecache *ncp;
1024:
1025: ncp = obj;
1026: mutex_destroy(&ncp->nc_lock);
1027: }
1028:
1029: /*
1030: * Called once for each CPU in the system as attached.
1031: */
1032: void
1033: cache_cpu_init(struct cpu_info *ci)
1034: {
1.77 ad 1035: struct nchcpu *cpup;
1036: size_t sz;
1.73 ad 1037:
1.77 ad 1038: sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
1039: cpup = kmem_zalloc(sz, KM_SLEEP);
1040: cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
1041: mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
1042: ci->ci_data.cpu_nch = cpup;
1.30 chs 1043: }
1044:
1045: /*
1046: * Name cache reinitialization, for when the maximum number of vnodes increases.
1047: */
1048: void
1.34 enami 1049: nchreinit(void)
1.30 chs 1050: {
1051: struct namecache *ncp;
1052: struct nchashhead *oldhash1, *hash1;
1053: struct ncvhashhead *oldhash2, *hash2;
1.36 thorpej 1054: u_long i, oldmask1, oldmask2, mask1, mask2;
1.30 chs 1055:
1.76 ad 1056: hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
1.30 chs 1057: hash2 =
1058: #ifdef NAMECACHE_ENTER_REVERSE
1.76 ad 1059: hashinit(desiredvnodes, HASH_LIST, true, &mask2);
1.30 chs 1060: #else
1.76 ad 1061: hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
1.30 chs 1062: #endif
1.73 ad 1063: mutex_enter(namecache_lock);
1064: cache_lock_cpus();
1.30 chs 1065: oldhash1 = nchashtbl;
1066: oldmask1 = nchash;
1067: nchashtbl = hash1;
1068: nchash = mask1;
1069: oldhash2 = ncvhashtbl;
1070: oldmask2 = ncvhash;
1071: ncvhashtbl = hash2;
1072: ncvhash = mask2;
1073: for (i = 0; i <= oldmask1; i++) {
1074: while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
1075: LIST_REMOVE(ncp, nc_hash);
1076: ncp->nc_hash.le_prev = NULL;
1077: }
1078: }
1079: for (i = 0; i <= oldmask2; i++) {
1080: while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
1081: LIST_REMOVE(ncp, nc_vhash);
1082: ncp->nc_vhash.le_prev = NULL;
1083: }
1084: }
1.73 ad 1085: cache_unlock_cpus();
1086: mutex_exit(namecache_lock);
1.76 ad 1087: hashdone(oldhash1, HASH_LIST, oldmask1);
1088: hashdone(oldhash2, HASH_LIST, oldmask2);
1.1 cgd 1089: }
1090:
1091: /*
1092: * Cache flush, a particular vnode; called when a vnode is renamed to
1093: * hide entries that would now be invalid
1094: */
1.13 christos 1095: void
1.91 dholland 1096: cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
1.1 cgd 1097: {
1.46 yamt 1098: struct namecache *ncp, *ncnext;
1.1 cgd 1099:
1.73 ad 1100: mutex_enter(namecache_lock);
1.55 yamt 1101: if (flags & PURGE_PARENTS) {
1.108 christos 1102: SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0);
1103:
1.112 hannken 1104: for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist);
1105: ncp != NULL; ncp = ncnext) {
1.55 yamt 1106: ncnext = LIST_NEXT(ncp, nc_vlist);
1.73 ad 1107: mutex_enter(&ncp->nc_lock);
1108: cache_invalidate(ncp);
1109: mutex_exit(&ncp->nc_lock);
1110: cache_disassociate(ncp);
1.55 yamt 1111: }
1112: }
1113: if (flags & PURGE_CHILDREN) {
1.108 christos 1114: SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0);
1.112 hannken 1115: for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_dnclist);
1116: ncp != NULL; ncp = ncnext) {
1.55 yamt 1117: ncnext = LIST_NEXT(ncp, nc_dvlist);
1.73 ad 1118: mutex_enter(&ncp->nc_lock);
1119: cache_invalidate(ncp);
1120: mutex_exit(&ncp->nc_lock);
1121: cache_disassociate(ncp);
1.55 yamt 1122: }
1.46 yamt 1123: }
1.91 dholland 1124: if (name != NULL) {
1.108 christos 1125: SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0);
1.91 dholland 1126: ncp = cache_lookup_entry(vp, name, namelen);
1.55 yamt 1127: if (ncp) {
1.73 ad 1128: cache_invalidate(ncp);
1.83 yamt 1129: mutex_exit(&ncp->nc_lock);
1.73 ad 1130: cache_disassociate(ncp);
1.55 yamt 1131: }
1.46 yamt 1132: }
1.73 ad 1133: mutex_exit(namecache_lock);
1.1 cgd 1134: }
1135:
1136: /*
1137: * Cache flush, a whole filesystem; called when filesys is umounted to
1.27 chs 1138: * remove entries that would now be invalid.
1.1 cgd 1139: */
1.13 christos 1140: void
1.34 enami 1141: cache_purgevfs(struct mount *mp)
1.1 cgd 1142: {
1.23 augustss 1143: struct namecache *ncp, *nxtcp;
1.1 cgd 1144:
1.108 christos 1145: SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0);
1.73 ad 1146: mutex_enter(namecache_lock);
1147: for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
1148: nxtcp = TAILQ_NEXT(ncp, nc_lru);
1149: mutex_enter(&ncp->nc_lock);
1150: if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
1151: /* Free the resources we had. */
1152: cache_invalidate(ncp);
1153: cache_disassociate(ncp);
1154: }
1155: mutex_exit(&ncp->nc_lock);
1156: }
1157: cache_reclaim();
1158: mutex_exit(namecache_lock);
1159: }
1160:
1161: /*
1.116 riastrad 1162: * Scan global list invalidating entries until we meet a preset target.
1.73 ad 1163: * Prefer to invalidate entries that have not scored a hit within
1164: * cache_hottime seconds. We sort the LRU list only for this routine's
1165: * benefit.
1166: */
1167: static void
1168: cache_prune(int incache, int target)
1169: {
1170: struct namecache *ncp, *nxtcp, *sentinel;
1171: int items, recent, tryharder;
1172:
1173: KASSERT(mutex_owned(namecache_lock));
1174:
1.108 christos 1175: SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0);
1.73 ad 1176: items = 0;
1177: tryharder = 0;
1178: recent = hardclock_ticks - hz * cache_hottime;
1179: sentinel = NULL;
1.27 chs 1180: for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
1.73 ad 1181: if (incache <= target)
1182: break;
1183: items++;
1.27 chs 1184: nxtcp = TAILQ_NEXT(ncp, nc_lru);
1.73 ad 1185: if (ncp == sentinel) {
1186: /*
1187: * If we looped back on ourself, then ignore
1188: * recent entries and purge whatever we find.
1189: */
1190: tryharder = 1;
1.5 mycroft 1191: }
1.93 hannken 1192: if (ncp->nc_dvp == NULL)
1193: continue;
1.81 yamt 1194: if (!tryharder && (ncp->nc_hittime - recent) > 0) {
1.73 ad 1195: if (sentinel == NULL)
1196: sentinel = ncp;
1197: TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
1198: TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
1199: continue;
1200: }
1201: mutex_enter(&ncp->nc_lock);
1202: if (ncp->nc_dvp != NULL) {
1203: cache_invalidate(ncp);
1204: cache_disassociate(ncp);
1205: incache--;
1206: }
1207: mutex_exit(&ncp->nc_lock);
1208: }
1209: cache_ev_scan.ev_count += items;
1210: }
1211:
1212: /*
1213: * Collect dead cache entries from all CPUs and garbage collect.
1214: */
1215: static void
1216: cache_reclaim(void)
1217: {
1218: struct namecache *ncp, *next;
1219: int items;
1220:
1221: KASSERT(mutex_owned(namecache_lock));
1222:
1223: /*
1224: * If the number of extant entries not awaiting garbage collection
1225: * exceeds the high water mark, then reclaim stale entries until we
1226: * reach our low water mark.
1227: */
1228: items = numcache - cache_gcpend;
1229: if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
1230: cache_prune(items, (int)((uint64_t)desiredvnodes *
1231: cache_lowat / 100));
1232: cache_ev_over.ev_count++;
1233: } else
1234: cache_ev_under.ev_count++;
1235:
1236: /*
1237: * Stop forward lookup activity on all CPUs and garbage collect dead
1238: * entries.
1239: */
1240: cache_lock_cpus();
1241: ncp = cache_gcqueue;
1242: cache_gcqueue = NULL;
1243: items = cache_gcpend;
1244: cache_gcpend = 0;
1245: while (ncp != NULL) {
1246: next = ncp->nc_gcqueue;
1247: cache_disassociate(ncp);
1248: KASSERT(ncp->nc_dvp == NULL);
1249: if (ncp->nc_hash.le_prev != NULL) {
1250: LIST_REMOVE(ncp, nc_hash);
1251: ncp->nc_hash.le_prev = NULL;
1252: }
1.121 christos 1253: if (ncp->nc_nlen > NCHNAMLEN) {
1254: cache_dtor(NULL, ncp);
1255: kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen);
1256: } else
1.123 ! maya 1257: pool_cache_put(namecache_cache, ncp);
1.73 ad 1258: ncp = next;
1259: }
1260: cache_unlock_cpus();
1261: numcache -= items;
1262: cache_ev_gc.ev_count += items;
1263: }
1264:
1265: /*
 1266: * Cache maintenance thread, awakening once per second to:
1267: *
1268: * => keep number of entries below the high water mark
1269: * => sort pseudo-LRU list
1270: * => garbage collect dead entries
1271: */
1272: static void
1273: cache_thread(void *arg)
1274: {
1275:
1276: mutex_enter(namecache_lock);
1277: for (;;) {
1278: cache_reclaim();
1279: kpause("cachegc", false, hz, namecache_lock);
1.1 cgd 1280: }
1281: }
1.19 sommerfe 1282:
1.28 chs 1283: #ifdef DDB
1284: void
1285: namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
1286: {
1287: struct vnode *dvp = NULL;
1288: struct namecache *ncp;
1289:
1290: TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
1.73 ad 1291: if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
1.28 chs 1292: (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
1293: dvp = ncp->nc_dvp;
1294: }
1295: }
1296: if (dvp == NULL) {
1297: (*pr)("name not found\n");
1298: return;
1299: }
1300: vp = dvp;
1301: TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
1.47 yamt 1302: if (ncp->nc_vp == vp) {
1.28 chs 1303: (*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
1304: }
1305: }
1306: }
1307: #endif
1.95 joerg 1308:
1309: void
1310: namecache_count_pass2(void)
1311: {
1312: struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
1313:
1.103 dennis 1314: COUNT_UNL(cpup, ncs_pass2);
1.95 joerg 1315: }
1316:
1317: void
1318: namecache_count_2passes(void)
1319: {
1320: struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
1321:
1.103 dennis 1322: COUNT_UNL(cpup, ncs_2passes);
1.95 joerg 1323: }
1.97 joerg 1324:
1.103 dennis 1325: /*
1326: * Fetch the current values of the stats. We return the most
1327: * recent values harvested into nchstats by cache_reclaim(), which
1328: * will be less than a second old.
1329: */
1.97 joerg 1330: static int
1331: cache_stat_sysctl(SYSCTLFN_ARGS)
1332: {
1.103 dennis 1333: struct nchstats stats;
1334: struct nchcpu *my_cpup;
1335: #ifdef CACHE_STATS_CURRENT
1336: CPU_INFO_ITERATOR cii;
1337: struct cpu_info *ci;
1338: #endif /* CACHE_STATS_CURRENT */
1.97 joerg 1339:
1340: if (oldp == NULL) {
1341: *oldlenp = sizeof(stats);
1342: return 0;
1343: }
1344:
1345: if (*oldlenp < sizeof(stats)) {
1346: *oldlenp = 0;
1347: return 0;
1348: }
1349:
1.103 dennis 1350: /*
1351: * Take this CPU's per-cpu lock to hold off cache_reclaim()
1352: * from doing a stats update while doing minimal damage to
1353: * concurrent operations.
1354: */
1355: sysctl_unlock();
1356: my_cpup = curcpu()->ci_data.cpu_nch;
1357: mutex_enter(&my_cpup->cpu_lock);
1358: stats = nchstats;
1359: #ifdef CACHE_STATS_CURRENT
1360: for (CPU_INFO_FOREACH(cii, ci)) {
1361: struct nchcpu *cpup = ci->ci_data.cpu_nch;
1.97 joerg 1362:
1.103 dennis 1363: ADD(stats, cpup, ncs_goodhits);
1364: ADD(stats, cpup, ncs_neghits);
1365: ADD(stats, cpup, ncs_badhits);
1366: ADD(stats, cpup, ncs_falsehits);
1367: ADD(stats, cpup, ncs_miss);
1368: ADD(stats, cpup, ncs_long);
1369: ADD(stats, cpup, ncs_pass2);
1370: ADD(stats, cpup, ncs_2passes);
1371: ADD(stats, cpup, ncs_revhits);
1372: ADD(stats, cpup, ncs_revmiss);
1373: }
1374: #endif /* CACHE_STATS_CURRENT */
1375: mutex_exit(&my_cpup->cpu_lock);
1.97 joerg 1376: sysctl_relock();
1377:
1378: *oldlenp = sizeof(stats);
1379: return sysctl_copyout(l, &stats, oldp, sizeof(stats));
1380: }
1381:
1.104 pooka 1382: static void
1383: sysctl_cache_stat_setup(void)
1.97 joerg 1384: {
1.104 pooka 1385:
1386: KASSERT(sysctllog == NULL);
1387: sysctl_createv(&sysctllog, 0, NULL, NULL,
1.97 joerg 1388: CTLFLAG_PERMANENT,
1389: CTLTYPE_STRUCT, "namecache_stats",
1390: SYSCTL_DESCR("namecache statistics"),
1391: cache_stat_sysctl, 0, NULL, 0,
1392: CTL_VFS, CTL_CREATE, CTL_EOL);
1393: }
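/*
 * Userland sketch (an assumed consumer of the node created above):
 * the stats are published as vfs.namecache_stats with a dynamically
 * assigned id, so fetch them by name.
 */
#if 0
	struct nchstats ns;
	size_t len = sizeof(ns);

	if (sysctlbyname("vfs.namecache_stats", &ns, &len, NULL, 0) == 0)
		printf("goodhits %" PRIu64 " neghits %" PRIu64 "\n",
		    ns.ncs_goodhits, ns.ncs_neghits);
#endif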