Annotation of src/sys/kern/vfs_vnode.c, Revision 1.49
1.49 ! hannken 1: /* $NetBSD: vfs_vnode.c,v 1.48 2016/05/19 14:47:33 hannken Exp $ */
1.1 rmind 2:
3: /*-
1.2 rmind 4: * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
1.1 rmind 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
32:
33: /*
34: * Copyright (c) 1989, 1993
35: * The Regents of the University of California. All rights reserved.
36: * (c) UNIX System Laboratories, Inc.
37: * All or some portions of this file are derived from material licensed
38: * to the University of California by American Telephone and Telegraph
39: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40: * the permission of UNIX System Laboratories, Inc.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
50: * 3. Neither the name of the University nor the names of its contributors
51: * may be used to endorse or promote products derived from this software
52: * without specific prior written permission.
53: *
54: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64: * SUCH DAMAGE.
65: *
66: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67: */
68:
69: /*
1.8 rmind 70: * The vnode cache subsystem.
1.1 rmind 71: *
1.8 rmind 72: * Life-cycle
1.1 rmind 73: *
1.8 rmind 74: * Normally, there are two points where new vnodes are created:
75: * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode
76: * starts in one of the following ways:
77: *
1.45 hannken 78: * - Allocation, via vcache_get(9) or vcache_new(9).
1.8 rmind 79: * - Reclamation of inactive vnode, via vget(9).
80: *
1.16 rmind 81: * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9)
82: * was another, traditional way. Currently, only the draining thread
83: * recycles the vnodes. This behaviour might be revisited.
84: *
1.8 rmind 85: * The life-cycle ends when the last reference is dropped, usually
86: * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform
87: * the file system that vnode is inactive. Via this call, file system
1.16 rmind 88: * indicates whether vnode can be recycled (usually, it checks its own
89: * references, e.g. count of links, whether the file was removed).
1.8 rmind 90: *
91: * Depending on indication, vnode can be put into a free list (cache),
92: * or cleaned via vclean(9), which calls VOP_RECLAIM(9) to disassociate
93: * underlying file system from the vnode, and finally destroyed.
94: *
95: * Reference counting
96: *
97: * Vnode is considered active, if reference count (vnode_t::v_usecount)
98: * is non-zero. It is maintained using: vref(9) and vrele(9), as well
99: * as vput(9), routines. Common points holding references are e.g.
100: * file openings, current working directory, mount points, etc.
101: *
102: * Note on v_usecount and its locking
103: *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, the vnode_t::v_interlock will be held.  To change v_usecount
106: * from zero, the interlock must be held. To change from a non-zero
107: * value to zero, again the interlock must be held.
108: *
1.24 hannken 109: * Changing the usecount from a non-zero value to a non-zero value can
110: * safely be done using atomic operations, without the interlock held.
1.8 rmind 111: *
112: * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
113: * mntvnode_lock is still held.
1.20 dholland 114: *
115: * See PR 41374.
1.1 rmind 116: */
117:
118: #include <sys/cdefs.h>
1.49 ! hannken 119: __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.48 2016/05/19 14:47:33 hannken Exp $");
1.23 hannken 120:
121: #define _VFS_VNODE_PRIVATE
1.1 rmind 122:
123: #include <sys/param.h>
124: #include <sys/kernel.h>
125:
126: #include <sys/atomic.h>
127: #include <sys/buf.h>
128: #include <sys/conf.h>
129: #include <sys/device.h>
1.36 hannken 130: #include <sys/hash.h>
1.1 rmind 131: #include <sys/kauth.h>
132: #include <sys/kmem.h>
133: #include <sys/kthread.h>
134: #include <sys/module.h>
135: #include <sys/mount.h>
136: #include <sys/namei.h>
137: #include <sys/syscallargs.h>
138: #include <sys/sysctl.h>
139: #include <sys/systm.h>
140: #include <sys/vnode.h>
141: #include <sys/wapbl.h>
1.24 hannken 142: #include <sys/fstrans.h>
1.1 rmind 143:
144: #include <uvm/uvm.h>
145: #include <uvm/uvm_readahead.h>
146:
/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

/*
 * Key identifying a cached vnode: the mount point plus an opaque
 * byte string of vk_key_len bytes (fs-supplied; presumably an inode
 * number or similar — see the vcache_* routines).
 */
struct vcache_key {
	struct mount *vk_mount;
	const void *vk_key;
	size_t vk_key_len;
};
/* Hash chain node binding a vcache_key to its vnode. */
struct vcache_node {
	SLIST_ENTRY(vcache_node) vn_hash;
	struct vnode *vn_vnode;
	struct vcache_key vn_key;
};
161:
/* Count of allocated (non-marker) vnodes; see vnalloc()/vnfree(). */
u_int numvnodes __cacheline_aligned;

/* Pool from which every vnode_t is allocated. */
static pool_cache_t vnode_cache __read_mostly;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * Vnode recycling mechanism first attempts to look into the former list.
 */
static kmutex_t vnode_free_list_lock __cacheline_aligned;
static vnodelst_t vnode_free_list __cacheline_aligned;
static vnodelst_t vnode_hold_list __cacheline_aligned;
static kcondvar_t vdrain_cv __cacheline_aligned;	/* wakes vdrain_thread */

/* State of the deferred-release (vrele) thread. */
static vnodelst_t vrele_list __cacheline_aligned;	/* deferred vnodes */
static kmutex_t	vrele_lock __cacheline_aligned;		/* protects the above */
static kcondvar_t vrele_cv __cacheline_aligned;
static lwp_t *	vrele_lwp __cacheline_aligned;		/* the vrele thread */
static int	vrele_pending __cacheline_aligned;	/* length of vrele_list */
static int	vrele_gen __cacheline_aligned;		/* generation, for vrele_flush() */

SLIST_HEAD(hashhead, vcache_node);
/* The vnode cache: hash table of vcache_node plus its lock and node pool. */
static struct {
	kmutex_t	lock;
	u_long		hashmask;
	struct hashhead	*hashtab;
	pool_cache_t	pool;
} vcache __cacheline_aligned;
190:
/* Forward declarations for the local routines defined below. */
static int	cleanvnode(void);
static void	vcache_init(void);
static void	vcache_reinit(void);
static void	vclean(vnode_t *);
static void	vrelel(vnode_t *, int);
static void	vdrain_thread(void *);
static void	vrele_thread(void *);
static vnode_t *vnalloc(struct mount *);
static void	vnfree(vnode_t *);
static void	vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void	vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern struct mount *dead_rootmount;	/* adopts vnodes after vclean() */
extern int (**dead_vnodeop_p)(void *);	/* vnode ops after vclean() */
extern struct vfsops dead_vfsops;
1.1 rmind 208:
/*
 * Initialize the vnode management subsystem: the vnode pool, the
 * "dead" mount, the free/deferred lists, the vnode cache and the two
 * helper threads (vdrain and vrele).
 */
void
vfs_vnode_sysinit(void)
{
	int error __diagused;

	vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	/* The mount that adopts vnodes once vclean() has disassociated
	 * them from their original file system. */
	dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_rootmount != NULL);
	dead_rootmount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error);
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERTMSG((error == 0), "kthread_create(vrele) failed: %d", error);
}
239:
/*
 * Allocate a new marker vnode.
 *
 * Passing the mount through to vnalloc() selects its marker path:
 * the vnode is flagged VI_MARKER, typed VBAD, and is not counted
 * in numvnodes.
 */
vnode_t *
vnalloc_marker(struct mount *mp)
{

	return vnalloc(mp);
}
249:
/*
 * Free a marker vnode previously obtained from vnalloc_marker().
 */
void
vnfree_marker(vnode_t *vp)
{

	/* Only marker vnodes may be freed through this entry point. */
	KASSERT(ISSET(vp->v_iflag, VI_MARKER));
	vnfree(vp);
}
260:
261: /*
262: * Test a vnode for being a marker vnode.
263: */
264: bool
265: vnis_marker(vnode_t *vp)
266: {
267:
268: return (ISSET(vp->v_iflag, VI_MARKER));
269: }
270:
/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 *
 * Marker vnodes are flagged VI_MARKER, typed VBAD and excluded from
 * the numvnodes accounting.  Regular vnodes are returned typed VNON,
 * holding one reference (v_usecount), with an initialized v_lock.
 */
static vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	memset(vp, 0, sizeof(*vp));
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * Done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		/* Marker: no v_lock, no accounting. */
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
		return vp;
	}

	/*
	 * Account the new vnode; kick the vdrain thread once the total
	 * exceeds desiredvnodes by more than 10%.
	 */
	mutex_enter(&vnode_free_list_lock);
	numvnodes++;
	if (numvnodes > desiredvnodes + desiredvnodes / 10)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);

	rw_init(&vp->v_lock);
	vp->v_usecount = 1;
	vp->v_type = VNON;
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	return vp;
}
312:
313: /*
314: * Free an unused, unreferenced vnode.
315: */
1.48 hannken 316: static void
1.1 rmind 317: vnfree(vnode_t *vp)
318: {
319:
320: KASSERT(vp->v_usecount == 0);
321:
322: if ((vp->v_iflag & VI_MARKER) == 0) {
323: rw_destroy(&vp->v_lock);
324: mutex_enter(&vnode_free_list_lock);
325: numvnodes--;
326: mutex_exit(&vnode_free_list_lock);
327: }
328:
1.9 rmind 329: uvm_obj_destroy(&vp->v_uobj, true);
1.1 rmind 330: cv_destroy(&vp->v_cv);
331: pool_cache_put(vnode_cache, vp);
332: }
333:
/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 *
 * Returns 0 on success, EBUSY if no vnode on either free list could
 * be locked without sleeping.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		/*
		 * Only take a vnode whose lock, interlock and file
		 * system transaction can all be obtained without
		 * sleeping; otherwise skip to the next candidate.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
			continue;
		if (!mutex_tryenter(vp->v_interlock)) {
			VOP_UNLOCK(vp);
			continue;
		}
		KASSERT((vp->v_iflag & VI_XLOCK) == 0);
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			VOP_UNLOCK(vp);
			continue;
		}
		break;
	}

	if (vp == NULL) {
		/* Exhausted the no-hold list: try the held list next. */
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	/* Drop our reference; vrelel() clears VI_CHANGING for us. */
	vrelel(vp, VRELEL_CHANGING_SET);
	fstrans_done(mp);

	return 0;
}
407:
408: /*
1.12 hannken 409: * Helper thread to keep the number of vnodes below desiredvnodes.
410: */
411: static void
412: vdrain_thread(void *cookie)
413: {
414: int error;
415:
416: mutex_enter(&vnode_free_list_lock);
417:
418: for (;;) {
419: cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
420: while (numvnodes > desiredvnodes) {
421: error = cleanvnode();
422: if (error)
423: kpause("vndsbusy", false, hz, NULL);
424: mutex_enter(&vnode_free_list_lock);
425: if (error)
426: break;
427: }
428: }
429: }
430:
431: /*
1.1 rmind 432: * Remove a vnode from its freelist.
433: */
434: void
435: vremfree(vnode_t *vp)
436: {
437:
1.9 rmind 438: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 439: KASSERT(vp->v_usecount == 0);
440:
441: /*
442: * Note that the reference count must not change until
443: * the vnode is removed.
444: */
445: mutex_enter(&vnode_free_list_lock);
446: if (vp->v_holdcnt > 0) {
447: KASSERT(vp->v_freelisthd == &vnode_hold_list);
448: } else {
449: KASSERT(vp->v_freelisthd == &vnode_free_list);
450: }
451: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
452: vp->v_freelisthd = NULL;
453: mutex_exit(&vnode_free_list_lock);
454: }
455:
456: /*
1.4 rmind 457: * vget: get a particular vnode from the free list, increment its reference
458: * count and lock it.
459: *
460: * => Should be called with v_interlock held.
461: *
1.29 christos 462: * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
1.4 rmind 463: * In that case, we cannot grab the vnode, so the process is awakened when
464: * the transition is completed, and an error returned to indicate that the
1.29 christos 465: * vnode is no longer usable.
1.1 rmind 466: */
467: int
1.41 riastrad 468: vget(vnode_t *vp, int flags, bool waitok)
1.1 rmind 469: {
470: int error = 0;
471:
472: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.9 rmind 473: KASSERT(mutex_owned(vp->v_interlock));
1.41 riastrad 474: KASSERT((flags & ~LK_NOWAIT) == 0);
475: KASSERT(waitok == ((flags & LK_NOWAIT) == 0));
1.1 rmind 476:
477: /*
478: * Before adding a reference, we must remove the vnode
479: * from its freelist.
480: */
481: if (vp->v_usecount == 0) {
482: vremfree(vp);
483: vp->v_usecount = 1;
484: } else {
485: atomic_inc_uint(&vp->v_usecount);
486: }
487:
488: /*
1.29 christos 489: * If the vnode is in the process of changing state we wait
490: * for the change to complete and take care not to return
491: * a clean vnode.
1.1 rmind 492: */
1.29 christos 493: if ((vp->v_iflag & VI_CHANGING) != 0) {
1.1 rmind 494: if ((flags & LK_NOWAIT) != 0) {
495: vrelel(vp, 0);
496: return EBUSY;
497: }
1.29 christos 498: vwait(vp, VI_CHANGING);
1.17 hannken 499: if ((vp->v_iflag & VI_CLEAN) != 0) {
500: vrelel(vp, 0);
501: return ENOENT;
502: }
503: }
504:
1.1 rmind 505: /*
1.41 riastrad 506: * Ok, we got it in good shape.
1.1 rmind 507: */
508: KASSERT((vp->v_iflag & VI_CLEAN) == 0);
1.9 rmind 509: mutex_exit(vp->v_interlock);
1.1 rmind 510: return error;
511: }
512:
513: /*
1.4 rmind 514: * vput: unlock and release the reference.
1.1 rmind 515: */
516: void
517: vput(vnode_t *vp)
518: {
519:
520: KASSERT((vp->v_iflag & VI_MARKER) == 0);
521:
522: VOP_UNLOCK(vp);
523: vrele(vp);
524: }
525:
526: /*
527: * Try to drop reference on a vnode. Abort if we are releasing the
528: * last reference. Note: this _must_ succeed if not the last reference.
529: */
530: static inline bool
531: vtryrele(vnode_t *vp)
532: {
533: u_int use, next;
534:
535: for (use = vp->v_usecount;; use = next) {
536: if (use == 1) {
537: return false;
538: }
1.24 hannken 539: KASSERT(use > 1);
1.1 rmind 540: next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
541: if (__predict_true(next == use)) {
542: return true;
543: }
544: }
545: }
546:
547: /*
548: * Vnode release. If reference count drops to zero, call inactive
549: * routine and either return to freelist or free to the pool.
550: */
1.23 hannken 551: static void
1.1 rmind 552: vrelel(vnode_t *vp, int flags)
553: {
554: bool recycle, defer;
555: int error;
556:
1.9 rmind 557: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 558: KASSERT((vp->v_iflag & VI_MARKER) == 0);
559: KASSERT(vp->v_freelisthd == NULL);
560:
561: if (__predict_false(vp->v_op == dead_vnodeop_p &&
562: (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
1.11 christos 563: vnpanic(vp, "dead but not clean");
1.1 rmind 564: }
565:
566: /*
567: * If not the last reference, just drop the reference count
568: * and unlock.
569: */
570: if (vtryrele(vp)) {
1.29 christos 571: if ((flags & VRELEL_CHANGING_SET) != 0) {
572: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
573: vp->v_iflag &= ~VI_CHANGING;
574: cv_broadcast(&vp->v_cv);
575: }
1.9 rmind 576: mutex_exit(vp->v_interlock);
1.1 rmind 577: return;
578: }
579: if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
1.11 christos 580: vnpanic(vp, "%s: bad ref count", __func__);
1.1 rmind 581: }
582:
583: KASSERT((vp->v_iflag & VI_XLOCK) == 0);
584:
1.15 hannken 585: #ifdef DIAGNOSTIC
586: if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
587: vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
588: vprint("vrelel: missing VOP_CLOSE()", vp);
589: }
590: #endif
591:
1.1 rmind 592: /*
593: * If not clean, deactivate the vnode, but preserve
594: * our reference across the call to VOP_INACTIVE().
595: */
596: if ((vp->v_iflag & VI_CLEAN) == 0) {
597: recycle = false;
598:
599: /*
600: * XXX This ugly block can be largely eliminated if
601: * locking is pushed down into the file systems.
602: *
603: * Defer vnode release to vrele_thread if caller
1.30 hannken 604: * requests it explicitly or is the pagedaemon.
1.1 rmind 605: */
606: if ((curlwp == uvm.pagedaemon_lwp) ||
607: (flags & VRELEL_ASYNC_RELE) != 0) {
608: defer = true;
609: } else if (curlwp == vrele_lwp) {
1.17 hannken 610: /*
1.29 christos 611: * We have to try harder.
1.17 hannken 612: */
1.9 rmind 613: mutex_exit(vp->v_interlock);
1.32 hannken 614: error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.47 riastrad 615: KASSERTMSG((error == 0), "vn_lock failed: %d", error);
1.17 hannken 616: mutex_enter(vp->v_interlock);
1.1 rmind 617: defer = false;
1.4 rmind 618: } else {
1.1 rmind 619: /* If we can't acquire the lock, then defer. */
1.32 hannken 620: mutex_exit(vp->v_interlock);
621: error = vn_lock(vp,
622: LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
1.30 hannken 623: defer = (error != 0);
1.32 hannken 624: mutex_enter(vp->v_interlock);
1.1 rmind 625: }
626:
1.30 hannken 627: KASSERT(mutex_owned(vp->v_interlock));
628: KASSERT(! (curlwp == vrele_lwp && defer));
629:
1.1 rmind 630: if (defer) {
631: /*
632: * Defer reclaim to the kthread; it's not safe to
633: * clean it here. We donate it our last reference.
634: */
1.29 christos 635: if ((flags & VRELEL_CHANGING_SET) != 0) {
636: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
637: vp->v_iflag &= ~VI_CHANGING;
638: cv_broadcast(&vp->v_cv);
639: }
1.1 rmind 640: mutex_enter(&vrele_lock);
641: TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
642: if (++vrele_pending > (desiredvnodes >> 8))
643: cv_signal(&vrele_cv);
644: mutex_exit(&vrele_lock);
1.9 rmind 645: mutex_exit(vp->v_interlock);
1.1 rmind 646: return;
647: }
648:
1.32 hannken 649: /*
650: * If the node got another reference while we
651: * released the interlock, don't try to inactivate it yet.
652: */
653: if (__predict_false(vtryrele(vp))) {
654: VOP_UNLOCK(vp);
655: if ((flags & VRELEL_CHANGING_SET) != 0) {
656: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
657: vp->v_iflag &= ~VI_CHANGING;
658: cv_broadcast(&vp->v_cv);
659: }
660: mutex_exit(vp->v_interlock);
661: return;
662: }
663:
1.29 christos 664: if ((flags & VRELEL_CHANGING_SET) == 0) {
665: KASSERT((vp->v_iflag & VI_CHANGING) == 0);
666: vp->v_iflag |= VI_CHANGING;
667: }
668: mutex_exit(vp->v_interlock);
669:
1.1 rmind 670: /*
671: * The vnode can gain another reference while being
672: * deactivated. If VOP_INACTIVE() indicates that
673: * the described file has been deleted, then recycle
674: * the vnode irrespective of additional references.
675: * Another thread may be waiting to re-use the on-disk
676: * inode.
677: *
678: * Note that VOP_INACTIVE() will drop the vnode lock.
679: */
680: VOP_INACTIVE(vp, &recycle);
1.46 hannken 681: if (recycle) {
682: /* vclean() below will drop the lock. */
683: if (vn_lock(vp, LK_EXCLUSIVE) != 0)
684: recycle = false;
685: }
1.9 rmind 686: mutex_enter(vp->v_interlock);
1.1 rmind 687: if (!recycle) {
688: if (vtryrele(vp)) {
1.29 christos 689: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
690: vp->v_iflag &= ~VI_CHANGING;
691: cv_broadcast(&vp->v_cv);
1.9 rmind 692: mutex_exit(vp->v_interlock);
1.1 rmind 693: return;
694: }
695: }
696:
697: /* Take care of space accounting. */
698: if (vp->v_iflag & VI_EXECMAP) {
699: atomic_add_int(&uvmexp.execpages,
700: -vp->v_uobj.uo_npages);
701: atomic_add_int(&uvmexp.filepages,
702: vp->v_uobj.uo_npages);
703: }
704: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
705: vp->v_vflag &= ~VV_MAPPED;
706:
707: /*
708: * Recycle the vnode if the file is now unused (unlinked),
709: * otherwise just free it.
710: */
711: if (recycle) {
1.25 hannken 712: vclean(vp);
1.1 rmind 713: }
714: KASSERT(vp->v_usecount > 0);
1.29 christos 715: } else { /* vnode was already clean */
716: if ((flags & VRELEL_CHANGING_SET) == 0) {
717: KASSERT((vp->v_iflag & VI_CHANGING) == 0);
718: vp->v_iflag |= VI_CHANGING;
719: }
1.1 rmind 720: }
721:
722: if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
723: /* Gained another reference while being reclaimed. */
1.29 christos 724: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
725: vp->v_iflag &= ~VI_CHANGING;
726: cv_broadcast(&vp->v_cv);
1.9 rmind 727: mutex_exit(vp->v_interlock);
1.1 rmind 728: return;
729: }
730:
731: if ((vp->v_iflag & VI_CLEAN) != 0) {
732: /*
733: * It's clean so destroy it. It isn't referenced
734: * anywhere since it has been reclaimed.
735: */
736: KASSERT(vp->v_holdcnt == 0);
737: KASSERT(vp->v_writecount == 0);
1.9 rmind 738: mutex_exit(vp->v_interlock);
1.1 rmind 739: vfs_insmntque(vp, NULL);
740: if (vp->v_type == VBLK || vp->v_type == VCHR) {
741: spec_node_destroy(vp);
742: }
743: vnfree(vp);
744: } else {
745: /*
746: * Otherwise, put it back onto the freelist. It
747: * can't be destroyed while still associated with
748: * a file system.
749: */
750: mutex_enter(&vnode_free_list_lock);
751: if (vp->v_holdcnt > 0) {
752: vp->v_freelisthd = &vnode_hold_list;
753: } else {
754: vp->v_freelisthd = &vnode_free_list;
755: }
756: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
757: mutex_exit(&vnode_free_list_lock);
1.29 christos 758: KASSERT((vp->v_iflag & VI_CHANGING) != 0);
759: vp->v_iflag &= ~VI_CHANGING;
760: cv_broadcast(&vp->v_cv);
1.9 rmind 761: mutex_exit(vp->v_interlock);
1.1 rmind 762: }
763: }
764:
765: void
766: vrele(vnode_t *vp)
767: {
768:
769: KASSERT((vp->v_iflag & VI_MARKER) == 0);
770:
1.29 christos 771: if (vtryrele(vp)) {
1.1 rmind 772: return;
773: }
1.9 rmind 774: mutex_enter(vp->v_interlock);
1.1 rmind 775: vrelel(vp, 0);
776: }
777:
778: /*
779: * Asynchronous vnode release, vnode is released in different context.
780: */
781: void
782: vrele_async(vnode_t *vp)
783: {
784:
785: KASSERT((vp->v_iflag & VI_MARKER) == 0);
786:
1.29 christos 787: if (vtryrele(vp)) {
1.1 rmind 788: return;
789: }
1.9 rmind 790: mutex_enter(vp->v_interlock);
1.1 rmind 791: vrelel(vp, VRELEL_ASYNC_RELE);
792: }
793:
794: static void
795: vrele_thread(void *cookie)
796: {
1.34 hannken 797: vnodelst_t skip_list;
1.1 rmind 798: vnode_t *vp;
1.34 hannken 799: struct mount *mp;
800:
801: TAILQ_INIT(&skip_list);
1.1 rmind 802:
1.34 hannken 803: mutex_enter(&vrele_lock);
1.1 rmind 804: for (;;) {
805: while (TAILQ_EMPTY(&vrele_list)) {
806: vrele_gen++;
807: cv_broadcast(&vrele_cv);
808: cv_timedwait(&vrele_cv, &vrele_lock, hz);
1.34 hannken 809: TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
1.1 rmind 810: }
811: vp = TAILQ_FIRST(&vrele_list);
1.34 hannken 812: mp = vp->v_mount;
1.1 rmind 813: TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1.34 hannken 814: if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
815: TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
816: continue;
817: }
1.1 rmind 818: vrele_pending--;
819: mutex_exit(&vrele_lock);
820:
821: /*
822: * If not the last reference, then ignore the vnode
823: * and look for more work.
824: */
1.9 rmind 825: mutex_enter(vp->v_interlock);
1.1 rmind 826: vrelel(vp, 0);
1.34 hannken 827: fstrans_done(mp);
828: mutex_enter(&vrele_lock);
1.1 rmind 829: }
830: }
831:
1.2 rmind 832: void
833: vrele_flush(void)
834: {
835: int gen;
836:
837: mutex_enter(&vrele_lock);
838: gen = vrele_gen;
839: while (vrele_pending && gen == vrele_gen) {
840: cv_broadcast(&vrele_cv);
841: cv_wait(&vrele_cv, &vrele_lock);
842: }
843: mutex_exit(&vrele_lock);
844: }
845:
1.1 rmind 846: /*
847: * Vnode reference, where a reference is already held by some other
848: * object (for example, a file structure).
849: */
850: void
851: vref(vnode_t *vp)
852: {
853:
854: KASSERT((vp->v_iflag & VI_MARKER) == 0);
855: KASSERT(vp->v_usecount != 0);
856:
857: atomic_inc_uint(&vp->v_usecount);
858: }
859:
860: /*
861: * Page or buffer structure gets a reference.
862: * Called with v_interlock held.
863: */
864: void
865: vholdl(vnode_t *vp)
866: {
867:
1.9 rmind 868: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 869: KASSERT((vp->v_iflag & VI_MARKER) == 0);
870:
871: if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
872: mutex_enter(&vnode_free_list_lock);
873: KASSERT(vp->v_freelisthd == &vnode_free_list);
874: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
875: vp->v_freelisthd = &vnode_hold_list;
876: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
877: mutex_exit(&vnode_free_list_lock);
878: }
879: }
880:
881: /*
882: * Page or buffer structure frees a reference.
883: * Called with v_interlock held.
884: */
885: void
886: holdrelel(vnode_t *vp)
887: {
888:
1.9 rmind 889: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 890: KASSERT((vp->v_iflag & VI_MARKER) == 0);
891:
892: if (vp->v_holdcnt <= 0) {
1.11 christos 893: vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
1.1 rmind 894: }
895:
896: vp->v_holdcnt--;
897: if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
898: mutex_enter(&vnode_free_list_lock);
899: KASSERT(vp->v_freelisthd == &vnode_hold_list);
900: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
901: vp->v_freelisthd = &vnode_free_list;
902: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
903: mutex_exit(&vnode_free_list_lock);
904: }
905: }
906:
907: /*
908: * Disassociate the underlying file system from a vnode.
909: *
1.46 hannken 910: * Must be called with vnode locked and will return unlocked.
1.1 rmind 911: * Must be called with the interlock held, and will return with it held.
912: */
1.25 hannken 913: static void
914: vclean(vnode_t *vp)
1.1 rmind 915: {
916: lwp_t *l = curlwp;
1.43 hannken 917: bool recycle, active;
1.1 rmind 918: int error;
919:
1.46 hannken 920: KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
921: VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
1.9 rmind 922: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 923: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.46 hannken 924: KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
1.1 rmind 925: KASSERT(vp->v_usecount != 0);
926:
1.32 hannken 927: active = (vp->v_usecount > 1);
1.1 rmind 928: /*
929: * Prevent the vnode from being recycled or brought into use
930: * while we clean it out.
931: */
932: vp->v_iflag |= VI_XLOCK;
933: if (vp->v_iflag & VI_EXECMAP) {
934: atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
935: atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
936: }
937: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1.9 rmind 938: mutex_exit(vp->v_interlock);
1.23 hannken 939:
1.1 rmind 940: /*
941: * Clean out any cached data associated with the vnode.
942: * If purging an active vnode, it must be closed and
943: * deactivated before being reclaimed. Note that the
944: * VOP_INACTIVE will unlock the vnode.
945: */
1.43 hannken 946: error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
947: if (error != 0) {
948: if (wapbl_vphaswapbl(vp))
949: WAPBL_DISCARD(wapbl_vptomp(vp));
950: error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
951: }
1.47 riastrad 952: KASSERTMSG((error == 0), "vinvalbuf failed: %d", error);
1.43 hannken 953: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
954: if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
955: spec_node_revoke(vp);
1.1 rmind 956: }
957: if (active) {
958: VOP_INACTIVE(vp, &recycle);
959: } else {
960: /*
961: * Any other processes trying to obtain this lock must first
962: * wait for VI_XLOCK to clear, then call the new lock operation.
963: */
964: VOP_UNLOCK(vp);
965: }
966:
967: /* Disassociate the underlying file system from the vnode. */
968: if (VOP_RECLAIM(vp)) {
1.11 christos 969: vnpanic(vp, "%s: cannot reclaim", __func__);
1.1 rmind 970: }
971:
1.7 rmind 972: KASSERT(vp->v_data == NULL);
1.1 rmind 973: KASSERT(vp->v_uobj.uo_npages == 0);
1.7 rmind 974:
1.1 rmind 975: if (vp->v_type == VREG && vp->v_ractx != NULL) {
976: uvm_ra_freectx(vp->v_ractx);
977: vp->v_ractx = NULL;
978: }
1.7 rmind 979:
980: /* Purge name cache. */
1.1 rmind 981: cache_purge(vp);
982:
1.31 hannken 983: /* Move to dead mount. */
984: vp->v_vflag &= ~VV_ROOT;
1.44 hannken 985: atomic_inc_uint(&dead_rootmount->mnt_refcnt);
986: vfs_insmntque(vp, dead_rootmount);
1.23 hannken 987:
1.1 rmind 988: /* Done with purge, notify sleepers of the grim news. */
1.9 rmind 989: mutex_enter(vp->v_interlock);
1.43 hannken 990: vp->v_op = dead_vnodeop_p;
991: vp->v_vflag |= VV_LOCKSWORK;
992: vp->v_iflag |= VI_CLEAN;
1.1 rmind 993: vp->v_tag = VT_NON;
994: KNOTE(&vp->v_klist, NOTE_REVOKE);
995: vp->v_iflag &= ~VI_XLOCK;
996: cv_broadcast(&vp->v_cv);
997:
998: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
999: }
1000:
/*
 * Recycle an unused vnode if caller holds the last reference.
 * Returns true if the vnode was cleaned and released, false if the
 * lock could not be taken or other references exist.
 */
bool
vrecycle(vnode_t *vp)
{

	/* An exclusive vnode lock is required before cleaning. */
	if (vn_lock(vp, LK_EXCLUSIVE) != 0)
		return false;

	mutex_enter(vp->v_interlock);

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	/* We must hold the only use reference. */
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	/*
	 * Wait out a concurrent state change, then re-check the
	 * reference count -- it may have grown while we slept.
	 */
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	/* vclean() returned with the interlock held; drop our reference. */
	vrelel(vp, VRELEL_CHANGING_SET);
	return true;
}
1033:
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq;
	enum vtype type;
	dev_t dev;

	KASSERT(vp->v_usecount > 0);

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/* Already dead -- nothing to do. */
		mutex_exit(vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		/*
		 * Not a device vnode, so there are no aliases: take an
		 * extra reference (vgone() consumes one) and clean it.
		 */
		atomic_inc_uint(&vp->v_usecount);
		mutex_exit(vp->v_interlock);
		vgone(vp);
		return;
	} else {
		/* Device vnode: record its identity to find all aliases. */
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(vp->v_interlock);
	}

	/*
	 * Clean every vnode aliasing this device (vp included);
	 * spec_node_lookup_by_dev() returns a referenced vnode each pass.
	 */
	while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
		vgone(vq);
	}
}
1066:
1067: /*
1068: * Eliminate all activity associated with a vnode in preparation for
1069: * reuse. Drops a reference from the vnode.
1070: */
1071: void
1072: vgone(vnode_t *vp)
1073: {
1074:
1.46 hannken 1075: if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
1076: KASSERT((vp->v_iflag & VI_CLEAN) != 0);
1077: vrele(vp);
1078: }
1079:
1.9 rmind 1080: mutex_enter(vp->v_interlock);
1.29 christos 1081: if ((vp->v_iflag & VI_CHANGING) != 0)
1082: vwait(vp, VI_CHANGING);
1083: vp->v_iflag |= VI_CHANGING;
1.25 hannken 1084: vclean(vp);
1.29 christos 1085: vrelel(vp, VRELEL_CHANGING_SET);
1.1 rmind 1086: }
1087:
1.36 hannken 1088: static inline uint32_t
1089: vcache_hash(const struct vcache_key *key)
1090: {
1091: uint32_t hash = HASH32_BUF_INIT;
1092:
1093: hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
1094: hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
1095: return hash;
1096: }
1097:
1098: static void
1099: vcache_init(void)
1100: {
1101:
1102: vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
1103: "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
1104: KASSERT(vcache.pool != NULL);
1105: mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
1106: vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
1107: &vcache.hashmask);
1108: }
1109:
1110: static void
1111: vcache_reinit(void)
1112: {
1113: int i;
1114: uint32_t hash;
1115: u_long oldmask, newmask;
1116: struct hashhead *oldtab, *newtab;
1117: struct vcache_node *node;
1118:
1119: newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
1120: mutex_enter(&vcache.lock);
1121: oldtab = vcache.hashtab;
1122: oldmask = vcache.hashmask;
1123: vcache.hashtab = newtab;
1124: vcache.hashmask = newmask;
1125: for (i = 0; i <= oldmask; i++) {
1126: while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
1127: SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
1128: hash = vcache_hash(&node->vn_key);
1129: SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
1130: node, vn_hash);
1131: }
1132: }
1133: mutex_exit(&vcache.lock);
1134: hashdone(oldtab, HASH_SLIST, oldmask);
1135: }
1136:
1137: static inline struct vcache_node *
1138: vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
1139: {
1140: struct hashhead *hashp;
1141: struct vcache_node *node;
1142:
1143: KASSERT(mutex_owned(&vcache.lock));
1144:
1145: hashp = &vcache.hashtab[hash & vcache.hashmask];
1146: SLIST_FOREACH(node, hashp, vn_hash) {
1147: if (key->vk_mount != node->vn_key.vk_mount)
1148: continue;
1149: if (key->vk_key_len != node->vn_key.vk_key_len)
1150: continue;
1151: if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
1152: continue;
1153: return node;
1154: }
1155: return NULL;
1156: }
1157:
/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 *
 * Looks the key up in the vnode cache; on a hit the vnode is
 * referenced with vget().  On a miss a new pair is allocated, inserted
 * into the hash as a placeholder (vn_vnode == NULL), and loaded via
 * VFS_LOADVNODE().  Concurrent lookups of the same key wait and retry.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	struct vcache_node *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
		vp = node->vn_vnode;
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0, true /* wait */);
		if (error == ENOENT)
			goto again;	/* Vnode died while we waited. */
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}

	/* If another thread loads this node, wait and retry. */
	if (node != NULL) {
		KASSERT(node->vn_vnode == NULL);
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;	/* NULL marks "load in progress". */
	new_node->vn_key = vcache_key;
	vp = vnalloc(NULL);
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vn_hash);
		node = new_node;
	}
	mutex_exit(&vcache.lock);

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}

	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		/* Load failed: unwind the hash insertion and allocations. */
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vcache_node, vn_hash);
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_key.vk_key = new_key;	/* Key supplied by the fs. */
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	/* Clear VI_CHANGING and wake any threads waiting on the load. */
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
1270:
/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 *
 * VFS_NEWVNODE() creates the fs node and supplies the cache key.  If a
 * previous instance with the same key is still being reclaimed, wait
 * for it to disappear before inserting the new node.
 */
int
vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
    kauth_cred_t cred, struct vnode **vpp)
{
	int error;
	uint32_t hash;
	struct vnode *vp;
	struct vcache_node *new_node;
	struct vcache_node *old_node __diagused;

	*vpp = NULL;

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_key.vk_mount = mp;
	new_node->vn_vnode = NULL;	/* NULL marks "load in progress". */
	vp = vnalloc(NULL);

	/* Create and load the fs node; it fills in the cache key. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
	    &new_node->vn_key.vk_key_len, &new_node->vn_key.vk_key);
	if (error) {
		/* Creation failed: release everything we allocated. */
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_node->vn_key.vk_key != NULL);
	KASSERT(vp->v_op != NULL);
	hash = vcache_hash(&new_node->vn_key);

	/* Wait for previous instance to be reclaimed, then insert new node. */
	mutex_enter(&vcache.lock);
	while ((old_node = vcache_hash_lookup(&new_node->vn_key, hash))) {
#ifdef DIAGNOSTIC
		/* Any lingering instance must be dying or dead. */
		if (old_node->vn_vnode != NULL)
			mutex_enter(old_node->vn_vnode->v_interlock);
		KASSERT(old_node->vn_vnode == NULL ||
		    (old_node->vn_vnode->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0);
		if (old_node->vn_vnode != NULL)
			mutex_exit(old_node->vn_vnode->v_interlock);
#endif
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		mutex_enter(&vcache.lock);
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
	    new_node, vn_hash);
	mutex_exit(&vcache.lock);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	/* Clear VI_CHANGING and wake any threads waiting on the load. */
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
1346:
/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 *
 * "Locking" a node here means setting its vn_vnode to NULL, which
 * makes concurrent vcache_get() callers wait and retry.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	/* Preallocate the placeholder outside the lock. */
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = new_vcache_key;

	mutex_enter(&vcache.lock);

	/* Insert locked new node used as placeholder. */
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vn_hash);

	/* Lock old node. */
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == vp);
	node->vn_vnode = NULL;
	node->vn_key = old_vcache_key;
	mutex_exit(&vcache.lock);
	return 0;
}
1395:
/*
 * Key change complete: remove old node and unlock new node.
 *
 * The old node (holding vp) is rekeyed to the new key and moved to its
 * new hash bucket; the placeholder inserted by vcache_rekey_enter() is
 * removed and freed.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *old_node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);

	/* Lookup old and new node. */
	old_node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(old_node != NULL);
	KASSERT(old_node->vn_vnode == NULL);
	new_node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(new_node != NULL && new_node->vn_vnode == NULL);
	KASSERT(new_node->vn_key.vk_key_len == new_key_len);

	/* Rekey old node and put it onto its new hashlist. */
	old_node->vn_vnode = vp;	/* Non-NULL makes the node visible. */
	old_node->vn_key = new_vcache_key;
	if (old_hash != new_hash) {
		SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
		    old_node, vcache_node, vn_hash);
		SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
		    old_node, vn_hash);
	}

	/* Remove new node used as placeholder. */
	SLIST_REMOVE(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, new_node);
}
1444:
1445: /*
1.36 hannken 1446: * Remove a vnode / fs node pair from the cache.
1447: */
1448: void
1449: vcache_remove(struct mount *mp, const void *key, size_t key_len)
1450: {
1451: uint32_t hash;
1452: struct vcache_key vcache_key;
1453: struct vcache_node *node;
1454:
1455: vcache_key.vk_mount = mp;
1456: vcache_key.vk_key = key;
1457: vcache_key.vk_key_len = key_len;
1458: hash = vcache_hash(&vcache_key);
1459:
1460: mutex_enter(&vcache.lock);
1461: node = vcache_hash_lookup(&vcache_key, hash);
1462: KASSERT(node != NULL);
1463: SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
1464: node, vcache_node, vn_hash);
1465: mutex_exit(&vcache.lock);
1466: pool_cache_put(vcache.pool, node);
1467: }
1468:
1.1 rmind 1469: /*
1470: * Update outstanding I/O count and do wakeup if requested.
1471: */
1472: void
1473: vwakeup(struct buf *bp)
1474: {
1475: vnode_t *vp;
1476:
1477: if ((vp = bp->b_vp) == NULL)
1478: return;
1479:
1.9 rmind 1480: KASSERT(bp->b_objlock == vp->v_interlock);
1.1 rmind 1481: KASSERT(mutex_owned(bp->b_objlock));
1482:
1483: if (--vp->v_numoutput < 0)
1.11 christos 1484: vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
1.1 rmind 1485: if (vp->v_numoutput == 0)
1486: cv_broadcast(&vp->v_cv);
1487: }
1488:
/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 *
 * Called with the vnode interlock held.
 */
int
vdead_check(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	if (ISSET(vp->v_iflag, VI_XLOCK)) {
		/* Being cleaned: either report it or wait for completion. */
		if (ISSET(flags, VDEAD_NOWAIT))
			return EBUSY;
		vwait(vp, VI_XLOCK);
		/* Once VI_XLOCK clears the vnode must be fully dead. */
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
	}
	if (ISSET(vp->v_iflag, VI_CLEAN))
		return ENOENT;
	return 0;
}
1513:
1514: /*
1.1 rmind 1515: * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
1516: * recycled.
1517: */
1.35 hannken 1518: static void
1.1 rmind 1519: vwait(vnode_t *vp, int flags)
1520: {
1521:
1.9 rmind 1522: KASSERT(mutex_owned(vp->v_interlock));
1.1 rmind 1523: KASSERT(vp->v_usecount != 0);
1524:
1525: while ((vp->v_iflag & flags) != 0)
1.9 rmind 1526: cv_wait(&vp->v_cv, vp->v_interlock);
1.1 rmind 1527: }
1528:
/*
 * Reduce the number of cached vnodes to "target" by cleaning vnodes,
 * then resize the vnode cache hash table.  Returns 0 on success or an
 * error from cleanvnode().
 */
int
vfs_drainvnodes(long target)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	while (numvnodes > target) {
		/*
		 * NOTE(review): cleanvnode() appears to release
		 * vnode_free_list_lock in all cases -- it is re-entered
		 * below on success and not dropped on the error return.
		 * Confirm against cleanvnode()'s definition.
		 */
		error = cleanvnode();
		if (error != 0)
			return error;
		mutex_enter(&vnode_free_list_lock);
	}

	mutex_exit(&vnode_free_list_lock);

	/* Rehash the vnode cache for the new desiredvnodes. */
	vcache_reinit();

	return 0;
}
1549:
1550: void
1.11 christos 1551: vnpanic(vnode_t *vp, const char *fmt, ...)
1.1 rmind 1552: {
1.11 christos 1553: va_list ap;
1554:
1.1 rmind 1555: #ifdef DIAGNOSTIC
1556: vprint(NULL, vp);
1557: #endif
1.11 christos 1558: va_start(ap, fmt);
1559: vpanic(fmt, ap);
1560: va_end(ap);
1.1 rmind 1561: }
CVSweb <webmaster@jp.NetBSD.org>