Annotation of src/sys/kern/vfs_subr.c, Revision 1.336.2.1
1.336.2.1! yamt 1: /* $NetBSD: vfs_subr.c,v 1.336 2008/04/04 20:13:18 cegger Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302 ad 9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
1.32 cgd 32:
1.29 cgd 33: /*
1.30 mycroft 34: * Copyright (c) 1989, 1993
35: * The Regents of the University of California. All rights reserved.
1.29 cgd 36: * (c) UNIX System Laboratories, Inc.
37: * All or some portions of this file are derived from material licensed
38: * to the University of California by American Telephone and Telegraph
39: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40: * the permission of UNIX System Laboratories, Inc.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
1.204 agc 50: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 51: * may be used to endorse or promote products derived from this software
52: * without specific prior written permission.
53: *
54: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64: * SUCH DAMAGE.
65: *
1.32 cgd 66: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 67: */
68:
69: /*
1.296 pooka 70: * External virtual filesystem routines.
71: *
72: * This file contains vfs subroutines which are heavily dependant on
73: * the kernel and are not suitable for standalone use. Examples include
74: * routines involved vnode and mountpoint management.
1.29 cgd 75: */
1.162 lukem 76:
77: #include <sys/cdefs.h>
1.336.2.1! yamt 78: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.336 2008/04/04 20:13:18 cegger Exp $");
1.78 mrg 79:
1.125 chs 80: #include "opt_ddb.h"
1.95 thorpej 81: #include "opt_compat_netbsd.h"
1.97 christos 82: #include "opt_compat_43.h"
1.29 cgd 83:
84: #include <sys/param.h>
1.30 mycroft 85: #include <sys/systm.h>
1.29 cgd 86: #include <sys/proc.h>
1.138 bouyer 87: #include <sys/kernel.h>
1.29 cgd 88: #include <sys/mount.h>
1.46 mycroft 89: #include <sys/fcntl.h>
1.29 cgd 90: #include <sys/vnode.h>
1.30 mycroft 91: #include <sys/stat.h>
1.29 cgd 92: #include <sys/namei.h>
93: #include <sys/ucred.h>
94: #include <sys/buf.h>
95: #include <sys/errno.h>
96: #include <sys/malloc.h>
1.51 christos 97: #include <sys/syscallargs.h>
1.58 thorpej 98: #include <sys/device.h>
1.192 christos 99: #include <sys/filedesc.h>
1.266 elad 100: #include <sys/kauth.h>
1.307 ad 101: #include <sys/atomic.h>
1.309 ad 102: #include <sys/kthread.h>
1.50 christos 103:
1.30 mycroft 104: #include <miscfs/specfs/specdev.h>
1.113 fvdl 105: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 106:
1.125 chs 107: #include <uvm/uvm.h>
1.255 yamt 108: #include <uvm/uvm_readahead.h>
1.125 chs 109: #include <uvm/uvm_ddb.h>
1.129 mrg 110:
111: #include <sys/sysctl.h>
1.77 mrg 112:
extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */

/*
 * Global free lists: vnode_free_list holds unreferenced vnodes with no
 * held buffers; vnode_hold_list holds unreferenced vnodes that still
 * have cached buffers (v_holdcnt > 0).  vrele_list queues vnodes whose
 * final release has been deferred to the vrele thread.
 * NOTE(review): presumably all three are protected by
 * vnode_free_list_lock / vrele_lock as used below — confirm against
 * the rest of the file.
 */
static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);

/* State for the deferred-release (vrele) kernel thread. */
static int vrele_pending;		/* count of entries on vrele_list */
static kmutex_t	vrele_lock;		/* protects vrele_list/vrele_pending */
static kcondvar_t vrele_cv;		/* signalled when work is queued */
static lwp_t *vrele_lwp;		/* the vrele thread itself */

/* Pool cache from which all vnodes are allocated; see vnalloc()/vnfree(). */
static pool_cache_t vnode_cache;

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93 thorpej 128:
1.89 kleink 129: /*
130: * Local declarations.
131: */
1.276 hannken 132:
1.309 ad 133: static void vrele_thread(void *);
134: static void insmntque(vnode_t *, struct mount *);
135: static int getdevvp(dev_t, vnode_t **, enum vtype);
136: static vnode_t *getcleanvnode(void);;
137: void vpanic(vnode_t *, const char *);
138:
#ifdef DIAGNOSTIC
/*
 * Dump diagnostic information about a vnode and panic with the
 * supplied message.  Compiles away to nothing on non-DIAGNOSTIC
 * kernels, so callers must not rely on it for control flow.
 */
void
vpanic(vnode_t *vp, const char *msg)
{

	vprint(NULL, vp);
	panic("%s\n", msg);
}
#else
#define	vpanic(vp, msg)	/* nothing */
#endif
150:
/*
 * One-time initialization of the vnode subsystem pieces in this file:
 * create the pool cache that backs all vnode allocations, then start
 * the deferred-release ("vrele") kernel thread.  Panics on failure,
 * as the system cannot run without either.
 */
void
vn_init1(void)
{

	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	/* Create deferred release thread. */
	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vrele_cv, "vrele");
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele"))
		panic("fork vrele");
}
1.51 christos 166:
/*
 * Shrink the vnode pool: reclaim clean vnodes from the free lists and
 * destroy them until the allocated count drops to "target".
 *
 * => Returns 0 on success, or EBUSY if no reclaimable vnode remains.
 * => "l" is unused by the visible code.
 */
int
vfs_drainvnodes(long target, struct lwp *l)
{

	while (numvnodes > target) {
		vnode_t *vp;

		/* getcleanvnode() expects the list lock held on entry. */
		mutex_enter(&vnode_free_list_lock);
		vp = getcleanvnode();
		if (vp == NULL)
			return EBUSY; /* give up */
		/* Push the now-clean vnode back for destruction. */
		ungetnewvnode(vp);
	}

	return 0;
}
183:
184: /*
185: * grab a vnode from freelist and clean it.
186: */
/*
 * Scan the free list (and then the hold list) for an unreferenced
 * vnode, detach it from its old identity with vclean(), and return it
 * ready for reuse.
 *
 * => Caller must hold vnode_free_list_lock; it is always released
 *    before returning (NULL or not).
 * => Returns with vp->v_interlock held and v_usecount == 1, or NULL
 *    if nothing could be reclaimed.
 */
vnode_t *
getcleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));

retry:
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		if (vp->v_usecount != 0) {
			vpanic(vp, "free vnode isn't");
		}
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vpanic(vp, "clean vnode on freelist");
		}
		if (vp->v_freelisthd != listhd) {
			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
			vpanic(vp, "list head mismatch");
		}
		/* Skip vnodes whose interlock is contended. */
		if (!mutex_tryenter(&vp->v_interlock))
			continue;
		/*
		 * Our lwp might hold the underlying vnode
		 * locked, so don't try to reclaim a VI_LAYER
		 * node if it's locked.
		 */
		if ((vp->v_iflag & VI_XLOCK) == 0 &&
		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			break;
		}
		mutex_exit(&vp->v_interlock);
	}

	if (vp == NULL) {
		/* Free list exhausted: fall back to the hold list once. */
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return NULL;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before reusing it.  We need to add a reference
	 * before doing this.  If the vnode gains another reference while
	 * being cleaned out then we lose - retry.
	 */
	vp->v_usecount++;
	vclean(vp, DOCLOSE);
	if (vp->v_usecount == 1) {
		/* We're about to dirty it. */
		vp->v_iflag &= ~VI_CLEAN;
		mutex_exit(&vp->v_interlock);
		/* Device vnodes carry extra specfs state; tear it down. */
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vp->v_type = VNON;
	} else {
		/*
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		KASSERT(vp->v_usecount > 1);
		vp->v_usecount--;
		mutex_exit(&vp->v_interlock);
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/* Post-conditions: a truly clean, quiescent vnode. */
	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
		vpanic(vp, "cleaned vnode isn't");
	}
	if (vp->v_numoutput != 0) {
		vpanic(vp, "clean vnode has pending I/O's");
	}
	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
		vpanic(vp, "clean vnode on syncer list");
	}

	return vp;
}
284:
1.29 cgd 285: /*
1.327 ad 286: * Mark a mount point as busy, and gain a new reference to it. Used to
1.336.2.1! yamt 287: * prevent the file system from being unmounted during critical sections.
1.327 ad 288: *
1.336.2.1! yamt 289: * => The caller must hold a pre-existing reference to the mount.
! 290: * => Will fail if the file system is being unmounted, or is unmounted.
1.327 ad 291: */
/*
 * Mark a mount point as busy (shared hold on mnt_unmounting) and gain
 * a new reference to it.  Fails with EBUSY if an unmount is in
 * progress, or ENOENT if the file system is already gone.
 *
 * => Caller must already hold a reference on the mount.
 * => If nextp != NULL the caller holds mountlist_lock; on failure the
 *    next mount in the list is stored there and the lock is kept, on
 *    success mountlist_lock is released.
 */
int
vfs_busy(struct mount *mp, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* A writer holds mnt_unmounting => unmount in progress. */
	if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return EBUSY;
	}
	/* Got the read lock, but the mount may already be dead. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		rw_exit(&mp->mnt_unmounting);
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return ENOENT;
	}
	if (nextp != NULL) {
		mutex_exit(&mountlist_lock);
	}
	atomic_inc_uint(&mp->mnt_refcnt);
	return 0;
}
319:
320: /*
1.336.2.1! yamt 321: * Unbusy a busy filesystem.
! 322: *
! 323: * => If keepref is true, preserve reference added by vfs_busy().
! 324: * => If nextp != NULL, acquire mountlist_lock.
1.29 cgd 325: */
/*
 * Unbusy a busy filesystem: release the shared hold on mnt_unmounting
 * taken by vfs_busy().
 *
 * => If keepref is true, preserve the reference added by vfs_busy().
 * => If nextp != NULL, acquire mountlist_lock (returned held to the
 *    caller) and store the next mount in the list there.
 */
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (nextp != NULL) {
		mutex_enter(&mountlist_lock);
	}
	rw_exit(&mp->mnt_unmounting);
	if (!keepref) {
		/* Drop the reference vfs_busy() added. */
		vfs_destroy(mp);
	}
	if (nextp != NULL) {
		KASSERT(mutex_owned(&mountlist_lock));
		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
	}
}
344:
345: /*
1.80 fvdl 346: * Lookup a filesystem type, and if found allocate and initialize
347: * a mount structure for it.
348: *
349: * Devname is usually updated by mount(8) after booting.
1.29 cgd 350: */
1.50 christos 351: int
1.247 thorpej 352: vfs_rootmountalloc(const char *fstypename, const char *devname,
353: struct mount **mpp)
1.29 cgd 354: {
1.80 fvdl 355: struct vfsops *vfsp = NULL;
356: struct mount *mp;
1.29 cgd 357:
1.309 ad 358: mutex_enter(&vfs_list_lock);
1.152 jdolecek 359: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291 christos 360: if (!strncmp(vfsp->vfs_name, fstypename,
361: sizeof(mp->mnt_stat.f_fstypename)))
1.80 fvdl 362: break;
1.315 ad 363: if (vfsp == NULL) {
364: mutex_exit(&vfs_list_lock);
1.80 fvdl 365: return (ENODEV);
1.315 ad 366: }
1.309 ad 367: vfsp->vfs_refcount++;
368: mutex_exit(&vfs_list_lock);
369:
1.327 ad 370: mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
371: if (mp == NULL)
372: return ENOMEM;
373: mp->mnt_refcnt = 1;
1.336.2.1! yamt 374: rw_init(&mp->mnt_unmounting);
! 375: mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
1.331 skrll 376: mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
1.336.2.1! yamt 377: (void)vfs_busy(mp, NULL);
1.272 reinoud 378: TAILQ_INIT(&mp->mnt_vnodelist);
1.80 fvdl 379: mp->mnt_op = vfsp;
380: mp->mnt_flag = MNT_RDONLY;
1.309 ad 381: mp->mnt_vnodecovered = NULL;
1.291 christos 382: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
383: sizeof(mp->mnt_stat.f_fstypename));
1.80 fvdl 384: mp->mnt_stat.f_mntonname[0] = '/';
1.314 pooka 385: mp->mnt_stat.f_mntonname[1] = '\0';
1.291 christos 386: mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
387: '\0';
388: (void)copystr(devname, mp->mnt_stat.f_mntfromname,
389: sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.276 hannken 390: mount_initspecific(mp);
1.80 fvdl 391: *mpp = mp;
1.29 cgd 392: return (0);
393: }
394:
1.30 mycroft 395: /*
396: * Routines having to do with the management of the vnode table.
397: */
1.217 junyoung 398: extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 399:
1.29 cgd 400: /*
401: * Return the next vnode from the free list.
402: */
/*
 * Return a fresh vnode: either newly allocated from the pool cache or
 * recycled from the free lists.  On success *vpp holds a vnode with
 * v_usecount == 1, tagged and attached to "mp" (which may be NULL).
 *
 * => Returns 0, or an error from vfs_busy() if mp is unmounting, or
 *    ENFILE when the vnode table is full and nothing can be recycled.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    vnode_t **vpp)
{
	struct uvm_object *uobj;
	static int toggle;	/* alternates recycling vs. allocating */
	vnode_t *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we're creating a
		 * vnode.  If unmount is in progress, this will
		 * fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	mutex_enter(&vnode_free_list_lock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc) {
		/* Optimistically count the new vnode; undo on failure. */
		numvnodes++;
		mutex_exit(&vnode_free_list_lock);
		if ((vp = vnalloc(NULL)) == NULL) {
			mutex_enter(&vnode_free_list_lock);
			numvnodes--;
		} else
			vp->v_usecount = 1;
	}

	if (vp == NULL) {
		/* Allocation skipped or failed: try to recycle. */
		vp = getcleanvnode();
		if (vp == NULL) {
			if (mp != NULL) {
				vfs_unbusy(mp, false, NULL);
			}
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		/* Recycled vnode: wipe the leftover flags/state. */
		vp->v_iflag = 0;
		vp->v_vflag = 0;
		vp->v_uflag = 0;
		vp->v_socket = NULL;
	}

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));

	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (mp != NULL) {
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		/* Keep the reference vfs_busy() added; it now backs vp. */
		vfs_unbusy(mp, true, NULL);
	}

	return (0);
}
516: /*
517: * This is really just the reverse of getnewvnode(). Needed for
518: * VFS_VGET functions who may need to push back a vnode in case
519: * of a locking race.
520: */
/*
 * This is really just the reverse of getnewvnode(): give back a vnode
 * obtained from getnewvnode() that was never used.  Marking it
 * VI_CLEAN makes vrelel() destroy it rather than recycle it.
 *
 * => The caller's sole reference (v_usecount == 1) is consumed.
 */
void
ungetnewvnode(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(&vp->v_interlock);
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);
}
533:
534: /*
535: * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
536: * marker vnode and we are prepared to wait for the allocation.
537: */
/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode (may sleep for memory); otherwise a regular vnode is
 * allocated non-blocking and NULL may be returned.
 */
vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
	if (vp == NULL) {
		return NULL;
	}

	memset(vp, 0, sizeof(*vp));
	UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		/* Marker vnode: just enough state to sit on a mount list. */
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
	} else {
		/* Real vnode: it gets its own lock. */
		rw_init(&vp->v_lock.vl_lock);
	}

	return vp;
}
567:
568: /*
569: * Free an unused, unreferenced vnode.
570: */
/*
 * Free an unused, unreferenced vnode (regular or marker) back to the
 * pool cache, undoing everything vnalloc() set up.
 */
void
vnfree(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 0);

	if ((vp->v_iflag & VI_MARKER) == 0) {
		/* Only real vnodes have a lock and count toward numvnodes. */
		rw_destroy(&vp->v_lock.vl_lock);
		mutex_enter(&vnode_free_list_lock);
		numvnodes--;
		mutex_exit(&vnode_free_list_lock);
	}

	UVM_OBJ_DESTROY(&vp->v_uobj);
	cv_destroy(&vp->v_cv);
	pool_cache_put(vnode_cache, vp);
}
588:
589: /*
590: * Remove a vnode from its freelist.
591: */
/*
 * Remove a vnode from its freelist (hold list if v_holdcnt > 0,
 * otherwise the free list).
 *
 * => Caller must hold vp->v_interlock and v_usecount must be zero.
 */
static inline void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}
613:
614: /*
615: * Move a vnode from one mount queue to another.
616: */
/*
 * Move a vnode from one mount queue to another.  Passing mp == NULL
 * simply removes the vnode from its current mount's list.  Any
 * reference the vnode held on its old mount is released.
 */
static void
insmntque(vnode_t *vp, struct mount *mp)
{
	struct mount *omp;

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	mutex_enter(&mntvnode_lock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL)
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL)
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
	mutex_exit(&mntvnode_lock);

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_destroy(omp);
	}
}
651:
652: /*
653: * Create a vnode for a block device.
1.59 thorpej 654: * Used for root filesystem and swap areas.
1.29 cgd 655: * Also used for memory file system special devices.
656: */
1.50 christos 657: int
1.309 ad 658: bdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 659: {
1.30 mycroft 660:
661: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 662: }
663:
664: /*
665: * Create a vnode for a character device.
666: * Used for kernfs and some console handling.
667: */
1.50 christos 668: int
1.309 ad 669: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 670: {
1.30 mycroft 671:
672: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 673: }
674:
675: /*
676: * Create a vnode for a device.
677: * Used by bdevvp (block device) for root file system etc.,
678: * and by cdevvp (character device) for console and kernfs.
679: */
/*
 * Create a vnode for a device of the given type (VBLK or VCHR).
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * => NODEV yields success with *vpp == NULL; callers must check.
 */
static int
getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
{
	vnode_t *vp;
	vnode_t *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULL;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULL;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	vp->v_vflag |= VV_MPSAFE;
	uvm_vnp_setsize(vp, 0);
	/* Attach specfs per-device state. */
	spec_node_init(vp, dev);
	*vpp = vp;
	return (0);
}
704:
705: /*
706: * Grab a particular vnode from the free list, increment its
1.83 fvdl 707: * reference count and lock it. If the vnode lock bit is set the
708: * vnode is being eliminated in vgone. In that case, we can not
709: * grab the vnode, so the process is awakened when the transition is
710: * completed, and an error returned to indicate that the vnode is no
711: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 712: */
/*
 * Grab a particular vnode: add a reference and optionally lock it.
 * If the vnode is being cleaned out (VI_XLOCK/VI_FREEING) we wait for
 * that to finish and return ENOENT -- the vnode is no longer usable.
 *
 * => flags: LK_INTERLOCK if caller already holds v_interlock;
 *    LK_NOWAIT to fail with EBUSY rather than wait; LK_TYPE_MASK
 *    bits to also take the vnode lock via vn_lock().
 */
int
vget(vnode_t *vp, int flags)
{
	int error;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((flags & LK_INTERLOCK) == 0)
		mutex_enter(&vp->v_interlock);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
	}
	if (++vp->v_usecount == 0) {
		vpanic(vp, "vget: usecount overflow");
	}

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking if
	 * the VI_XLOCK or VI_FREEING flags are set.
	 */
	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			/* Drop the reference we just added. */
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_XLOCK | VI_FREEING);
		vrelel(vp, 0);
		return ENOENT;
	}
	if (flags & LK_TYPE_MASK) {
		/* vn_lock() consumes the interlock. */
		error = vn_lock(vp, flags | LK_INTERLOCK);
		if (error != 0) {
			vrele(vp);
		}
		return error;
	}
	mutex_exit(&vp->v_interlock);
	return 0;
}
759:
760: /*
761: * vput(), just unlock and vrele()
762: */
/*
 * vput(), just unlock and vrele(): release the vnode lock and drop
 * one reference.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp, 0);
	vrele(vp);
}
772:
773: /*
1.309 ad 774: * Vnode release. If reference count drops to zero, call inactive
775: * routine and either return to freelist or free to the pool.
1.29 cgd 776: */
/*
 * Vnode release.  If the reference count drops to zero, deactivate
 * the vnode via VOP_INACTIVE() and either return it to the freelist
 * or (if already clean) destroy it.  When it is unsafe to deactivate
 * in this context, the last reference is donated to the vrele thread.
 *
 * => Caller must hold vp->v_interlock; always released on return.
 * => "flags" is unused by the visible code -- NOTE(review): presumably
 *    reserved for callers elsewhere; confirm against the header.
 */
void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (vp->v_op == dead_vnodeop_p && (vp->v_iflag & VI_CLEAN) == 0) {
		vpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vp->v_usecount > 1) {
		vp->v_usecount--;
		/* Tell any in-progress VOP_INACTIVE() pass to retry. */
		vp->v_iflag |= VI_INACTREDO;
		mutex_exit(&vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vpanic(vp, "vput: bad ref count");
	}

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
 retry:
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;
		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 */
		if (curlwp == uvm.pagedaemon_lwp) {
			/* The pagedaemon can't wait around; defer. */
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/* We have to try harder. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_RETRY);
			if (error != 0) {
				/* XXX */
				vpanic(vp, "vrele: unable to lock %p");
			}
			defer = false;
		} else if ((vp->v_iflag & VI_LAYER) != 0) {
			/*
			 * Acquiring the stack's lock in vclean() even
			 * for an honest vput/vrele is dangerous because
			 * our caller may hold other vnode locks; defer.
			 */
			defer = true;
		} else {
			/* If we can't acquire the lock, then defer. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_NOWAIT);
			if (error != 0) {
				defer = true;
				mutex_enter(&vp->v_interlock);
			} else {
				defer = false;
			}
		}

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			KASSERT(mutex_owned(&vp->v_interlock));
			KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
			vp->v_iflag |= VI_INACTPEND;
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(&vp->v_interlock);
			return;
		}

#ifdef DIAGNOSTIC
		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
			vprint("vrelel: missing VOP_CLOSE()", vp);
		}
#endif

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		mutex_enter(&vp->v_interlock);
		if (!recycle) {
			if (vp->v_usecount > 1) {
				vp->v_usecount--;
				mutex_exit(&vp->v_interlock);
				return;
			}

			/*
			 * If we grew another reference while
			 * VOP_INACTIVE() was underway, retry.
			 */
			if ((vp->v_iflag & VI_INACTREDO) != 0) {
				goto retry;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp, DOCLOSE);
		}
		KASSERT(vp->v_usecount > 0);
	}

	if (--vp->v_usecount != 0) {
		/* Gained another reference while being reclaimed. */
		mutex_exit(&vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(&vp->v_interlock);
		insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		mutex_exit(&vp->v_interlock);
	}
}
957:
/*
 * Drop one reference to a vnode; convenience wrapper that takes the
 * interlock for vrelel().
 */
void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	mutex_enter(&vp->v_interlock);
	vrelel(vp, 0);
}
967:
/*
 * Kernel thread that performs deferred vnode releases queued by
 * vrelel().  Each queued vnode arrives with VI_INACTPEND set and a
 * donated reference; runs forever.
 */
static void
vrele_thread(void *cookie)
{
	vnode_t *vp;

	for (;;) {
		mutex_enter(&vrele_lock);
		while (TAILQ_EMPTY(&vrele_list)) {
			/* Timed wait: also wakes periodically to re-check. */
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
		}
		vp = TAILQ_FIRST(&vrele_list);
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(&vp->v_interlock);
		KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
		vp->v_iflag &= ~VI_INACTPEND;
		if (vp->v_usecount > 1) {
			vp->v_usecount--;
			mutex_exit(&vp->v_interlock);
			continue;
		}
		vrelel(vp, 0);
	}
}
998:
999: /*
1000: * Page or buffer structure gets a reference.
1.258 chs 1001: * Called with v_interlock held.
1.29 cgd 1002: */
1.30 mycroft 1003: void
1.309 ad 1004: vholdl(vnode_t *vp)
1.29 cgd 1005: {
1006:
1.309 ad 1007: KASSERT(mutex_owned(&vp->v_interlock));
1008: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1009:
1010: if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1011: mutex_enter(&vnode_free_list_lock);
1012: KASSERT(vp->v_freelisthd == &vnode_free_list);
1013: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1014: vp->v_freelisthd = &vnode_hold_list;
1015: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1016: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1017: }
1.29 cgd 1018: }
1019:
1020: /*
1021: * Page or buffer structure frees a reference.
1.258 chs 1022: * Called with v_interlock held.
1.29 cgd 1023: */
1.30 mycroft 1024: void
1.309 ad 1025: holdrelel(vnode_t *vp)
1.29 cgd 1026: {
1027:
1.309 ad 1028: KASSERT(mutex_owned(&vp->v_interlock));
1029: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.142 chs 1030:
1.309 ad 1031: if (vp->v_holdcnt <= 0) {
1032: vpanic(vp, "holdrelel: holdcnt vp %p");
1033: }
1.142 chs 1034:
1.309 ad 1035: vp->v_holdcnt--;
1036: if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1037: mutex_enter(&vnode_free_list_lock);
1038: KASSERT(vp->v_freelisthd == &vnode_hold_list);
1039: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1040: vp->v_freelisthd = &vnode_free_list;
1041: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1042: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1043: }
1.81 ross 1044: }
1045:
1046: /*
1.309 ad 1047: * Vnode reference, where a reference is already held by some other
1048: * object (for example, a file structure).
1.81 ross 1049: */
1050: void
1.309 ad 1051: vref(vnode_t *vp)
1.81 ross 1052: {
1053:
1.309 ad 1054: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1055:
1056: mutex_enter(&vp->v_interlock);
1057: if (vp->v_usecount <= 0) {
1058: vpanic(vp, "vref used where vget required");
1059: }
1060: if (++vp->v_usecount == 0) {
1061: vpanic(vp, "vref: usecount overflow");
1.112 mycroft 1062: }
1.309 ad 1063: mutex_exit(&vp->v_interlock);
1.29 cgd 1064: }
1065:
1066: /*
1067: * Remove any vnodes in the vnode table belonging to mount point mp.
1068: *
1.183 yamt 1069: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1070: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1071: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1072: * that are found.
1.183 yamt 1073: *
1074: * If WRITECLOSE is set, only flush out regular file vnodes open for
1075: * writing.
1076: *
1077: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1078: */
1.30 mycroft 1079: #ifdef DEBUG
1080: int busyprt = 0; /* print out busy vnodes */
1081: struct ctldebug debug1 = { "busyprt", &busyprt };
1082: #endif
1.29 cgd 1083:
1.334 ad 1084: static vnode_t *
1085: vflushnext(vnode_t *mvp, int *when)
1086: {
1087:
1088: if (hardclock_ticks > *when) {
1089: mutex_exit(&mntvnode_lock);
1090: yield();
1091: mutex_enter(&mntvnode_lock);
1092: *when = hardclock_ticks + hz / 10;
1093: }
1094:
1095: return vunmark(mvp);
1096: }
1097:
1.50 christos 1098: int
1.309 ad 1099: vflush(struct mount *mp, vnode_t *skipvp, int flags)
1.29 cgd 1100: {
1.309 ad 1101: vnode_t *vp, *mvp;
1.334 ad 1102: int busy = 0, when = 0;
1.29 cgd 1103:
1.309 ad 1104: /* Allocate a marker vnode. */
1.310 pooka 1105: if ((mvp = vnalloc(mp)) == NULL)
1.309 ad 1106: return (ENOMEM);
1107:
1108: mutex_enter(&mntvnode_lock);
1.273 reinoud 1109: /*
1110: * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1111: * and vclean() are called
1112: */
1.334 ad 1113: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1114: vp = vflushnext(mvp, &when)) {
1.309 ad 1115: vmark(mvp, vp);
1116: if (vp->v_mount != mp || vismarker(vp))
1117: continue;
1.29 cgd 1118: /*
1119: * Skip over a selected vnode.
1120: */
1121: if (vp == skipvp)
1122: continue;
1.309 ad 1123: mutex_enter(&vp->v_interlock);
1.29 cgd 1124: /*
1.315 ad 1125: * Ignore clean but still referenced vnodes.
1126: */
1127: if ((vp->v_iflag & VI_CLEAN) != 0) {
1128: mutex_exit(&vp->v_interlock);
1129: continue;
1130: }
1131: /*
1.309 ad 1132: * Skip over a vnodes marked VSYSTEM.
1.29 cgd 1133: */
1.302 ad 1134: if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1.309 ad 1135: mutex_exit(&vp->v_interlock);
1.29 cgd 1136: continue;
1.80 fvdl 1137: }
1.29 cgd 1138: /*
1.30 mycroft 1139: * If WRITECLOSE is set, only flush out regular file
1140: * vnodes open for writing.
1141: */
1142: if ((flags & WRITECLOSE) &&
1.92 thorpej 1143: (vp->v_writecount == 0 || vp->v_type != VREG)) {
1.309 ad 1144: mutex_exit(&vp->v_interlock);
1.30 mycroft 1145: continue;
1.92 thorpej 1146: }
1.30 mycroft 1147: /*
1.29 cgd 1148: * With v_usecount == 0, all we need to do is clear
1149: * out the vnode data structures and we are done.
1150: */
1151: if (vp->v_usecount == 0) {
1.309 ad 1152: mutex_exit(&mntvnode_lock);
1153: vremfree(vp);
1154: vp->v_usecount++;
1155: vclean(vp, DOCLOSE);
1.324 pooka 1156: vrelel(vp, 0);
1.309 ad 1157: mutex_enter(&mntvnode_lock);
1.29 cgd 1158: continue;
1159: }
1160: /*
1.30 mycroft 1161: * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 1162: * For block or character devices, revert to an
1.318 ad 1163: * anonymous device. For all other files, just
1164: * kill them.
1.29 cgd 1165: */
1166: if (flags & FORCECLOSE) {
1.309 ad 1167: mutex_exit(&mntvnode_lock);
1168: vp->v_usecount++;
1.29 cgd 1169: if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.309 ad 1170: vclean(vp, DOCLOSE);
1.324 pooka 1171: vrelel(vp, 0);
1.29 cgd 1172: } else {
1.309 ad 1173: vclean(vp, 0);
1.318 ad 1174: vp->v_op = spec_vnodeop_p; /* XXXSMP */
1.320 ad 1175: mutex_exit(&vp->v_interlock);
1176: /*
1177: * The vnode isn't clean, but still resides
1178: * on the mount list. Remove it. XXX This
1179: * is a bit dodgy.
1180: */
1181: insmntque(vp, NULL);
1182: vrele(vp);
1.29 cgd 1183: }
1.309 ad 1184: mutex_enter(&mntvnode_lock);
1.29 cgd 1185: continue;
1186: }
1.30 mycroft 1187: #ifdef DEBUG
1.29 cgd 1188: if (busyprt)
1189: vprint("vflush: busy vnode", vp);
1.30 mycroft 1190: #endif
1.309 ad 1191: mutex_exit(&vp->v_interlock);
1.29 cgd 1192: busy++;
1193: }
1.309 ad 1194: mutex_exit(&mntvnode_lock);
1.310 pooka 1195: vnfree(mvp);
1.29 cgd 1196: if (busy)
1197: return (EBUSY);
1198: return (0);
1199: }
1200:
1201: /*
1202: * Disassociate the underlying file system from a vnode.
1.309 ad 1203: *
1204: * Must be called with the interlock held, and will return with it held.
1.29 cgd 1205: */
1.309 ad 1206: void
1207: vclean(vnode_t *vp, int flags)
1.29 cgd 1208: {
1.309 ad 1209: lwp_t *l = curlwp;
1210: bool recycle, active;
1.318 ad 1211: int error;
1.29 cgd 1212:
1.309 ad 1213: KASSERT(mutex_owned(&vp->v_interlock));
1214: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1215: KASSERT(vp->v_usecount != 0);
1.166 chs 1216:
1.309 ad 1217: /* If cleaning is already in progress wait until done and return. */
1218: if (vp->v_iflag & VI_XLOCK) {
1219: vwait(vp, VI_XLOCK);
1220: return;
1221: }
1.166 chs 1222:
1.309 ad 1223: /* If already clean, nothing to do. */
1224: if ((vp->v_iflag & VI_CLEAN) != 0) {
1225: return;
1.112 mycroft 1226: }
1.87 pk 1227:
1.29 cgd 1228: /*
1.309 ad 1229: * Prevent the vnode from being recycled or brought into use
1230: * while we clean it out.
1.29 cgd 1231: */
1.302 ad 1232: vp->v_iflag |= VI_XLOCK;
1233: if (vp->v_iflag & VI_EXECMAP) {
1.307 ad 1234: atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1235: atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1.147 chs 1236: }
1.302 ad 1237: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1.309 ad 1238: active = (vp->v_usecount > 1);
1.142 chs 1239:
1.309 ad 1240: /* XXXAD should not lock vnode under layer */
1241: VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1.80 fvdl 1242:
1.98 wrstuden 1243: /*
1.142 chs 1244: * Clean out any cached data associated with the vnode.
1.318 ad 1245: * If purging an active vnode, it must be closed and
1246: * deactivated before being reclaimed. Note that the
1247: * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1248: */
1.166 chs 1249: if (flags & DOCLOSE) {
1.256 christos 1250: error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.318 ad 1251: if (error != 0)
1.256 christos 1252: error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.211 dbj 1253: KASSERT(error == 0);
1.302 ad 1254: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.318 ad 1255: if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1256: spec_node_revoke(vp);
1.231 mycroft 1257: }
1.166 chs 1258: }
1.29 cgd 1259: if (active) {
1.309 ad 1260: VOP_INACTIVE(vp, &recycle);
1.80 fvdl 1261: } else {
1262: /*
1263: * Any other processes trying to obtain this lock must first
1.302 ad 1264: * wait for VI_XLOCK to clear, then call the new lock operation.
1.80 fvdl 1265: */
1266: VOP_UNLOCK(vp, 0);
1.29 cgd 1267: }
1.142 chs 1268:
1.309 ad 1269: /* Disassociate the underlying file system from the vnode. */
1270: if (VOP_RECLAIM(vp)) {
1271: vpanic(vp, "vclean: cannot reclaim");
1.87 pk 1272: }
1.30 mycroft 1273:
1.169 chs 1274: KASSERT(vp->v_uobj.uo_npages == 0);
1.255 yamt 1275: if (vp->v_type == VREG && vp->v_ractx != NULL) {
1276: uvm_ra_freectx(vp->v_ractx);
1277: vp->v_ractx = NULL;
1278: }
1.80 fvdl 1279: cache_purge(vp);
1280:
1.309 ad 1281: /* Done with purge, notify sleepers of the grim news. */
1.30 mycroft 1282: vp->v_op = dead_vnodeop_p;
1283: vp->v_tag = VT_NON;
1.309 ad 1284: mutex_enter(&vp->v_interlock);
1285: vp->v_vnlock = &vp->v_lock;
1.332 ad 1286: KNOTE(&vp->v_klist, NOTE_REVOKE);
1.312 ad 1287: vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1.304 ad 1288: vp->v_vflag &= ~VV_LOCKSWORK;
1.319 ad 1289: if ((flags & DOCLOSE) != 0) {
1.318 ad 1290: vp->v_iflag |= VI_CLEAN;
1291: }
1.309 ad 1292: cv_broadcast(&vp->v_cv);
1293:
1294: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.29 cgd 1295: }
1296:
1297: /*
1.80 fvdl 1298: * Recycle an unused vnode to the front of the free list.
1299: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1300: */
1.80 fvdl 1301: int
1.309 ad 1302: vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1.217 junyoung 1303: {
1304:
1.309 ad 1305: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1306:
1307: mutex_enter(&vp->v_interlock);
1308: if (vp->v_usecount != 0) {
1309: mutex_exit(&vp->v_interlock);
1310: return (0);
1.29 cgd 1311: }
1.309 ad 1312: if (inter_lkp)
1313: mutex_exit(inter_lkp);
1314: vremfree(vp);
1315: vp->v_usecount++;
1316: vclean(vp, DOCLOSE);
1.324 pooka 1317: vrelel(vp, 0);
1.309 ad 1318: return (1);
1.29 cgd 1319: }
1320:
1321: /*
1.309 ad 1322: * Eliminate all activity associated with a vnode in preparation for
1323: * reuse. Drops a reference from the vnode.
1.29 cgd 1324: */
1325: void
1.309 ad 1326: vgone(vnode_t *vp)
1.80 fvdl 1327: {
1.166 chs 1328:
1.309 ad 1329: mutex_enter(&vp->v_interlock);
1330: vclean(vp, DOCLOSE);
1.324 pooka 1331: vrelel(vp, 0);
1.29 cgd 1332: }
1333:
1334: /*
1335: * Lookup a vnode by device number.
1336: */
1.50 christos 1337: int
1.309 ad 1338: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 1339: {
1.309 ad 1340: vnode_t *vp;
1.80 fvdl 1341: int rc = 0;
1.29 cgd 1342:
1.318 ad 1343: mutex_enter(&specfs_lock);
1344: for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29 cgd 1345: if (dev != vp->v_rdev || type != vp->v_type)
1346: continue;
1347: *vpp = vp;
1.80 fvdl 1348: rc = 1;
1349: break;
1.29 cgd 1350: }
1.318 ad 1351: mutex_exit(&specfs_lock);
1.80 fvdl 1352: return (rc);
1.96 thorpej 1353: }
1354:
1355: /*
1356: * Revoke all the vnodes corresponding to the specified minor number
1357: * range (endpoints inclusive) of the specified major.
1358: */
1359: void
1.247 thorpej 1360: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1361: {
1.316 ad 1362: vnode_t *vp, **vpp;
1363: dev_t dev;
1.96 thorpej 1364: int mn;
1365:
1.274 mrg 1366: vp = NULL; /* XXX gcc */
1367:
1.318 ad 1368: mutex_enter(&specfs_lock);
1.316 ad 1369: for (mn = minl; mn <= minh; mn++) {
1370: dev = makedev(maj, mn);
1.318 ad 1371: vpp = &specfs_hash[SPECHASH(dev)];
1.316 ad 1372: for (vp = *vpp; vp != NULL;) {
1373: mutex_enter(&vp->v_interlock);
1374: if ((vp->v_iflag & VI_CLEAN) != 0 ||
1375: dev != vp->v_rdev || type != vp->v_type) {
1376: mutex_exit(&vp->v_interlock);
1377: vp = vp->v_specnext;
1378: continue;
1379: }
1.318 ad 1380: mutex_exit(&specfs_lock);
1.316 ad 1381: if (vget(vp, LK_INTERLOCK) == 0) {
1382: VOP_REVOKE(vp, REVOKEALL);
1383: vrele(vp);
1384: }
1.318 ad 1385: mutex_enter(&specfs_lock);
1.316 ad 1386: vp = *vpp;
1387: }
1388: }
1.318 ad 1389: mutex_exit(&specfs_lock);
1.29 cgd 1390: }
1391:
1392: /*
1393: * Calculate the total number of references to a special device.
1394: */
1.30 mycroft 1395: int
1.309 ad 1396: vcount(vnode_t *vp)
1.29 cgd 1397: {
1398: int count;
1399:
1.318 ad 1400: mutex_enter(&specfs_lock);
1.309 ad 1401: mutex_enter(&vp->v_interlock);
1.318 ad 1402: if (vp->v_specnode == NULL) {
1.309 ad 1403: count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1404: mutex_exit(&vp->v_interlock);
1.318 ad 1405: mutex_exit(&specfs_lock);
1.309 ad 1406: return (count);
1407: }
1408: mutex_exit(&vp->v_interlock);
1.318 ad 1409: count = vp->v_specnode->sn_dev->sd_opencnt;
1410: mutex_exit(&specfs_lock);
1.29 cgd 1411: return (count);
1412: }
1413:
1.101 mrg 1414: /*
1.316 ad 1415: * Eliminate all activity associated with the requested vnode
1416: * and with all vnodes aliased to the requested vnode.
1417: */
1418: void
1419: vrevoke(vnode_t *vp)
1420: {
1421: vnode_t *vq, **vpp;
1422: enum vtype type;
1423: dev_t dev;
1424:
1425: KASSERT(vp->v_usecount > 0);
1426:
1427: mutex_enter(&vp->v_interlock);
1428: if ((vp->v_iflag & VI_CLEAN) != 0) {
1429: mutex_exit(&vp->v_interlock);
1430: return;
1431: } else {
1432: dev = vp->v_rdev;
1433: type = vp->v_type;
1434: mutex_exit(&vp->v_interlock);
1435: }
1436:
1.318 ad 1437: vpp = &specfs_hash[SPECHASH(dev)];
1438: mutex_enter(&specfs_lock);
1.316 ad 1439: for (vq = *vpp; vq != NULL;) {
1.333 ad 1440: /* If clean or being cleaned, then ignore it. */
1441: mutex_enter(&vq->v_interlock);
1442: if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1.317 ad 1443: vq->v_rdev != dev || vq->v_type != type) {
1.333 ad 1444: mutex_exit(&vq->v_interlock);
1.316 ad 1445: vq = vq->v_specnext;
1446: continue;
1447: }
1.318 ad 1448: mutex_exit(&specfs_lock);
1449: if (vq->v_usecount == 0) {
1.317 ad 1450: vremfree(vq);
1.316 ad 1451: }
1.318 ad 1452: vq->v_usecount++;
1.316 ad 1453: vclean(vq, DOCLOSE);
1.324 pooka 1454: vrelel(vq, 0);
1.318 ad 1455: mutex_enter(&specfs_lock);
1.316 ad 1456: vq = *vpp;
1457: }
1.318 ad 1458: mutex_exit(&specfs_lock);
1.316 ad 1459: }
1460:
1461: /*
1.220 lukem 1462: * sysctl helper routine to return list of supported fstypes
1463: */
1464: static int
1465: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
1466: {
1.291 christos 1467: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 1468: char *where = oldp;
1469: struct vfsops *v;
1470: size_t needed, left, slen;
1471: int error, first;
1472:
1473: if (newp != NULL)
1474: return (EPERM);
1475: if (namelen != 0)
1476: return (EINVAL);
1477:
1478: first = 1;
1479: error = 0;
1480: needed = 0;
1481: left = *oldlenp;
1482:
1.311 ad 1483: sysctl_unlock();
1.302 ad 1484: mutex_enter(&vfs_list_lock);
1.220 lukem 1485: LIST_FOREACH(v, &vfs_list, vfs_list) {
1486: if (where == NULL)
1487: needed += strlen(v->vfs_name) + 1;
1488: else {
1.245 christos 1489: memset(bf, 0, sizeof(bf));
1.220 lukem 1490: if (first) {
1.245 christos 1491: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 1492: first = 0;
1493: } else {
1.245 christos 1494: bf[0] = ' ';
1495: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 1496: }
1.245 christos 1497: bf[sizeof(bf)-1] = '\0';
1498: slen = strlen(bf);
1.220 lukem 1499: if (left < slen + 1)
1500: break;
1501: /* +1 to copy out the trailing NUL byte */
1.302 ad 1502: v->vfs_refcount++;
1503: mutex_exit(&vfs_list_lock);
1.245 christos 1504: error = copyout(bf, where, slen + 1);
1.302 ad 1505: mutex_enter(&vfs_list_lock);
1506: v->vfs_refcount--;
1.220 lukem 1507: if (error)
1508: break;
1509: where += slen;
1510: needed += slen;
1511: left -= slen;
1512: }
1513: }
1.302 ad 1514: mutex_exit(&vfs_list_lock);
1.311 ad 1515: sysctl_relock();
1.220 lukem 1516: *oldlenp = needed;
1517: return (error);
1518: }
1519:
1520: /*
1.80 fvdl 1521: * Top level filesystem related information gathering.
1522: */
1.212 atatat 1523: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80 fvdl 1524: {
1.218 atatat 1525: sysctl_createv(clog, 0, NULL, NULL,
1526: CTLFLAG_PERMANENT,
1.212 atatat 1527: CTLTYPE_NODE, "vfs", NULL,
1528: NULL, 0, NULL, 0,
1529: CTL_VFS, CTL_EOL);
1.218 atatat 1530: sysctl_createv(clog, 0, NULL, NULL,
1531: CTLFLAG_PERMANENT,
1.226 atatat 1532: CTLTYPE_NODE, "generic",
1533: SYSCTL_DESCR("Non-specific vfs related information"),
1.212 atatat 1534: NULL, 0, NULL, 0,
1535: CTL_VFS, VFS_GENERIC, CTL_EOL);
1.218 atatat 1536: sysctl_createv(clog, 0, NULL, NULL,
1537: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226 atatat 1538: CTLTYPE_INT, "usermount",
1539: SYSCTL_DESCR("Whether unprivileged users may mount "
1540: "filesystems"),
1.212 atatat 1541: NULL, 0, &dovfsusermount, 0,
1542: CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220 lukem 1543: sysctl_createv(clog, 0, NULL, NULL,
1544: CTLFLAG_PERMANENT,
1545: CTLTYPE_STRING, "fstypes",
1546: SYSCTL_DESCR("List of file systems present"),
1547: sysctl_vfs_generic_fstypes, 0, NULL, 0,
1548: CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.263 chs 1549: sysctl_createv(clog, 0, NULL, NULL,
1550: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1551: CTLTYPE_INT, "magiclinks",
1552: SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
1553: NULL, 0, &vfs_magiclinks, 0,
1554: CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
1.80 fvdl 1555: }
1556:
1.212 atatat 1557:
1.29 cgd 1558: int kinfo_vdebug = 1;
1559: int kinfo_vgetfailed;
1560: #define KINFO_VNODESLOP 10
1561: /*
1562: * Dump vnode list (via sysctl).
1563: * Copyout address of vnode followed by vnode.
1564: */
1565: /* ARGSUSED */
1.50 christos 1566: int
1.212 atatat 1567: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 1568: {
1.212 atatat 1569: char *where = oldp;
1570: size_t *sizep = oldlenp;
1.80 fvdl 1571: struct mount *mp, *nmp;
1.311 ad 1572: vnode_t *vp, *mvp, vbuf;
1.80 fvdl 1573: char *bp = where, *savebp;
1.29 cgd 1574: char *ewhere;
1575: int error;
1.212 atatat 1576:
1577: if (namelen != 0)
1578: return (EOPNOTSUPP);
1579: if (newp != NULL)
1580: return (EPERM);
1.29 cgd 1581:
1.309 ad 1582: #define VPTRSZ sizeof(vnode_t *)
1583: #define VNODESZ sizeof(vnode_t)
1.29 cgd 1584: if (where == NULL) {
1585: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1586: return (0);
1587: }
1588: ewhere = where + *sizep;
1.80 fvdl 1589:
1.311 ad 1590: sysctl_unlock();
1.302 ad 1591: mutex_enter(&mountlist_lock);
1.177 matt 1592: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1593: mp = nmp) {
1.336.2.1! yamt 1594: if (vfs_busy(mp, &nmp)) {
1.29 cgd 1595: continue;
1.80 fvdl 1596: }
1.29 cgd 1597: savebp = bp;
1.309 ad 1598: /* Allocate a marker vnode. */
1.311 ad 1599: if ((mvp = vnalloc(mp)) == NULL) {
1600: sysctl_relock();
1.309 ad 1601: return (ENOMEM);
1.311 ad 1602: }
1.309 ad 1603: mutex_enter(&mntvnode_lock);
1604: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
1605: vmark(mvp, vp);
1.29 cgd 1606: /*
1607: * Check that the vp is still associated with
1608: * this filesystem. RACE: could have been
1609: * recycled onto the same filesystem.
1610: */
1.309 ad 1611: if (vp->v_mount != mp || vismarker(vp))
1612: continue;
1.29 cgd 1613: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309 ad 1614: (void)vunmark(mvp);
1615: mutex_exit(&mntvnode_lock);
1.310 pooka 1616: vnfree(mvp);
1.311 ad 1617: sysctl_relock();
1.29 cgd 1618: *sizep = bp - where;
1619: return (ENOMEM);
1620: }
1.311 ad 1621: memcpy(&vbuf, vp, VNODESZ);
1.309 ad 1622: mutex_exit(&mntvnode_lock);
1.311 ad 1623: if ((error = copyout(vp, bp, VPTRSZ)) ||
1624: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309 ad 1625: mutex_enter(&mntvnode_lock);
1626: (void)vunmark(mvp);
1627: mutex_exit(&mntvnode_lock);
1.310 pooka 1628: vnfree(mvp);
1.311 ad 1629: sysctl_relock();
1.29 cgd 1630: return (error);
1.309 ad 1631: }
1.29 cgd 1632: bp += VPTRSZ + VNODESZ;
1.309 ad 1633: mutex_enter(&mntvnode_lock);
1.29 cgd 1634: }
1.309 ad 1635: mutex_exit(&mntvnode_lock);
1.310 pooka 1636: vnfree(mvp);
1.336.2.1! yamt 1637: vfs_unbusy(mp, false, &nmp);
1.29 cgd 1638: }
1.302 ad 1639: mutex_exit(&mountlist_lock);
1.311 ad 1640: sysctl_relock();
1.29 cgd 1641:
1642: *sizep = bp - where;
1643: return (0);
1.30 mycroft 1644: }
1645:
1646: /*
1.309 ad 1647: * Remove clean vnodes from a mountpoint's vnode list.
1648: */
1649: void
1650: vfs_scrubvnlist(struct mount *mp)
1651: {
1652: vnode_t *vp, *nvp;
1653:
1.327 ad 1654: retry:
1.309 ad 1655: mutex_enter(&mntvnode_lock);
1656: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1657: nvp = TAILQ_NEXT(vp, v_mntvnodes);
1658: mutex_enter(&vp->v_interlock);
1.315 ad 1659: if ((vp->v_iflag & VI_CLEAN) != 0) {
1.309 ad 1660: TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.315 ad 1661: vp->v_mount = NULL;
1.327 ad 1662: mutex_exit(&mntvnode_lock);
1663: mutex_exit(&vp->v_interlock);
1664: vfs_destroy(mp);
1665: goto retry;
1.315 ad 1666: }
1.309 ad 1667: mutex_exit(&vp->v_interlock);
1668: }
1669: mutex_exit(&mntvnode_lock);
1670: }
1671:
1672: /*
1.30 mycroft 1673: * Check to see if a filesystem is mounted on a block device.
1674: */
1675: int
1.309 ad 1676: vfs_mountedon(vnode_t *vp)
1.30 mycroft 1677: {
1.309 ad 1678: vnode_t *vq;
1.80 fvdl 1679: int error = 0;
1.30 mycroft 1680:
1.261 reinoud 1681: if (vp->v_type != VBLK)
1682: return ENOTBLK;
1.113 fvdl 1683: if (vp->v_specmountpoint != NULL)
1.30 mycroft 1684: return (EBUSY);
1.318 ad 1685: mutex_enter(&specfs_lock);
1686: for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
1687: vq = vq->v_specnext) {
1688: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1689: continue;
1690: if (vq->v_specmountpoint != NULL) {
1691: error = EBUSY;
1692: break;
1.30 mycroft 1693: }
1694: }
1.318 ad 1695: mutex_exit(&specfs_lock);
1.80 fvdl 1696: return (error);
1.30 mycroft 1697: }
1698:
1.35 ws 1699: /*
1.39 mycroft 1700: * Unmount all file systems.
1701: * We traverse the list in reverse order under the assumption that doing so
1702: * will avoid needing to worry about dependencies.
1703: */
1704: void
1.256 christos 1705: vfs_unmountall(struct lwp *l)
1.39 mycroft 1706: {
1.123 augustss 1707: struct mount *mp, *nmp;
1.40 mycroft 1708: int allerror, error;
1.39 mycroft 1709:
1.235 lukem 1710: printf("unmounting file systems...");
1.325 dyoung 1711: for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist);
1712: !CIRCLEQ_EMPTY(&mountlist);
1713: mp = nmp) {
1714: nmp = CIRCLEQ_PREV(mp, mnt_list);
1.54 jtk 1715: #ifdef DEBUG
1.235 lukem 1716: printf("\nunmounting %s (%s)...",
1.56 christos 1717: mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 1718: #endif
1.336.2.1! yamt 1719: atomic_inc_uint(&mp->mnt_refcnt);
1.256 christos 1720: if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
1.57 christos 1721: printf("unmount of %s failed with error %d\n",
1.40 mycroft 1722: mp->mnt_stat.f_mntonname, error);
1723: allerror = 1;
1724: }
1.39 mycroft 1725: }
1.235 lukem 1726: printf(" done\n");
1.39 mycroft 1727: if (allerror)
1.57 christos 1728: printf("WARNING: some file systems would not unmount\n");
1.40 mycroft 1729: }
1730:
1731: /*
1732: * Sync and unmount file systems before shutting down.
1733: */
1734: void
1.247 thorpej 1735: vfs_shutdown(void)
1.40 mycroft 1736: {
1.265 skrll 1737: struct lwp *l;
1.40 mycroft 1738:
1.265 skrll 1739: /* XXX we're certainly not running in lwp0's context! */
1740: l = curlwp;
1741: if (l == NULL)
1742: l = &lwp0;
1.185 christos 1743:
1.70 cgd 1744: printf("syncing disks... ");
1745:
1.305 pooka 1746: /* remove user processes from run queue */
1.138 bouyer 1747: suspendsched();
1.40 mycroft 1748: (void) spl0();
1749:
1.128 sommerfe 1750: /* avoid coming back this way again if we panic. */
1751: doing_shutdown = 1;
1752:
1.184 thorpej 1753: sys_sync(l, NULL, NULL);
1.40 mycroft 1754:
1755: /* Wait for sync to finish. */
1.213 pk 1756: if (buf_syncwait() != 0) {
1.124 augustss 1757: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1758: Debugger();
1759: #endif
1.57 christos 1760: printf("giving up\n");
1.84 thorpej 1761: return;
1.73 thorpej 1762: } else
1.57 christos 1763: printf("done\n");
1.73 thorpej 1764:
1.84 thorpej 1765: /*
1766: * If we've panic'd, don't make the situation potentially
1767: * worse by unmounting the file systems.
1768: */
1769: if (panicstr != NULL)
1770: return;
1771:
1772: /* Release inodes held by texts before update. */
1.73 thorpej 1773: #ifdef notdef
1.84 thorpej 1774: vnshutdown();
1.73 thorpej 1775: #endif
1.84 thorpej 1776: /* Unmount file systems. */
1.256 christos 1777: vfs_unmountall(l);
1.58 thorpej 1778: }
1779:
1780: /*
1781: * Mount the root file system. If the operator didn't specify a
1782: * file system to use, try all possible file systems until one
1783: * succeeds.
1784: */
1785: int
1.247 thorpej 1786: vfs_mountroot(void)
1.58 thorpej 1787: {
1.79 thorpej 1788: struct vfsops *v;
1.239 mycroft 1789: int error = ENODEV;
1.58 thorpej 1790:
1791: if (root_device == NULL)
1792: panic("vfs_mountroot: root device unknown");
1793:
1.264 thorpej 1794: switch (device_class(root_device)) {
1.58 thorpej 1795: case DV_IFNET:
1796: if (rootdev != NODEV)
1.173 thorpej 1797: panic("vfs_mountroot: rootdev set for DV_IFNET "
1798: "(0x%08x -> %d,%d)", rootdev,
1799: major(rootdev), minor(rootdev));
1.58 thorpej 1800: break;
1801:
1802: case DV_DISK:
1803: if (rootdev == NODEV)
1804: panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239 mycroft 1805: if (bdevvp(rootdev, &rootvp))
1806: panic("vfs_mountroot: can't get vnode for rootdev");
1.306 pooka 1807: error = VOP_OPEN(rootvp, FREAD, FSCRED);
1.239 mycroft 1808: if (error) {
1809: printf("vfs_mountroot: can't open root device\n");
1810: return (error);
1811: }
1.58 thorpej 1812: break;
1813:
1814: default:
1815: printf("%s: inappropriate for root file system\n",
1.336 cegger 1816: device_xname(root_device));
1.58 thorpej 1817: return (ENODEV);
1818: }
1819:
1820: /*
1821: * If user specified a file system, use it.
1822: */
1.239 mycroft 1823: if (mountroot != NULL) {
1824: error = (*mountroot)();
1825: goto done;
1826: }
1.58 thorpej 1827:
1828: /*
1829: * Try each file system currently configured into the kernel.
1830: */
1.302 ad 1831: mutex_enter(&vfs_list_lock);
1.220 lukem 1832: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 1833: if (v->vfs_mountroot == NULL)
1.58 thorpej 1834: continue;
1835: #ifdef DEBUG
1.197 thorpej 1836: aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58 thorpej 1837: #endif
1.302 ad 1838: v->vfs_refcount++;
1839: mutex_exit(&vfs_list_lock);
1.239 mycroft 1840: error = (*v->vfs_mountroot)();
1.302 ad 1841: mutex_enter(&vfs_list_lock);
1842: v->vfs_refcount--;
1.239 mycroft 1843: if (!error) {
1.197 thorpej 1844: aprint_normal("root file system type: %s\n",
1845: v->vfs_name);
1.79 thorpej 1846: break;
1.58 thorpej 1847: }
1848: }
1.302 ad 1849: mutex_exit(&vfs_list_lock);
1.58 thorpej 1850:
1.79 thorpej 1851: if (v == NULL) {
1.336 cegger 1852: printf("no file system for %s", device_xname(root_device));
1.264 thorpej 1853: if (device_class(root_device) == DV_DISK)
1.79 thorpej 1854: printf(" (dev 0x%x)", rootdev);
1855: printf("\n");
1.239 mycroft 1856: error = EFTYPE;
1.79 thorpej 1857: }
1.239 mycroft 1858:
1859: done:
1.264 thorpej 1860: if (error && device_class(root_device) == DV_DISK) {
1.306 pooka 1861: VOP_CLOSE(rootvp, FREAD, FSCRED);
1.239 mycroft 1862: vrele(rootvp);
1863: }
1864: return (error);
1.58 thorpej 1865: }
1.326 ad 1866:
1867: /*
1868: * Sham lock manager for vnodes. This is a temporary measure.
1869: */
1870: int
1871: vlockmgr(struct vnlock *vl, int flags)
1872: {
1873:
1874: KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
1875:
1876: switch (flags & LK_TYPE_MASK) {
1877: case LK_SHARED:
1878: if (rw_tryenter(&vl->vl_lock, RW_READER)) {
1879: return 0;
1880: }
1881: if ((flags & LK_NOWAIT) != 0) {
1.328 ad 1882: return EBUSY;
1.326 ad 1883: }
1884: rw_enter(&vl->vl_lock, RW_READER);
1885: return 0;
1886:
1887: case LK_EXCLUSIVE:
1888: if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
1889: return 0;
1890: }
1891: if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
1892: rw_write_held(&vl->vl_lock)) {
1893: vl->vl_recursecnt++;
1894: return 0;
1895: }
1896: if ((flags & LK_NOWAIT) != 0) {
1.328 ad 1897: return EBUSY;
1.326 ad 1898: }
1899: rw_enter(&vl->vl_lock, RW_WRITER);
1900: return 0;
1901:
1902: case LK_RELEASE:
1903: if (vl->vl_recursecnt != 0) {
1904: KASSERT(rw_write_held(&vl->vl_lock));
1905: vl->vl_recursecnt--;
1906: return 0;
1907: }
1908: rw_exit(&vl->vl_lock);
1909: return 0;
1910:
1911: default:
1912: panic("vlockmgr: flags %x", flags);
1913: }
1914: }
1915:
1916: int
1917: vlockstatus(struct vnlock *vl)
1918: {
1919:
1920: if (rw_write_held(&vl->vl_lock)) {
1921: return LK_EXCLUSIVE;
1922: }
1923: if (rw_read_held(&vl->vl_lock)) {
1924: return LK_SHARED;
1925: }
1926: return 0;
1927: }
CVSweb <webmaster@jp.NetBSD.org>