Annotation of src/sys/kern/vfs_subr.c, Revision 1.391
1.391 ! pooka 1: /* $NetBSD: vfs_subr.c,v 1.390 2009/11/26 20:52:19 pooka Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302 ad 9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
1.32 cgd 32:
1.29 cgd 33: /*
1.30 mycroft 34: * Copyright (c) 1989, 1993
35: * The Regents of the University of California. All rights reserved.
1.29 cgd 36: * (c) UNIX System Laboratories, Inc.
37: * All or some portions of this file are derived from material licensed
38: * to the University of California by American Telephone and Telegraph
39: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40: * the permission of UNIX System Laboratories, Inc.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
1.204 agc 50: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 51: * may be used to endorse or promote products derived from this software
52: * without specific prior written permission.
53: *
54: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64: * SUCH DAMAGE.
65: *
1.32 cgd 66: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 67: */
68:
69: /*
1.346 ad 70: * Note on v_usecount and locking:
71: *
72: * At nearly all points it is known that v_usecount could be zero, the
73: * vnode interlock will be held.
74: *
75: * To change v_usecount away from zero, the interlock must be held. To
76: * change from a non-zero value to zero, again the interlock must be
77: * held.
78: *
1.379 yamt 79: * There's a flag bit, VC_XLOCK, embedded in v_usecount.
80: * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock
81: * must be held.
82: * To modify the VC_XLOCK bit, the interlock must be held.
83: * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
84: * VC_XLOCK bit is set.
85: *
86: * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
87: * value to a non-zero value can safely be done using atomic operations,
88: * without the interlock held.
89: * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
90: * value can be done using atomic operations, without the interlock held.
1.29 cgd 91: */
1.162 lukem 92:
93: #include <sys/cdefs.h>
1.391 ! pooka 94: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.390 2009/11/26 20:52:19 pooka Exp $");
1.78 mrg 95:
1.125 chs 96: #include "opt_ddb.h"
1.95 thorpej 97: #include "opt_compat_netbsd.h"
1.97 christos 98: #include "opt_compat_43.h"
1.29 cgd 99:
100: #include <sys/param.h>
1.30 mycroft 101: #include <sys/systm.h>
1.363 pooka 102: #include <sys/conf.h>
1.29 cgd 103: #include <sys/proc.h>
1.138 bouyer 104: #include <sys/kernel.h>
1.29 cgd 105: #include <sys/mount.h>
1.46 mycroft 106: #include <sys/fcntl.h>
1.29 cgd 107: #include <sys/vnode.h>
1.30 mycroft 108: #include <sys/stat.h>
1.29 cgd 109: #include <sys/namei.h>
110: #include <sys/ucred.h>
111: #include <sys/buf.h>
112: #include <sys/errno.h>
1.366 yamt 113: #include <sys/kmem.h>
1.51 christos 114: #include <sys/syscallargs.h>
1.58 thorpej 115: #include <sys/device.h>
1.192 christos 116: #include <sys/filedesc.h>
1.266 elad 117: #include <sys/kauth.h>
1.307 ad 118: #include <sys/atomic.h>
1.309 ad 119: #include <sys/kthread.h>
1.355 simonb 120: #include <sys/wapbl.h>
1.50 christos 121:
1.380 elad 122: #include <miscfs/genfs/genfs.h>
1.30 mycroft 123: #include <miscfs/specfs/specdev.h>
1.113 fvdl 124: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 125:
1.125 chs 126: #include <uvm/uvm.h>
1.255 yamt 127: #include <uvm/uvm_readahead.h>
1.125 chs 128: #include <uvm/uvm_ddb.h>
1.129 mrg 129:
130: #include <sys/sysctl.h>
1.77 mrg 131:
1.353 pooka 132: const enum vtype iftovt_tab[16] = {
133: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
134: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
135: };
136: const int vttoif_tab[9] = {
137: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
138: S_IFSOCK, S_IFIFO, S_IFMT,
139: };
140:
141: /*
142: * Insq/Remq for the vnode usage lists.
143: */
144: #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
145: #define bufremvn(bp) { \
146: LIST_REMOVE(bp, b_vnbufs); \
147: (bp)->b_vnbufs.le_next = NOLIST; \
148: }
149:
150: int doforce = 1; /* 1 => permit forcible unmounting */
151: int prtactive = 0; /* 1 => print out reclaim of active vnodes */
152:
1.309 ad 153: static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
154: static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
155: static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
156:
1.353 pooka 157: struct mntlist mountlist = /* mounted filesystem list */
158: CIRCLEQ_HEAD_INITIALIZER(mountlist);
159:
160: u_int numvnodes;
161: static specificdata_domain_t mount_specificdata_domain;
162:
1.309 ad 163: static int vrele_pending;
1.351 ad 164: static int vrele_gen;
1.309 ad 165: static kmutex_t vrele_lock;
166: static kcondvar_t vrele_cv;
167: static lwp_t *vrele_lwp;
1.113 fvdl 168:
1.382 dyoung 169: static uint64_t mountgen = 0;
170: static kmutex_t mountgen_lock;
171:
1.353 pooka 172: kmutex_t mountlist_lock;
173: kmutex_t mntid_lock;
174: kmutex_t mntvnode_lock;
175: kmutex_t vnode_free_list_lock;
176: kmutex_t vfs_list_lock;
177:
1.309 ad 178: static pool_cache_t vnode_cache;
1.186 thorpej 179:
1.89 kleink 180: /*
1.353 pooka 181: * These define the root filesystem and device.
182: */
183: struct vnode *rootvnode;
184: struct device *root_device; /* root device */
185:
186: /*
1.89 kleink 187: * Local declarations.
188: */
1.276 hannken 189:
1.309 ad 190: static void vrele_thread(void *);
191: static void insmntque(vnode_t *, struct mount *);
192: static int getdevvp(dev_t, vnode_t **, enum vtype);
1.364 yamt 193: static vnode_t *getcleanvnode(void);
1.309 ad 194: void vpanic(vnode_t *, const char *);
1.382 dyoung 195: static void vfs_shutdown1(struct lwp *);
1.309 ad 196:
1.353 pooka 197: #ifdef DEBUG
198: void printlockedvnodes(void);
199: #endif
200:
1.309 ad 201: #ifdef DIAGNOSTIC
202: void
203: vpanic(vnode_t *vp, const char *msg)
204: {
205:
206: vprint(NULL, vp);
207: panic("%s\n", msg);
208: }
209: #else
210: #define vpanic(vp, msg) /* nothing */
211: #endif
212:
213: void
214: vn_init1(void)
215: {
216:
217: vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
218: NULL, IPL_NONE, NULL, NULL, NULL);
219: KASSERT(vnode_cache != NULL);
220:
221: /* Create deferred release thread. */
222: mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
223: cv_init(&vrele_cv, "vrele");
224: if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
225: NULL, &vrele_lwp, "vrele"))
226: panic("fork vrele");
227: }
1.51 christos 228:
1.353 pooka 229: /*
230: * Initialize the vnode management data structures.
231: */
232: void
233: vntblinit(void)
234: {
235:
1.382 dyoung 236: mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
1.353 pooka 237: mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
238: mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
239: mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
240: mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
241: mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
242:
243: mount_specificdata_domain = specificdata_domain_create();
244:
245: /* Initialize the filesystem syncer. */
246: vn_initialize_syncerd();
247: vn_init1();
248: }
249:
1.202 yamt 250: int
1.256 christos 251: vfs_drainvnodes(long target, struct lwp *l)
1.202 yamt 252: {
253:
254: while (numvnodes > target) {
1.309 ad 255: vnode_t *vp;
1.202 yamt 256:
1.309 ad 257: mutex_enter(&vnode_free_list_lock);
258: vp = getcleanvnode();
1.202 yamt 259: if (vp == NULL)
260: return EBUSY; /* give up */
1.309 ad 261: ungetnewvnode(vp);
1.202 yamt 262: }
263:
264: return 0;
265: }
266:
267: /*
1.353 pooka 268: * Lookup a mount point by filesystem identifier.
269: *
270: * XXX Needs to add a reference to the mount point.
271: */
272: struct mount *
273: vfs_getvfs(fsid_t *fsid)
274: {
275: struct mount *mp;
276:
277: mutex_enter(&mountlist_lock);
278: CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
279: if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
280: mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
281: mutex_exit(&mountlist_lock);
282: return (mp);
283: }
284: }
285: mutex_exit(&mountlist_lock);
286: return ((struct mount *)0);
287: }
288:
289: /*
290: * Drop a reference to a mount structure, freeing if the last reference.
291: */
292: void
293: vfs_destroy(struct mount *mp)
294: {
295:
1.357 ad 296: if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
1.353 pooka 297: return;
298: }
299:
300: /*
301: * Nothing else has visibility of the mount: we can now
302: * free the data structures.
303: */
1.357 ad 304: KASSERT(mp->mnt_refcnt == 0);
1.353 pooka 305: specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
306: rw_destroy(&mp->mnt_unmounting);
307: mutex_destroy(&mp->mnt_updating);
308: mutex_destroy(&mp->mnt_renamelock);
309: if (mp->mnt_op != NULL) {
310: vfs_delref(mp->mnt_op);
311: }
312: kmem_free(mp, sizeof(*mp));
313: }
314:
315: /*
1.202 yamt 316: * grab a vnode from freelist and clean it.
317: */
1.309 ad 318: vnode_t *
319: getcleanvnode(void)
1.202 yamt 320: {
1.309 ad 321: vnode_t *vp;
322: vnodelst_t *listhd;
1.202 yamt 323:
1.309 ad 324: KASSERT(mutex_owned(&vnode_free_list_lock));
1.229 yamt 325:
1.309 ad 326: retry:
1.229 yamt 327: listhd = &vnode_free_list;
328: try_nextlist:
329: TAILQ_FOREACH(vp, listhd, v_freelist) {
1.309 ad 330: /*
331: * It's safe to test v_usecount and v_iflag
332: * without holding the interlock here, since
333: * these vnodes should never appear on the
334: * lists.
335: */
336: if (vp->v_usecount != 0) {
337: vpanic(vp, "free vnode isn't");
338: }
339: if ((vp->v_iflag & VI_CLEAN) != 0) {
340: vpanic(vp, "clean vnode on freelist");
341: }
342: if (vp->v_freelisthd != listhd) {
343: printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
344: vpanic(vp, "list head mismatch");
345: }
346: if (!mutex_tryenter(&vp->v_interlock))
1.208 hannken 347: continue;
1.227 yamt 348: /*
1.309 ad 349: * Our lwp might hold the underlying vnode
350: * locked, so don't try to reclaim a VI_LAYER
351: * node if it's locked.
1.227 yamt 352: */
1.302 ad 353: if ((vp->v_iflag & VI_XLOCK) == 0 &&
354: ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
1.285 hannken 355: break;
1.202 yamt 356: }
1.309 ad 357: mutex_exit(&vp->v_interlock);
1.202 yamt 358: }
359:
1.309 ad 360: if (vp == NULL) {
1.229 yamt 361: if (listhd == &vnode_free_list) {
362: listhd = &vnode_hold_list;
363: goto try_nextlist;
364: }
1.309 ad 365: mutex_exit(&vnode_free_list_lock);
366: return NULL;
1.202 yamt 367: }
368:
1.309 ad 369: /* Remove it from the freelist. */
1.202 yamt 370: TAILQ_REMOVE(listhd, vp, v_freelist);
1.309 ad 371: vp->v_freelisthd = NULL;
372: mutex_exit(&vnode_free_list_lock);
373:
1.386 bouyer 374: if (vp->v_usecount != 0) {
375: /*
376: * was referenced again before we got the interlock
377: * Don't return to freelist - the holder of the last
378: * reference will destroy it.
379: */
1.387 bouyer 380: mutex_exit(&vp->v_interlock);
1.386 bouyer 381: mutex_enter(&vnode_free_list_lock);
382: goto retry;
383: }
384:
1.309 ad 385: /*
386: * The vnode is still associated with a file system, so we must
387: * clean it out before reusing it. We need to add a reference
388: * before doing this. If the vnode gains another reference while
389: * being cleaned out then we lose - retry.
390: */
1.379 yamt 391: atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
1.309 ad 392: vclean(vp, DOCLOSE);
1.379 yamt 393: KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
394: atomic_add_int(&vp->v_usecount, -VC_XLOCK);
1.309 ad 395: if (vp->v_usecount == 1) {
396: /* We're about to dirty it. */
397: vp->v_iflag &= ~VI_CLEAN;
398: mutex_exit(&vp->v_interlock);
1.318 ad 399: if (vp->v_type == VBLK || vp->v_type == VCHR) {
400: spec_node_destroy(vp);
401: }
402: vp->v_type = VNON;
1.309 ad 403: } else {
404: /*
405: * Don't return to freelist - the holder of the last
406: * reference will destroy it.
407: */
1.348 ad 408: vrelel(vp, 0); /* releases vp->v_interlock */
1.309 ad 409: mutex_enter(&vnode_free_list_lock);
410: goto retry;
411: }
412:
413: if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
414: !TAILQ_EMPTY(&vp->v_uobj.memq)) {
415: vpanic(vp, "cleaned vnode isn't");
416: }
417: if (vp->v_numoutput != 0) {
418: vpanic(vp, "clean vnode has pending I/O's");
419: }
420: if ((vp->v_iflag & VI_ONWORKLST) != 0) {
421: vpanic(vp, "clean vnode on syncer list");
422: }
1.202 yamt 423:
424: return vp;
425: }
426:
1.327 ad 427: /*
1.338 ad 428: * Mark a mount point as busy, and gain a new reference to it. Used to
1.344 ad 429: * prevent the file system from being unmounted during critical sections.
1.338 ad 430: *
1.339 ad 431: * => The caller must hold a pre-existing reference to the mount.
1.344 ad 432: * => Will fail if the file system is being unmounted, or is unmounted.
1.29 cgd 433: */
1.50 christos 434: int
1.344 ad 435: vfs_busy(struct mount *mp, struct mount **nextp)
1.29 cgd 436: {
437:
1.344 ad 438: KASSERT(mp->mnt_refcnt > 0);
1.338 ad 439:
1.344 ad 440: if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
441: if (nextp != NULL) {
442: KASSERT(mutex_owned(&mountlist_lock));
1.339 ad 443: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
444: }
1.344 ad 445: return EBUSY;
1.339 ad 446: }
1.344 ad 447: if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
448: rw_exit(&mp->mnt_unmounting);
1.338 ad 449: if (nextp != NULL) {
1.344 ad 450: KASSERT(mutex_owned(&mountlist_lock));
451: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
1.338 ad 452: }
1.344 ad 453: return ENOENT;
1.327 ad 454: }
1.344 ad 455: if (nextp != NULL) {
456: mutex_exit(&mountlist_lock);
457: }
458: atomic_inc_uint(&mp->mnt_refcnt);
459: return 0;
1.29 cgd 460: }
461:
462: /*
1.344 ad 463: * Unbusy a busy filesystem.
1.339 ad 464: *
1.344 ad 465: * => If keepref is true, preserve reference added by vfs_busy().
466: * => If nextp != NULL, acquire mountlist_lock.
1.29 cgd 467: */
468: void
1.339 ad 469: vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
1.29 cgd 470: {
471:
1.327 ad 472: KASSERT(mp->mnt_refcnt > 0);
473:
1.344 ad 474: if (nextp != NULL) {
475: mutex_enter(&mountlist_lock);
476: }
477: rw_exit(&mp->mnt_unmounting);
478: if (!keepref) {
479: vfs_destroy(mp);
1.327 ad 480: }
1.339 ad 481: if (nextp != NULL) {
1.344 ad 482: KASSERT(mutex_owned(&mountlist_lock));
1.339 ad 483: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
484: }
1.29 cgd 485: }
486:
1.376 dyoung 487: struct mount *
488: vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp)
489: {
490: int error;
491: struct mount *mp;
492:
493: mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
494: if (mp == NULL)
495: return NULL;
496:
497: mp->mnt_op = vfsops;
498: mp->mnt_refcnt = 1;
499: TAILQ_INIT(&mp->mnt_vnodelist);
500: rw_init(&mp->mnt_unmounting);
501: mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
502: mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
503: error = vfs_busy(mp, NULL);
504: KASSERT(error == 0);
505: mp->mnt_vnodecovered = vp;
506: mount_initspecific(mp);
507:
1.382 dyoung 508: mutex_enter(&mountgen_lock);
509: mp->mnt_gen = mountgen++;
510: mutex_exit(&mountgen_lock);
511:
1.376 dyoung 512: return mp;
513: }
514:
1.29 cgd 515: /*
1.80 fvdl 516: * Lookup a filesystem type, and if found allocate and initialize
517: * a mount structure for it.
518: *
519: * Devname is usually updated by mount(8) after booting.
1.29 cgd 520: */
1.50 christos 521: int
1.247 thorpej 522: vfs_rootmountalloc(const char *fstypename, const char *devname,
523: struct mount **mpp)
1.29 cgd 524: {
1.80 fvdl 525: struct vfsops *vfsp = NULL;
526: struct mount *mp;
1.29 cgd 527:
1.309 ad 528: mutex_enter(&vfs_list_lock);
1.152 jdolecek 529: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291 christos 530: if (!strncmp(vfsp->vfs_name, fstypename,
531: sizeof(mp->mnt_stat.f_fstypename)))
1.80 fvdl 532: break;
1.315 ad 533: if (vfsp == NULL) {
534: mutex_exit(&vfs_list_lock);
1.80 fvdl 535: return (ENODEV);
1.315 ad 536: }
1.309 ad 537: vfsp->vfs_refcount++;
538: mutex_exit(&vfs_list_lock);
539:
1.376 dyoung 540: if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
1.327 ad 541: return ENOMEM;
1.80 fvdl 542: mp->mnt_flag = MNT_RDONLY;
1.291 christos 543: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
544: sizeof(mp->mnt_stat.f_fstypename));
1.80 fvdl 545: mp->mnt_stat.f_mntonname[0] = '/';
1.314 pooka 546: mp->mnt_stat.f_mntonname[1] = '\0';
1.291 christos 547: mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
548: '\0';
549: (void)copystr(devname, mp->mnt_stat.f_mntfromname,
550: sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.80 fvdl 551: *mpp = mp;
1.29 cgd 552: return (0);
553: }
554:
1.30 mycroft 555: /*
556: * Routines having to do with the management of the vnode table.
557: */
1.217 junyoung 558: extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 559:
1.29 cgd 560: /*
561: * Return the next vnode from the free list.
562: */
1.50 christos 563: int
1.247 thorpej 564: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
1.309 ad 565: vnode_t **vpp)
1.29 cgd 566: {
1.142 chs 567: struct uvm_object *uobj;
1.113 fvdl 568: static int toggle;
1.309 ad 569: vnode_t *vp;
1.153 thorpej 570: int error = 0, tryalloc;
1.158 chs 571:
1.159 enami 572: try_again:
1.327 ad 573: if (mp != NULL) {
1.103 sommerfe 574: /*
1.327 ad 575: * Mark filesystem busy while we're creating a
576: * vnode. If unmount is in progress, this will
1.342 ad 577: * fail.
1.103 sommerfe 578: */
1.344 ad 579: error = vfs_busy(mp, NULL);
1.327 ad 580: if (error)
1.103 sommerfe 581: return error;
582: }
1.29 cgd 583:
1.113 fvdl 584: /*
585: * We must choose whether to allocate a new vnode or recycle an
586: * existing one. The criterion for allocating a new one is that
587: * the total number of vnodes is less than the number desired or
588: * there are no vnodes on either free list. Generally we only
589: * want to recycle vnodes that have no buffers associated with
590: * them, so we look first on the vnode_free_list. If it is empty,
591: * we next consider vnodes with referencing buffers on the
592: * vnode_hold_list. The toggle ensures that half the time we
593: * will use a buffer from the vnode_hold_list, and half the time
594: * we will allocate a new one unless the list has grown to twice
595: * the desired size. We are reticent to recycle vnodes from the
596: * vnode_hold_list because we will lose the identity of all its
597: * referencing buffers.
598: */
1.142 chs 599:
1.153 thorpej 600: vp = NULL;
601:
1.309 ad 602: mutex_enter(&vnode_free_list_lock);
1.153 thorpej 603:
1.113 fvdl 604: toggle ^= 1;
605: if (numvnodes > 2 * desiredvnodes)
606: toggle = 0;
607:
1.153 thorpej 608: tryalloc = numvnodes < desiredvnodes ||
1.159 enami 609: (TAILQ_FIRST(&vnode_free_list) == NULL &&
610: (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153 thorpej 611:
1.309 ad 612: if (tryalloc) {
1.206 yamt 613: numvnodes++;
1.309 ad 614: mutex_exit(&vnode_free_list_lock);
1.310 pooka 615: if ((vp = vnalloc(NULL)) == NULL) {
1.309 ad 616: mutex_enter(&vnode_free_list_lock);
617: numvnodes--;
618: } else
619: vp->v_usecount = 1;
620: }
621:
622: if (vp == NULL) {
623: vp = getcleanvnode();
624: if (vp == NULL) {
1.327 ad 625: if (mp != NULL) {
1.339 ad 626: vfs_unbusy(mp, false, NULL);
1.327 ad 627: }
1.153 thorpej 628: if (tryalloc) {
629: printf("WARNING: unable to allocate new "
630: "vnode, retrying...\n");
1.345 ad 631: kpause("newvn", false, hz, NULL);
1.153 thorpej 632: goto try_again;
633: }
1.132 jdolecek 634: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 635: *vpp = 0;
636: return (ENFILE);
637: }
1.302 ad 638: vp->v_iflag = 0;
639: vp->v_vflag = 0;
640: vp->v_uflag = 0;
1.158 chs 641: vp->v_socket = NULL;
1.29 cgd 642: }
1.309 ad 643:
644: KASSERT(vp->v_usecount == 1);
645: KASSERT(vp->v_freelisthd == NULL);
646: KASSERT(LIST_EMPTY(&vp->v_nclist));
647: KASSERT(LIST_EMPTY(&vp->v_dnclist));
648:
1.29 cgd 649: vp->v_type = VNON;
1.104 wrstuden 650: vp->v_vnlock = &vp->v_lock;
1.29 cgd 651: vp->v_tag = tag;
652: vp->v_op = vops;
653: insmntque(vp, mp);
1.30 mycroft 654: *vpp = vp;
655: vp->v_data = 0;
1.142 chs 656:
657: /*
658: * initialize uvm_object within vnode.
659: */
660:
1.158 chs 661: uobj = &vp->v_uobj;
662: KASSERT(uobj->pgops == &uvm_vnodeops);
663: KASSERT(uobj->uo_npages == 0);
664: KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288 yamt 665: vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142 chs 666:
1.309 ad 667: if (mp != NULL) {
668: if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
669: vp->v_vflag |= VV_MPSAFE;
1.339 ad 670: vfs_unbusy(mp, true, NULL);
1.309 ad 671: }
672:
1.29 cgd 673: return (0);
1.130 fvdl 674: }
675:
676: /*
677: * This is really just the reverse of getnewvnode(). Needed for
678: * VFS_VGET functions who may need to push back a vnode in case
679: * of a locking race.
680: */
681: void
1.309 ad 682: ungetnewvnode(vnode_t *vp)
683: {
684:
685: KASSERT(vp->v_usecount == 1);
686: KASSERT(vp->v_data == NULL);
687: KASSERT(vp->v_freelisthd == NULL);
688:
689: mutex_enter(&vp->v_interlock);
690: vp->v_iflag |= VI_CLEAN;
1.324 pooka 691: vrelel(vp, 0);
1.309 ad 692: }
693:
694: /*
695: * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
696: * marker vnode and we are prepared to wait for the allocation.
697: */
698: vnode_t *
1.310 pooka 699: vnalloc(struct mount *mp)
1.130 fvdl 700: {
1.309 ad 701: vnode_t *vp;
702:
703: vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
704: if (vp == NULL) {
705: return NULL;
706: }
707:
708: memset(vp, 0, sizeof(*vp));
709: UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
710: cv_init(&vp->v_cv, "vnode");
711: /*
712: * done by memset() above.
713: * LIST_INIT(&vp->v_nclist);
714: * LIST_INIT(&vp->v_dnclist);
715: */
716:
717: if (mp != NULL) {
718: vp->v_mount = mp;
719: vp->v_type = VBAD;
720: vp->v_iflag = VI_MARKER;
721: } else {
1.326 ad 722: rw_init(&vp->v_lock.vl_lock);
1.309 ad 723: }
724:
725: return vp;
726: }
727:
728: /*
729: * Free an unused, unreferenced vnode.
730: */
731: void
1.310 pooka 732: vnfree(vnode_t *vp)
1.309 ad 733: {
734:
735: KASSERT(vp->v_usecount == 0);
736:
737: if ((vp->v_iflag & VI_MARKER) == 0) {
1.326 ad 738: rw_destroy(&vp->v_lock.vl_lock);
1.309 ad 739: mutex_enter(&vnode_free_list_lock);
740: numvnodes--;
741: mutex_exit(&vnode_free_list_lock);
742: }
743:
744: UVM_OBJ_DESTROY(&vp->v_uobj);
745: cv_destroy(&vp->v_cv);
746: pool_cache_put(vnode_cache, vp);
747: }
748:
749: /*
750: * Remove a vnode from its freelist.
751: */
752: static inline void
753: vremfree(vnode_t *vp)
754: {
755:
756: KASSERT(mutex_owned(&vp->v_interlock));
1.350 ad 757: KASSERT(vp->v_usecount == 0);
1.130 fvdl 758:
1.217 junyoung 759: /*
1.309 ad 760: * Note that the reference count must not change until
761: * the vnode is removed.
1.130 fvdl 762: */
1.309 ad 763: mutex_enter(&vnode_free_list_lock);
764: if (vp->v_holdcnt > 0) {
765: KASSERT(vp->v_freelisthd == &vnode_hold_list);
766: } else {
767: KASSERT(vp->v_freelisthd == &vnode_free_list);
768: }
769: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
770: vp->v_freelisthd = NULL;
771: mutex_exit(&vnode_free_list_lock);
1.29 cgd 772: }
773:
774: /*
775: * Move a vnode from one mount queue to another.
776: */
1.260 yamt 777: static void
1.309 ad 778: insmntque(vnode_t *vp, struct mount *mp)
1.29 cgd 779: {
1.327 ad 780: struct mount *omp;
1.29 cgd 781:
1.103 sommerfe 782: #ifdef DIAGNOSTIC
783: if ((mp != NULL) &&
1.207 dbj 784: (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113 fvdl 785: vp->v_tag != VT_VFS) {
1.103 sommerfe 786: panic("insmntque into dying filesystem");
787: }
788: #endif
1.217 junyoung 789:
1.309 ad 790: mutex_enter(&mntvnode_lock);
1.29 cgd 791: /*
792: * Delete from old mount point vnode list, if on one.
793: */
1.327 ad 794: if ((omp = vp->v_mount) != NULL)
1.272 reinoud 795: TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29 cgd 796: /*
1.327 ad 797: * Insert into list of vnodes for the new mount point, if
798: * available. The caller must take a reference on the mount
799: * structure and donate to the vnode.
1.29 cgd 800: */
1.279 pooka 801: if ((vp->v_mount = mp) != NULL)
802: TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.309 ad 803: mutex_exit(&mntvnode_lock);
1.327 ad 804:
805: if (omp != NULL) {
806: /* Release reference to old mount. */
1.344 ad 807: vfs_destroy(omp);
1.327 ad 808: }
1.29 cgd 809: }
810:
811: /*
1.353 pooka 812: * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
813: * recycled.
814: */
815: void
816: vwait(vnode_t *vp, int flags)
817: {
818:
819: KASSERT(mutex_owned(&vp->v_interlock));
820: KASSERT(vp->v_usecount != 0);
821:
822: while ((vp->v_iflag & flags) != 0)
823: cv_wait(&vp->v_cv, &vp->v_interlock);
824: }
825:
826: /*
827: * Insert a marker vnode into a mount's vnode list, after the
828: * specified vnode. mntvnode_lock must be held.
829: */
830: void
831: vmark(vnode_t *mvp, vnode_t *vp)
832: {
833: struct mount *mp;
834:
835: mp = mvp->v_mount;
836:
837: KASSERT(mutex_owned(&mntvnode_lock));
838: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
839: KASSERT(vp->v_mount == mp);
840:
841: TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
842: }
843:
844: /*
845: * Remove a marker vnode from a mount's vnode list, and return
846: * a pointer to the next vnode in the list. mntvnode_lock must
847: * be held.
848: */
849: vnode_t *
850: vunmark(vnode_t *mvp)
851: {
852: vnode_t *vp;
853: struct mount *mp;
854:
855: mp = mvp->v_mount;
856:
857: KASSERT(mutex_owned(&mntvnode_lock));
858: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
859:
860: vp = TAILQ_NEXT(mvp, v_mntvnodes);
861: TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
862:
863: KASSERT(vp == NULL || vp->v_mount == mp);
864:
865: return vp;
866: }
867:
868: /*
869: * Update outstanding I/O count and do wakeup if requested.
870: */
871: void
872: vwakeup(struct buf *bp)
873: {
874: struct vnode *vp;
875:
876: if ((vp = bp->b_vp) == NULL)
877: return;
878:
879: KASSERT(bp->b_objlock == &vp->v_interlock);
880: KASSERT(mutex_owned(bp->b_objlock));
881:
882: if (--vp->v_numoutput < 0)
883: panic("vwakeup: neg numoutput, vp %p", vp);
884: if (vp->v_numoutput == 0)
885: cv_broadcast(&vp->v_cv);
886: }
887:
888: /*
889: * Flush out and invalidate all buffers associated with a vnode.
890: * Called with the underlying vnode locked, which should prevent new dirty
891: * buffers from being queued.
892: */
893: int
894: vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
895: bool catch, int slptimeo)
896: {
897: struct buf *bp, *nbp;
898: int error;
899: int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
900: (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
901:
902: /* XXXUBC this doesn't look at flags or slp* */
903: mutex_enter(&vp->v_interlock);
904: error = VOP_PUTPAGES(vp, 0, 0, flushflags);
905: if (error) {
906: return error;
907: }
908:
909: if (flags & V_SAVE) {
910: error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
911: if (error)
912: return (error);
913: KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
914: }
915:
916: mutex_enter(&bufcache_lock);
917: restart:
918: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
919: nbp = LIST_NEXT(bp, b_vnbufs);
920: error = bbusy(bp, catch, slptimeo, NULL);
921: if (error != 0) {
922: if (error == EPASSTHROUGH)
923: goto restart;
924: mutex_exit(&bufcache_lock);
925: return (error);
926: }
927: brelsel(bp, BC_INVAL | BC_VFLUSH);
928: }
929:
930: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
931: nbp = LIST_NEXT(bp, b_vnbufs);
932: error = bbusy(bp, catch, slptimeo, NULL);
933: if (error != 0) {
934: if (error == EPASSTHROUGH)
935: goto restart;
936: mutex_exit(&bufcache_lock);
937: return (error);
938: }
939: /*
940: * XXX Since there are no node locks for NFS, I believe
941: * there is a slight chance that a delayed write will
942: * occur while sleeping just above, so check for it.
943: */
944: if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
945: #ifdef DEBUG
946: printf("buffer still DELWRI\n");
947: #endif
948: bp->b_cflags |= BC_BUSY | BC_VFLUSH;
949: mutex_exit(&bufcache_lock);
950: VOP_BWRITE(bp);
951: mutex_enter(&bufcache_lock);
952: goto restart;
953: }
954: brelsel(bp, BC_INVAL | BC_VFLUSH);
955: }
956:
957: #ifdef DIAGNOSTIC
958: if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
959: panic("vinvalbuf: flush failed, vp %p", vp);
960: #endif
961:
962: mutex_exit(&bufcache_lock);
963:
964: return (0);
965: }
966:
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * => lbn is the first logical block to be destroyed; blocks with a
 *    smaller b_lblkno are left alone.
 * => "catch" and "slptimeo" are passed through to bbusy() and control
 *    whether the sleep is interruptible and how long it may last.
 * => returns 0 on success or an errno from VOP_PUTPAGES()/bbusy().
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	voff_t off;

	/* First free the pages backing the range being truncated. */
	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	mutex_enter(&vp->v_interlock);
	/* NOTE(review): VOP_PUTPAGES() releases v_interlock. */
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	mutex_enter(&bufcache_lock);
restart:
	/*
	 * Walk the dirty list, invalidating every buffer at or past the
	 * truncation point.  bbusy() may sleep and returns EPASSTHROUGH
	 * if the list may have changed underneath us, in which case the
	 * whole scan is restarted.
	 */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Same again for the clean list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}
	mutex_exit(&bufcache_lock);

	return (0);
}
1019:
/*
 * Flush all dirty buffers from a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * => if "sync" is non-zero, wait for all writes to complete and keep
 *    retrying until the dirty list is empty.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	bool dirty;

	/* Push out any dirty pages first; PUTPAGES releases v_interlock. */
	mutex_enter(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	mutex_enter(&bufcache_lock);
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_cflags & BC_BUSY))
			continue;
		if ((bp->b_oflags & BO_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
		mutex_exit(&bufcache_lock);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* We dropped bufcache_lock, so rescan from the start. */
		goto loop;
	}
	mutex_exit(&bufcache_lock);

	if (sync == 0)
		return;

	/* Synchronous flush: wait for all outstanding writes to drain. */
	mutex_enter(&vp->v_interlock);
	while (vp->v_numoutput != 0)
		cv_wait(&vp->v_cv, &vp->v_interlock);
	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
	mutex_exit(&vp->v_interlock);

	/* New dirty buffers may have appeared while we slept; retry. */
	if (dirty) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
1071:
1072: /*
1.29 cgd 1073: * Create a vnode for a block device.
1.59 thorpej 1074: * Used for root filesystem and swap areas.
1.29 cgd 1075: * Also used for memory file system special devices.
1076: */
1.50 christos 1077: int
1.309 ad 1078: bdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1079: {
1.30 mycroft 1080:
1081: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 1082: }
1083:
1084: /*
1085: * Create a vnode for a character device.
1086: * Used for kernfs and some console handling.
1087: */
1.50 christos 1088: int
1.309 ad 1089: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1090: {
1.30 mycroft 1091:
1092: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 1093: }
1094:
1095: /*
1.353 pooka 1096: * Associate a buffer with a vnode. There must already be a hold on
1097: * the vnode.
1098: */
1099: void
1100: bgetvp(struct vnode *vp, struct buf *bp)
1101: {
1102:
1103: KASSERT(bp->b_vp == NULL);
1104: KASSERT(bp->b_objlock == &buffer_lock);
1105: KASSERT(mutex_owned(&vp->v_interlock));
1106: KASSERT(mutex_owned(&bufcache_lock));
1107: KASSERT((bp->b_cflags & BC_BUSY) != 0);
1108: KASSERT(!cv_has_waiters(&bp->b_done));
1109:
1110: vholdl(vp);
1111: bp->b_vp = vp;
1112: if (vp->v_type == VBLK || vp->v_type == VCHR)
1113: bp->b_dev = vp->v_rdev;
1114: else
1115: bp->b_dev = NODEV;
1116:
1117: /*
1118: * Insert onto list for new vnode.
1119: */
1120: bufinsvn(bp, &vp->v_cleanblkhd);
1121: bp->b_objlock = &vp->v_interlock;
1122: }
1123:
/*
 * Disassociate a buffer from a vnode.
 *
 * => called with v_interlock and bufcache_lock held; the buffer must
 *    be busy (see the KASSERTs below).
 * => drops the hold reference taken by bgetvp().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp = bp->b_vp;

	KASSERT(vp != NULL);
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);
	KASSERT(!cv_has_waiters(&bp->b_done));

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If the vnode has no remaining pages, no remaining dirty
	 * buffers, and is on the syncer worklist, take it off the
	 * worklist and clear the write-map-dirty indication.
	 */
	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_iflag &= ~VI_WRMAPDIRTY;
		vn_syncer_remove_from_worklist(vp);
	}

	/* Return the buffer to the anonymous pool state. */
	bp->b_objlock = &buffer_lock;
	bp->b_vp = NULL;
	holdrelel(vp);
}
1155:
/*
 * Reassign a buffer from one vnode list to another.
 * The list reassignment must be within the same vnode.
 * Used to assign file specific control information
 * (indirect blocks) to the list to which they belong.
 *
 * => called with bufcache_lock and v_interlock held, buffer busy.
 * => also maintains the vnode's membership on the syncer worklist:
 *    a vnode gaining its first dirty buffer is scheduled for a
 *    delayed sync, and one losing its last dirty state is removed.
 */
void
reassignbuf(struct buf *bp, struct vnode *vp)
{
	struct buflists *listheadp;
	int delayx;

	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_oflags & BO_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		/*
		 * No pages, no dirty buffers: nothing left for the
		 * syncer to do, so take the vnode off the worklist.
		 */
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_iflag & VI_ONWORKLST) == 0) {
			/*
			 * Choose the writeback delay by vnode type:
			 * directories, mounted block devices, and
			 * ordinary files each have their own knob.
			 */
			switch (vp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			/* Async mounts let the syncer daemon skip them. */
			if (!vp->v_mount ||
			    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(vp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}
1215:
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * => on success, *vpp holds a new vnode of the given type backed by
 *    the spec vnode operations; for NODEV, *vpp is set to NULL and
 *    0 is returned.
 * => on failure, *vpp is NULL and the getnewvnode() error is returned.
 */
static int
getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
{
	vnode_t *vp;
	vnode_t *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULL;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULL;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	vp->v_vflag |= VV_MPSAFE;
	uvm_vnp_setsize(vp, 0);
	/* Attach the specfs per-device state for this dev_t. */
	spec_node_init(vp, dev);
	*vpp = vp;
	return (0);
}
1245:
1246: /*
1.349 ad 1247: * Try to gain a reference to a vnode, without acquiring its interlock.
1248: * The caller must hold a lock that will prevent the vnode from being
1249: * recycled or freed.
1250: */
1251: bool
1252: vtryget(vnode_t *vp)
1253: {
1254: u_int use, next;
1255:
1256: /*
1257: * If the vnode is being freed, don't make life any harder
1258: * for vclean() by adding another reference without waiting.
1259: * This is not strictly necessary, but we'll do it anyway.
1260: */
1261: if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
1262: return false;
1263: }
1264: for (use = vp->v_usecount;; use = next) {
1.379 yamt 1265: if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
1.349 ad 1266: /* Need interlock held if first reference. */
1267: return false;
1268: }
1269: next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
1270: if (__predict_true(next == use)) {
1271: return true;
1272: }
1273: }
1274: }
1275:
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 *
 * => "flags" may include LK_INTERLOCK (caller already holds
 *    v_interlock), LK_NOWAIT, and the usual vn_lock() type bits.
 * => returns 0 on success, EBUSY if LK_NOWAIT and the vnode is busy,
 *    ENOENT if the vnode was cleaned out while we waited.
 */
int
vget(vnode_t *vp, int flags)
{
	int error;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((flags & LK_INTERLOCK) == 0)
		mutex_enter(&vp->v_interlock);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking if
	 * the VI_XLOCK or VI_FREEING flags are set.
	 */
	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_XLOCK | VI_FREEING);
		vrelel(vp, 0);
		return ENOENT;
	}

	if ((vp->v_iflag & VI_INACTNOW) != 0) {
		/*
		 * If it's being deactivated, wait for that to complete.
		 * Make sure to not return a clean vnode.
		 */
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_INACTNOW);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}
	/* Take the vnode lock too if the caller asked for one. */
	if (flags & LK_TYPE_MASK) {
		error = vn_lock(vp, flags | LK_INTERLOCK);
		if (error != 0) {
			vrele(vp);
		}
		return error;
	}
	mutex_exit(&vp->v_interlock);
	return 0;
}
1346:
/*
 * vput(): release a locked vnode.  Drops the vnode lock, then drops
 * the caller's reference with vrele().
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp, 0);
	vrele(vp);
}
1359:
1360: /*
1.346 ad 1361: * Try to drop reference on a vnode. Abort if we are releasing the
1.359 ad 1362: * last reference. Note: this _must_ succeed if not the last reference.
1.346 ad 1363: */
1364: static inline bool
1365: vtryrele(vnode_t *vp)
1366: {
1367: u_int use, next;
1368:
1369: for (use = vp->v_usecount;; use = next) {
1.379 yamt 1370: if (use == 1) {
1.346 ad 1371: return false;
1372: }
1.379 yamt 1373: KASSERT((use & VC_MASK) > 1);
1.346 ad 1374: next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
1375: if (__predict_true(next == use)) {
1376: return true;
1377: }
1378: }
1379: }
1380:
/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 *
 * => called with v_interlock held; releases it before returning.
 * => "flags" is not examined in this function body as it stands —
 *    NOTE(review): presumably reserved for callers; confirm before use.
 */
void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		vp->v_iflag |= VI_INACTREDO;
		mutex_exit(&vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vpanic(vp, "vrelel: bad ref count");
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
retry:
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;
		vp->v_iflag |= VI_INACTNOW;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Decide whether to deactivate here or to hand the
		 * vnode to the vrele kthread (defer).
		 */
		if (curlwp == uvm.pagedaemon_lwp) {
			/* The pagedaemon can't wait around; defer. */
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/* We have to try harder. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_RETRY);
			if (error != 0) {
				/* XXX */
				vpanic(vp, "vrele: unable to lock %p");
			}
			defer = false;
		} else if ((vp->v_iflag & VI_LAYER) != 0) {
			/*
			 * Acquiring the stack's lock in vclean() even
			 * for an honest vput/vrele is dangerous because
			 * our caller may hold other vnode locks; defer.
			 */
			defer = true;
		} else {
			/* If we can't acquire the lock, then defer. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_NOWAIT);
			if (error != 0) {
				defer = true;
				mutex_enter(&vp->v_interlock);
			} else {
				defer = false;
			}
		}

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			KASSERT(mutex_owned(&vp->v_interlock));
			KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
			vp->v_iflag &= ~VI_INACTNOW;
			vp->v_iflag |= VI_INACTPEND;
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			/* Wake anyone waiting on VI_INACTNOW (e.g. vget). */
			cv_broadcast(&vp->v_cv);
			mutex_exit(&vp->v_interlock);
			return;
		}

#ifdef DIAGNOSTIC
		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
			vprint("vrelel: missing VOP_CLOSE()", vp);
		}
#endif

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		mutex_enter(&vp->v_interlock);
		vp->v_iflag &= ~VI_INACTNOW;
		cv_broadcast(&vp->v_cv);
		if (!recycle) {
			if (vtryrele(vp)) {
				mutex_exit(&vp->v_interlock);
				return;
			}

			/*
			 * If we grew another reference while
			 * VOP_INACTIVE() was underway, retry.
			 */
			if ((vp->v_iflag & VI_INACTREDO) != 0) {
				goto retry;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp, DOCLOSE);
		}
		KASSERT(vp->v_usecount > 0);
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		mutex_exit(&vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(&vp->v_interlock);
		insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		mutex_exit(&vp->v_interlock);
	}
}
1572:
1573: void
1.309 ad 1574: vrele(vnode_t *vp)
1.298 pooka 1575: {
1576:
1.309 ad 1577: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1578:
1.346 ad 1579: if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
1580: return;
1581: }
1.309 ad 1582: mutex_enter(&vp->v_interlock);
1.324 pooka 1583: vrelel(vp, 0);
1.298 pooka 1584: }
1585:
/*
 * Kernel thread that processes deferred vrele requests queued on
 * vrele_list by vrelel().  Never returns.
 */
static void
vrele_thread(void *cookie)
{
	vnode_t *vp;

	for (;;) {
		mutex_enter(&vrele_lock);
		while (TAILQ_EMPTY(&vrele_list)) {
			/*
			 * Bump the generation and wake waiters (vflush)
			 * so they can observe that the list drained,
			 * then sleep for up to a second.
			 */
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
		}
		vp = TAILQ_FIRST(&vrele_list);
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(&vp->v_interlock);
		KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
		vp->v_iflag &= ~VI_INACTPEND;
		vrelel(vp, 0);
	}
}
1613:
/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 *
 * => when the first hold is placed on an otherwise unreferenced
 *    vnode, migrate it from the free list to the hold list so it is
 *    not recycled out from under the holder.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
1634:
/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 *
 * => when the last hold on an otherwise unreferenced vnode is
 *    dropped, migrate it from the hold list back to the free list
 *    so it becomes eligible for recycling again.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vpanic(vp, "holdrelel: holdcnt vp %p");
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
1660:
/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 *
 * => because an existing reference is guaranteed (asserted below),
 *    the count can be bumped atomically without the interlock.
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}
1674:
1675: /*
1676: * Remove any vnodes in the vnode table belonging to mount point mp.
1677: *
1.183 yamt 1678: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1679: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1680: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1681: * that are found.
1.183 yamt 1682: *
1683: * If WRITECLOSE is set, only flush out regular file vnodes open for
1684: * writing.
1685: *
1686: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1687: */
#ifdef DEBUG
/* Debug knob: when non-zero, vflush() vprint()s each busy vnode it skips. */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
1.29 cgd 1692:
1.334 ad 1693: static vnode_t *
1694: vflushnext(vnode_t *mvp, int *when)
1695: {
1696:
1697: if (hardclock_ticks > *when) {
1698: mutex_exit(&mntvnode_lock);
1699: yield();
1700: mutex_enter(&mntvnode_lock);
1701: *when = hardclock_ticks + hz / 10;
1702: }
1703:
1704: return vunmark(mvp);
1705: }
1706:
/*
 * Remove any vnodes in the vnode table belonging to mount point mp
 * (see the block comment above for the flag semantics).
 *
 * => "skipvp", if non-NULL, names one vnode to leave untouched.
 * => returns 0 on success, EBUSY if busy vnodes remain and
 *    FORCECLOSE was not given, ENOMEM if no marker vnode could be
 *    allocated.
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp, *mvp;
	int busy = 0, when = 0, gen;

	/*
	 * First, flush out any vnode references from vrele_list.
	 * The generation number lets us stop waiting once the vrele
	 * thread has completed a full pass.
	 */
	mutex_enter(&vrele_lock);
	gen = vrele_gen;
	while (vrele_pending && gen == vrele_gen) {
		cv_broadcast(&vrele_cv);
		cv_wait(&vrele_cv, &vrele_lock);
	}
	mutex_exit(&vrele_lock);

	/* Allocate a marker vnode. */
	if ((mvp = vnalloc(mp)) == NULL)
		return (ENOMEM);

	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() are called.  The marker keeps our place whenever
	 * mntvnode_lock has to be dropped.
	 */
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
	    vp = vflushnext(mvp, &when)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		mutex_enter(&vp->v_interlock);
		/*
		 * Ignore clean but still referenced vnodes.
		 */
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			mutex_exit(&vp->v_interlock);
			continue;
		}
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
			mutex_exit(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			mutex_exit(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			mutex_exit(&mntvnode_lock);
			vremfree(vp);
			vp->v_usecount = 1;
			vclean(vp, DOCLOSE);
			vrelel(vp, 0);
			mutex_enter(&mntvnode_lock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just
		 * kill them.
		 */
		if (flags & FORCECLOSE) {
			mutex_exit(&mntvnode_lock);
			atomic_inc_uint(&vp->v_usecount);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vclean(vp, DOCLOSE);
				vrelel(vp, 0);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p; /* XXXSMP */
				mutex_exit(&vp->v_interlock);
				/*
				 * The vnode isn't clean, but still resides
				 * on the mount list.  Remove it. XXX This
				 * is a bit dodgy.
				 */
				insmntque(vp, NULL);
				vrele(vp);
			}
			mutex_enter(&mntvnode_lock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		mutex_exit(&vp->v_interlock);
		busy++;
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
	if (busy)
		return (EBUSY);
	return (0);
}
1820:
/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with the interlock held, and will return with it held.
 *
 * => the caller must hold a use reference (asserted below).
 * => DOCLOSE in "flags" requests that cached data be flushed and the
 *    vnode fully cleaned (VI_CLEAN set on completion).
 * => concurrent cleaners are serialized on VI_XLOCK: a second caller
 *    simply waits for the first to finish and returns.
 */
void
vclean(vnode_t *vp, int flags)
{
	lwp_t *l = curlwp;
	bool recycle, active;
	int error;

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	/* If cleaning is already in progress wait until done and return. */
	if (vp->v_iflag & VI_XLOCK) {
		vwait(vp, VI_XLOCK);
		return;
	}

	/* If already clean, nothing to do. */
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		return;
	}

	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	vp->v_iflag |= VI_XLOCK;
	if (vp->v_iflag & VI_EXECMAP) {
		/* Move the page accounting from exec to file pages. */
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	active = (vp->v_usecount > 1);

	/* XXXAD should not lock vnode under layer */
	VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (flags & DOCLOSE) {
		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
		if (error != 0) {
			/* XXX, fix vn_start_write's grab of mp and use that. */

			/*
			 * The save-flush failed; discard any journal
			 * state and retry, throwing the data away.
			 */
			if (wapbl_vphaswapbl(vp))
				WAPBL_DISCARD(wapbl_vptomp(vp));
			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
		}
		KASSERT(error == 0);
		KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
		if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
			spec_node_revoke(vp);
		}
	}
	if (active) {
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VI_XLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vpanic(vp, "vclean: cannot reclaim");
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}
	cache_purge(vp);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(&vp->v_interlock);
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_vnlock = &vp->v_lock;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
	vp->v_vflag &= ~VV_LOCKSWORK;
	if ((flags & DOCLOSE) != 0) {
		vp->v_iflag |= VI_CLEAN;
	}
	cv_broadcast(&vp->v_cv);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}
1921:
/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 *
 * => returns 1 if the vnode was cleaned and released, 0 if it was
 *    still in use (in which case "inter_lkp" is left held).
 */
int
vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	mutex_enter(&vp->v_interlock);
	if (vp->v_usecount != 0) {
		mutex_exit(&vp->v_interlock);
		return (0);
	}
	if (inter_lkp)
		mutex_exit(inter_lkp);
	/* Take the vnode off its free list and give it one reference. */
	vremfree(vp);
	vp->v_usecount = 1;
	vclean(vp, DOCLOSE);
	vrelel(vp, 0);
	return (1);
}
1945:
/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

	mutex_enter(&vp->v_interlock);
	vclean(vp, DOCLOSE);
	/* vrelel() releases the interlock and drops our reference. */
	vrelel(vp, 0);
}
1958:
1959: /*
1960: * Lookup a vnode by device number.
1961: */
1.50 christos 1962: int
1.309 ad 1963: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 1964: {
1.309 ad 1965: vnode_t *vp;
1.80 fvdl 1966: int rc = 0;
1.29 cgd 1967:
1.363 pooka 1968: mutex_enter(&device_lock);
1.318 ad 1969: for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29 cgd 1970: if (dev != vp->v_rdev || type != vp->v_type)
1971: continue;
1972: *vpp = vp;
1.80 fvdl 1973: rc = 1;
1974: break;
1.29 cgd 1975: }
1.363 pooka 1976: mutex_exit(&device_lock);
1.80 fvdl 1977: return (rc);
1.96 thorpej 1978: }
1979:
/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	vnode_t *vp, **vpp;
	dev_t dev;
	int mn;

	vp = NULL;	/* XXX gcc */

	mutex_enter(&device_lock);
	for (mn = minl; mn <= minh; mn++) {
		dev = makedev(maj, mn);
		vpp = &specfs_hash[SPECHASH(dev)];
		for (vp = *vpp; vp != NULL;) {
			mutex_enter(&vp->v_interlock);
			if ((vp->v_iflag & VI_CLEAN) != 0 ||
			    dev != vp->v_rdev || type != vp->v_type) {
				mutex_exit(&vp->v_interlock);
				vp = vp->v_specnext;
				continue;
			}
			/*
			 * device_lock must be dropped across vget() and
			 * VOP_REVOKE(); the hash chain may change while
			 * unlocked, so restart from the chain head.
			 */
			mutex_exit(&device_lock);
			if (vget(vp, LK_INTERLOCK) == 0) {
				VOP_REVOKE(vp, REVOKEALL);
				vrele(vp);
			}
			mutex_enter(&device_lock);
			vp = *vpp;
		}
	}
	mutex_exit(&device_lock);
}
2016:
2017: /*
2018: * Calculate the total number of references to a special device.
2019: */
1.30 mycroft 2020: int
1.309 ad 2021: vcount(vnode_t *vp)
1.29 cgd 2022: {
2023: int count;
2024:
1.363 pooka 2025: mutex_enter(&device_lock);
1.309 ad 2026: mutex_enter(&vp->v_interlock);
1.318 ad 2027: if (vp->v_specnode == NULL) {
1.309 ad 2028: count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
2029: mutex_exit(&vp->v_interlock);
1.363 pooka 2030: mutex_exit(&device_lock);
1.309 ad 2031: return (count);
2032: }
2033: mutex_exit(&vp->v_interlock);
1.318 ad 2034: count = vp->v_specnode->sn_dev->sd_opencnt;
1.363 pooka 2035: mutex_exit(&device_lock);
1.29 cgd 2036: return (count);
2037: }
2038:
1.101 mrg 2039: /*
1.316 ad 2040: * Eliminate all activity associated with the requested vnode
2041: * and with all vnodes aliased to the requested vnode.
2042: */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq, **vpp;
	enum vtype type;
	dev_t dev;

	/* Caller must hold a reference on vp. */
	KASSERT(vp->v_usecount > 0);

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/* Already cleaned out: nothing to revoke. */
		mutex_exit(&vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		/*
		 * Not a device, so there are no aliases: just clean
		 * this vnode.  Take an extra reference which vrelel()
		 * drops after vclean() finishes.
		 */
		atomic_inc_uint(&vp->v_usecount);
		vclean(vp, DOCLOSE);
		vrelel(vp, 0);
		return;
	} else {
		/* Remember the identity to match aliases against. */
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(&vp->v_interlock);
	}

	vpp = &specfs_hash[SPECHASH(dev)];
	mutex_enter(&device_lock);
	for (vq = *vpp; vq != NULL;) {
		/* If clean or being cleaned, then ignore it. */
		mutex_enter(&vq->v_interlock);
		if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
		    vq->v_rdev != dev || vq->v_type != type) {
			mutex_exit(&vq->v_interlock);
			vq = vq->v_specnext;
			continue;
		}
		/* Drop the list lock: vclean() may sleep. */
		mutex_exit(&device_lock);
		if (vq->v_usecount == 0) {
			/*
			 * Unreferenced vnode sits on a free list; pull
			 * it off and give it our reference directly
			 * (interlock is held, so this is safe).
			 */
			vremfree(vq);
			vq->v_usecount = 1;
		} else {
			atomic_inc_uint(&vq->v_usecount);
		}
		vclean(vq, DOCLOSE);
		vrelel(vq, 0);
		mutex_enter(&device_lock);
		/* Chain may have changed while unlocked: restart. */
		vq = *vpp;
	}
	mutex_exit(&device_lock);
}
2092:
2093: /*
1.220 lukem 2094: * sysctl helper routine to return list of supported fstypes
2095: */
1.358 pooka 2096: int
1.220 lukem 2097: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2098: {
1.291 christos 2099: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 2100: char *where = oldp;
2101: struct vfsops *v;
2102: size_t needed, left, slen;
2103: int error, first;
2104:
2105: if (newp != NULL)
2106: return (EPERM);
2107: if (namelen != 0)
2108: return (EINVAL);
2109:
2110: first = 1;
2111: error = 0;
2112: needed = 0;
2113: left = *oldlenp;
2114:
1.311 ad 2115: sysctl_unlock();
1.302 ad 2116: mutex_enter(&vfs_list_lock);
1.220 lukem 2117: LIST_FOREACH(v, &vfs_list, vfs_list) {
2118: if (where == NULL)
2119: needed += strlen(v->vfs_name) + 1;
2120: else {
1.245 christos 2121: memset(bf, 0, sizeof(bf));
1.220 lukem 2122: if (first) {
1.245 christos 2123: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 2124: first = 0;
2125: } else {
1.245 christos 2126: bf[0] = ' ';
2127: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 2128: }
1.245 christos 2129: bf[sizeof(bf)-1] = '\0';
2130: slen = strlen(bf);
1.220 lukem 2131: if (left < slen + 1)
2132: break;
1.302 ad 2133: v->vfs_refcount++;
2134: mutex_exit(&vfs_list_lock);
1.354 pooka 2135: /* +1 to copy out the trailing NUL byte */
1.245 christos 2136: error = copyout(bf, where, slen + 1);
1.302 ad 2137: mutex_enter(&vfs_list_lock);
2138: v->vfs_refcount--;
1.220 lukem 2139: if (error)
2140: break;
2141: where += slen;
2142: needed += slen;
2143: left -= slen;
2144: }
2145: }
1.302 ad 2146: mutex_exit(&vfs_list_lock);
1.311 ad 2147: sysctl_relock();
1.220 lukem 2148: *oldlenp = needed;
2149: return (error);
2150: }
2151:
1.212 atatat 2152:
1.29 cgd 2153: int kinfo_vdebug = 1;
2154: int kinfo_vgetfailed;
2155: #define KINFO_VNODESLOP 10
2156: /*
2157: * Dump vnode list (via sysctl).
2158: * Copyout address of vnode followed by vnode.
2159: */
2160: /* ARGSUSED */
1.50 christos 2161: int
1.212 atatat 2162: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 2163: {
1.212 atatat 2164: char *where = oldp;
2165: size_t *sizep = oldlenp;
1.80 fvdl 2166: struct mount *mp, *nmp;
1.311 ad 2167: vnode_t *vp, *mvp, vbuf;
1.389 enami 2168: char *bp = where;
1.29 cgd 2169: char *ewhere;
2170: int error;
1.212 atatat 2171:
2172: if (namelen != 0)
2173: return (EOPNOTSUPP);
2174: if (newp != NULL)
2175: return (EPERM);
1.29 cgd 2176:
1.309 ad 2177: #define VPTRSZ sizeof(vnode_t *)
2178: #define VNODESZ sizeof(vnode_t)
1.29 cgd 2179: if (where == NULL) {
2180: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2181: return (0);
2182: }
2183: ewhere = where + *sizep;
1.80 fvdl 2184:
1.311 ad 2185: sysctl_unlock();
1.302 ad 2186: mutex_enter(&mountlist_lock);
1.177 matt 2187: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1.389 enami 2188: mp = nmp) {
1.344 ad 2189: if (vfs_busy(mp, &nmp)) {
1.29 cgd 2190: continue;
1.80 fvdl 2191: }
1.309 ad 2192: /* Allocate a marker vnode. */
1.375 elad 2193: mvp = vnalloc(mp);
2194: /* Should never fail for mp != NULL */
2195: KASSERT(mvp != NULL);
1.309 ad 2196: mutex_enter(&mntvnode_lock);
1.389 enami 2197: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp;
2198: vp = vunmark(mvp)) {
1.309 ad 2199: vmark(mvp, vp);
1.29 cgd 2200: /*
2201: * Check that the vp is still associated with
2202: * this filesystem. RACE: could have been
2203: * recycled onto the same filesystem.
2204: */
1.309 ad 2205: if (vp->v_mount != mp || vismarker(vp))
2206: continue;
1.29 cgd 2207: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309 ad 2208: (void)vunmark(mvp);
2209: mutex_exit(&mntvnode_lock);
1.310 pooka 2210: vnfree(mvp);
1.388 enami 2211: vfs_unbusy(mp, false, NULL);
1.311 ad 2212: sysctl_relock();
1.29 cgd 2213: *sizep = bp - where;
2214: return (ENOMEM);
2215: }
1.311 ad 2216: memcpy(&vbuf, vp, VNODESZ);
1.309 ad 2217: mutex_exit(&mntvnode_lock);
1.367 enami 2218: if ((error = copyout(&vp, bp, VPTRSZ)) ||
1.389 enami 2219: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309 ad 2220: mutex_enter(&mntvnode_lock);
2221: (void)vunmark(mvp);
2222: mutex_exit(&mntvnode_lock);
1.310 pooka 2223: vnfree(mvp);
1.388 enami 2224: vfs_unbusy(mp, false, NULL);
1.311 ad 2225: sysctl_relock();
1.29 cgd 2226: return (error);
1.309 ad 2227: }
1.29 cgd 2228: bp += VPTRSZ + VNODESZ;
1.309 ad 2229: mutex_enter(&mntvnode_lock);
1.29 cgd 2230: }
1.309 ad 2231: mutex_exit(&mntvnode_lock);
1.344 ad 2232: vnfree(mvp);
1.339 ad 2233: vfs_unbusy(mp, false, &nmp);
1.29 cgd 2234: }
1.302 ad 2235: mutex_exit(&mountlist_lock);
1.311 ad 2236: sysctl_relock();
1.29 cgd 2237:
2238: *sizep = bp - where;
2239: return (0);
1.30 mycroft 2240: }
2241:
2242: /*
1.309 ad 2243: * Remove clean vnodes from a mountpoint's vnode list.
2244: */
void
vfs_scrubvnlist(struct mount *mp)
{
	vnode_t *vp, *nvp;

 retry:
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_mntvnodes);
		mutex_enter(&vp->v_interlock);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			/* Detach the clean vnode from the mount... */
			TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
			vp->v_mount = NULL;
			mutex_exit(&mntvnode_lock);
			mutex_exit(&vp->v_interlock);
			/*
			 * ...and drop the mount reference it held.
			 * mntvnode_lock was released, so the list may
			 * have changed: rescan from the start.
			 */
			vfs_destroy(mp);
			goto retry;
		}
		mutex_exit(&vp->v_interlock);
	}
	mutex_exit(&mntvnode_lock);
}
2267:
2268: /*
1.30 mycroft 2269: * Check to see if a filesystem is mounted on a block device.
2270: */
2271: int
1.309 ad 2272: vfs_mountedon(vnode_t *vp)
1.30 mycroft 2273: {
1.309 ad 2274: vnode_t *vq;
1.80 fvdl 2275: int error = 0;
1.30 mycroft 2276:
1.261 reinoud 2277: if (vp->v_type != VBLK)
2278: return ENOTBLK;
1.113 fvdl 2279: if (vp->v_specmountpoint != NULL)
1.30 mycroft 2280: return (EBUSY);
1.363 pooka 2281: mutex_enter(&device_lock);
1.318 ad 2282: for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
2283: vq = vq->v_specnext) {
2284: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
2285: continue;
2286: if (vq->v_specmountpoint != NULL) {
2287: error = EBUSY;
2288: break;
1.30 mycroft 2289: }
2290: }
1.363 pooka 2291: mutex_exit(&device_lock);
1.80 fvdl 2292: return (error);
1.30 mycroft 2293: }
2294:
1.35 ws 2295: /*
1.39 mycroft 2296: * Unmount all file systems.
2297: * We traverse the list in reverse order under the assumption that doing so
2298: * will avoid needing to worry about dependencies.
2299: */
1.371 dyoung 2300: bool
1.256 christos 2301: vfs_unmountall(struct lwp *l)
1.39 mycroft 2302: {
1.377 dyoung 2303: printf("unmounting file systems...");
2304: return vfs_unmountall1(l, true, true);
2305: }
2306:
1.382 dyoung 2307: static void
2308: vfs_unmount_print(struct mount *mp, const char *pfx)
2309: {
2310: printf("%sunmounted %s on %s type %s\n", pfx,
2311: mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
2312: mp->mnt_stat.f_fstypename);
2313: }
2314:
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp, *nmp = NULL;
	int error;

	/* Pick the mount with the highest generation number. */
	CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) {
		if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen)
			nmp = mp;
	}

	if (nmp == NULL)
		return false;

#ifdef DEBUG
	printf("\nforcefully unmounting %s (%s)...",
	    nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname);
#endif
	/*
	 * Take a reference for dounmount(); only the failure path
	 * releases it here, so on success dounmount() is presumed to
	 * consume it -- NOTE(review): confirm against dounmount().
	 */
	atomic_inc_uint(&nmp->mnt_refcnt);
	if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(nmp, "forcefully ");
		return true;
	} else
		atomic_dec_uint(&nmp->mnt_refcnt);

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    nmp->mnt_stat.f_mntonname, error);
#endif

	return false;
}
2347:
1.377 dyoung 2348: bool
2349: vfs_unmountall1(struct lwp *l, bool force, bool verbose)
2350: {
1.123 augustss 2351: struct mount *mp, *nmp;
1.382 dyoung 2352: bool any_error = false, progress = false;
1.371 dyoung 2353: int error;
1.39 mycroft 2354:
1.382 dyoung 2355: for (mp = CIRCLEQ_LAST(&mountlist);
2356: mp != (void *)&mountlist;
1.325 dyoung 2357: mp = nmp) {
2358: nmp = CIRCLEQ_PREV(mp, mnt_list);
1.54 jtk 2359: #ifdef DEBUG
1.382 dyoung 2360: printf("\nunmounting %p %s (%s)...",
2361: (void *)mp, mp->mnt_stat.f_mntonname,
2362: mp->mnt_stat.f_mntfromname);
1.54 jtk 2363: #endif
1.344 ad 2364: atomic_inc_uint(&mp->mnt_refcnt);
1.382 dyoung 2365: if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
2366: vfs_unmount_print(mp, "");
1.371 dyoung 2367: progress = true;
1.382 dyoung 2368: } else {
2369: atomic_dec_uint(&mp->mnt_refcnt);
1.377 dyoung 2370: if (verbose) {
2371: printf("unmount of %s failed with error %d\n",
2372: mp->mnt_stat.f_mntonname, error);
2373: }
1.371 dyoung 2374: any_error = true;
1.40 mycroft 2375: }
1.39 mycroft 2376: }
1.377 dyoung 2377: if (verbose)
2378: printf(" done\n");
2379: if (any_error && verbose)
1.57 christos 2380: printf("WARNING: some file systems would not unmount\n");
1.371 dyoung 2381: return progress;
1.40 mycroft 2382: }
2383:
2384: /*
2385: * Sync and unmount file systems before shutting down.
2386: */
2387: void
1.247 thorpej 2388: vfs_shutdown(void)
1.40 mycroft 2389: {
1.265 skrll 2390: struct lwp *l;
1.40 mycroft 2391:
1.265 skrll 2392: /* XXX we're certainly not running in lwp0's context! */
1.377 dyoung 2393: l = (curlwp == NULL) ? &lwp0 : curlwp;
1.185 christos 2394:
1.382 dyoung 2395: vfs_shutdown1(l);
2396: }
2397:
/*
 * Flush all dirty buffers to disk in preparation for shutdown.
 */
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		/* Some buffers never settled; give up rather than hang. */
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}
2422:
/*
 * Sync disks, then unmount everything (unless we are panicking).
 */
static void
vfs_shutdown1(struct lwp *l)
{

	vfs_sync_all(l);

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(l);
}
2443:
2444: /*
1.384 jmcneill 2445: * Print a list of supported file system types (used by vfs_mountroot)
2446: */
2447: static void
2448: vfs_print_fstypes(void)
2449: {
2450: struct vfsops *v;
2451: int cnt = 0;
2452:
2453: mutex_enter(&vfs_list_lock);
2454: LIST_FOREACH(v, &vfs_list, vfs_list)
2455: ++cnt;
2456: mutex_exit(&vfs_list_lock);
2457:
2458: if (cnt == 0) {
2459: printf("WARNING: No file system modules have been loaded.\n");
2460: return;
2461: }
2462:
2463: printf("Supported file systems:");
2464: mutex_enter(&vfs_list_lock);
2465: LIST_FOREACH(v, &vfs_list, vfs_list) {
2466: printf(" %s", v->vfs_name);
2467: }
2468: mutex_exit(&vfs_list_lock);
2469: printf("\n");
2470: }
2471:
2472: /*
1.58 thorpej 2473: * Mount the root file system. If the operator didn't specify a
2474: * file system to use, try all possible file systems until one
2475: * succeeds.
2476: */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check rootdev against the class of the root device. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		/* Open for reading; closed again below on failure. */
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it.  Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		/* vfs_getopsbyname() returns with a reference held. */
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/*
		 * Hold a reference across the attempt so the vfsops
		 * cannot go away while vfs_list_lock is dropped.
		 */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	if (v == NULL) {
		/* Nothing worked: tell the operator what was available. */
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* Undo the VOP_OPEN() done above if the mount failed. */
	if (error && device_class(root_device) == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		vrele(rootvp);
	}
	if (error == 0) {
		extern struct cwdinfo cwdi0;

		/* The first mount in the list is now the root mount. */
		CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
		CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
		 * reference it.
		 */
		error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		VREF(cwdi0.cwdi_cdir);
		VOP_UNLOCK(rootvnode, 0);
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info.  All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		VREF(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
	}
	return (error);
}
1.326 ad 2599:
2600: /*
1.353 pooka 2601: * Get a new unique fsid
1.326 ad 2602: */
1.353 pooka 2603: void
2604: vfs_getnewfsid(struct mount *mp)
1.326 ad 2605: {
1.353 pooka 2606: static u_short xxxfs_mntid;
2607: fsid_t tfsid;
2608: int mtype;
1.326 ad 2609:
1.353 pooka 2610: mutex_enter(&mntid_lock);
2611: mtype = makefstype(mp->mnt_op->vfs_name);
2612: mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
2613: mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
2614: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2615: if (xxxfs_mntid == 0)
2616: ++xxxfs_mntid;
2617: tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
2618: tfsid.__fsid_val[1] = mtype;
2619: if (!CIRCLEQ_EMPTY(&mountlist)) {
2620: while (vfs_getvfs(&tfsid)) {
2621: tfsid.__fsid_val[0]++;
2622: xxxfs_mntid++;
2623: }
2624: }
2625: mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2626: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2627: mutex_exit(&mntid_lock);
2628: }
1.326 ad 2629:
1.353 pooka 2630: /*
2631: * Make a 'unique' number from a mount type name.
2632: */
long
makefstype(const char *type)
{
	const char *cp;
	long hash = 0;

	/* Fold each character into a shift-and-xor hash. */
	for (cp = type; *cp != '\0'; cp++) {
		hash <<= 2;
		hash ^= *cp;
	}
	return hash;
}
2644:
2645: /*
2646: * Set vnode attributes to VNOVAL
2647: */
2648: void
2649: vattr_null(struct vattr *vap)
2650: {
2651:
2652: vap->va_type = VNON;
2653:
2654: /*
2655: * Assign individually so that it is safe even if size and
2656: * sign of each member are varied.
2657: */
2658: vap->va_mode = VNOVAL;
2659: vap->va_nlink = VNOVAL;
2660: vap->va_uid = VNOVAL;
2661: vap->va_gid = VNOVAL;
2662: vap->va_fsid = VNOVAL;
2663: vap->va_fileid = VNOVAL;
2664: vap->va_size = VNOVAL;
2665: vap->va_blocksize = VNOVAL;
2666: vap->va_atime.tv_sec =
2667: vap->va_mtime.tv_sec =
2668: vap->va_ctime.tv_sec =
2669: vap->va_birthtime.tv_sec = VNOVAL;
2670: vap->va_atime.tv_nsec =
2671: vap->va_mtime.tv_nsec =
2672: vap->va_ctime.tv_nsec =
2673: vap->va_birthtime.tv_nsec = VNOVAL;
2674: vap->va_gen = VNOVAL;
2675: vap->va_flags = VNOVAL;
2676: vap->va_rdev = VNOVAL;
2677: vap->va_bytes = VNOVAL;
2678: vap->va_vaflags = 0;
2679: }
2680:
/* Number of elements in a statically-sized array. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
/* Bounds-checked lookup in the name tables below. */
#define ARRAY_PRINT(idx, arr) \
    ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

/* Printable names for vnode tags, types and flag bits (used by vprint()). */
const char * const vnode_tags[] = { VNODE_TAGS };
const char * const vnode_types[] = { VNODE_TYPES };
const char vnode_flagbits[] = VNODE_FLAGBITS;
2688:
2689: /*
2690: * Print out a description of a vnode.
2691: */
2692: void
2693: vprint(const char *label, struct vnode *vp)
2694: {
2695: struct vnlock *vl;
2696: char bf[96];
2697: int flag;
2698:
2699: vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
2700: flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
1.361 christos 2701: snprintb(bf, sizeof(bf), vnode_flagbits, flag);
1.353 pooka 2702:
2703: if (label != NULL)
2704: printf("%s: ", label);
2705: printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
2706: "usecount %d, writecount %d, holdcount %d\n"
2707: "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
2708: vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
2709: ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
2710: vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
2711: vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
2712: if (vp->v_data != NULL) {
2713: printf("\t");
2714: VOP_PRINT(vp);
2715: }
2716: }
2717:
2718: #ifdef DEBUG
2719: /*
2720: * List all of the locked vnodes in the system.
2721: * Called when debugging the kernel.
2722: */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	mutex_enter(&mountlist_lock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		/* vfs_busy() also yields the next mount in nmp. */
		if (vfs_busy(mp, &nmp)) {
			continue;
		}
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		/*
		 * NOTE(review): mountlist_lock is re-taken here before
		 * vfs_unbusy(), unlike sysctl_kern_vnode() above which
		 * calls vfs_unbusy(..., &nmp) without it -- confirm
		 * the vfs_busy/vfs_unbusy locking contract.
		 */
		mutex_enter(&mountlist_lock);
		vfs_unbusy(mp, false, &nmp);
	}
	mutex_exit(&mountlist_lock);
}
2745: #endif
2746:
1.380 elad 2747: /* Deprecated. Kept for KPI compatibility. */
1.353 pooka 2748: int
2749: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2750: mode_t acc_mode, kauth_cred_t cred)
2751: {
2752:
1.380 elad 2753: #ifdef DIAGNOSTIC
1.381 elad 2754: printf("vaccess: deprecated interface used.\n");
1.380 elad 2755: #endif /* DIAGNOSTIC */
1.353 pooka 2756:
1.380 elad 2757: return genfs_can_access(type, file_mode, uid, gid, acc_mode, cred);
1.353 pooka 2758: }
2759:
2760: /*
2761: * Given a file system name, look up the vfsops for that
2762: * file system, or return NULL if file system isn't present
2763: * in the kernel.
2764: */
2765: struct vfsops *
2766: vfs_getopsbyname(const char *name)
2767: {
2768: struct vfsops *v;
2769:
2770: mutex_enter(&vfs_list_lock);
2771: LIST_FOREACH(v, &vfs_list, vfs_list) {
2772: if (strcmp(v->vfs_name, name) == 0)
2773: break;
2774: }
2775: if (v != NULL)
2776: v->vfs_refcount++;
2777: mutex_exit(&vfs_list_lock);
2778:
2779: return (v);
2780: }
2781:
2782: void
2783: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2784: {
2785: const struct statvfs *mbp;
2786:
2787: if (sbp == (mbp = &mp->mnt_stat))
2788: return;
2789:
2790: (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2791: sbp->f_fsid = mbp->f_fsid;
2792: sbp->f_owner = mbp->f_owner;
2793: sbp->f_flag = mbp->f_flag;
2794: sbp->f_syncwrites = mbp->f_syncwrites;
2795: sbp->f_asyncwrites = mbp->f_asyncwrites;
2796: sbp->f_syncreads = mbp->f_syncreads;
2797: sbp->f_asyncreads = mbp->f_asyncreads;
2798: (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2799: (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2800: sizeof(sbp->f_fstypename));
2801: (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2802: sizeof(sbp->f_mntonname));
2803: (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2804: sizeof(sbp->f_mntfromname));
2805: sbp->f_namemax = mbp->f_namemax;
2806: }
2807:
/*
 * Fill in the f_fstypename, f_mntonname and f_mntfromname fields of a
 * mount's statvfs.  onp/fromp may come from user or kernel space as
 * indicated by ukon/ukfrom; when the caller is chrooted, the mount
 * point name is prefixed with the path from the real root to the
 * caller's root so it remains meaningful system-wide.
 */
int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    const char *vfsname, struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		/* Pick the copy routine matching the source space. */
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = PNBUF_GET();

			/* Build the root prefix backwards from the end. */
			bp = path + MAXPATHLEN;
			*--bp = '\0';
			rw_enter(&cwdi->cwdi_lock, RW_READER);
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			rw_exit(&cwdi->cwdi_lock);
			if (error) {
				PNBUF_PUT(path);
				return error;
			}

			len = strlen(bp);
			/* Truncate the prefix to fit f_mntonname. */
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			PNBUF_PUT(path);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append onp after the chroot prefix. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			/* Not chrooted: copy the name directly. */
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero the tail so the field is always NUL-padded. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
2875:
/*
 * Fetch the current time for file system timestamps.
 */
void
vfs_timestamp(struct timespec *ts)
{

	nanotime(ts);
}
2882:
time_t rootfstime;			/* recorded root fs time, if known */

/* Record the root file system's notion of the current time. */
void
setrootfstime(time_t t)
{
	rootfstime = t;
}
2889:
2890: /*
2891: * Sham lock manager for vnodes. This is a temporary measure.
2892: */
int
vlockmgr(struct vnlock *vl, int flags)
{

	/* Only these lockmgr-style flags are supported by the sham. */
	KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		/* Try without blocking first. */
		if (rw_tryenter(&vl->vl_lock, RW_READER)) {
			return 0;
		}
		if ((flags & LK_NOWAIT) != 0) {
			return EBUSY;
		}
		rw_enter(&vl->vl_lock, RW_READER);
		return 0;

	case LK_EXCLUSIVE:
		if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
			return 0;
		}
		/*
		 * Recursion is permitted only when asked for (by the
		 * lock or by this call) and only on a write hold we
		 * already own; track the depth in vl_recursecnt.
		 */
		if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
		    rw_write_held(&vl->vl_lock)) {
			vl->vl_recursecnt++;
			return 0;
		}
		if ((flags & LK_NOWAIT) != 0) {
			return EBUSY;
		}
		rw_enter(&vl->vl_lock, RW_WRITER);
		return 0;

	case LK_RELEASE:
		/* Unwind a recursive hold before the real release. */
		if (vl->vl_recursecnt != 0) {
			KASSERT(rw_write_held(&vl->vl_lock));
			vl->vl_recursecnt--;
			return 0;
		}
		rw_exit(&vl->vl_lock);
		return 0;

	default:
		panic("vlockmgr: flags %x", flags);
	}
}
2938:
2939: int
2940: vlockstatus(struct vnlock *vl)
2941: {
2942:
2943: if (rw_write_held(&vl->vl_lock)) {
2944: return LK_EXCLUSIVE;
2945: }
2946: if (rw_read_held(&vl->vl_lock)) {
2947: return LK_SHARED;
2948: }
2949: return 0;
2950: }
1.353 pooka 2951:
2952: /*
2953: * mount_specific_key_create --
2954: * Create a key for subsystem mount-specific data.
2955: */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	/* Delegate to the generic specificdata facility. */
	return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
}
2962:
2963: /*
2964: * mount_specific_key_delete --
2965: * Delete a key for subsystem mount-specific data.
2966: */
void
mount_specific_key_delete(specificdata_key_t key)
{

	/* Delegate to the generic specificdata facility. */
	specificdata_key_delete(mount_specificdata_domain, key);
}
2973:
2974: /*
2975: * mount_initspecific --
2976: * Initialize a mount's specificdata container.
2977: */
void
mount_initspecific(struct mount *mp)
{
	int error;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	/* Initialization is not expected to fail for a valid mount. */
	KASSERT(error == 0);
}
2987:
2988: /*
2989: * mount_finispecific --
2990: * Finalize a mount's specificdata container.
2991: */
void
mount_finispecific(struct mount *mp)
{

	/* Tear down the container created by mount_initspecific(). */
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}
2998:
2999: /*
3000: * mount_getspecific --
3001: * Return mount-specific data corresponding to the specified key.
3002: */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

	/* Delegate to the generic specificdata facility. */
	return (specificdata_getspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key));
}
3010:
3011: /*
3012: * mount_setspecific --
3013: * Set mount-specific data corresponding to the specified key.
3014: */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

	/* Delegate to the generic specificdata facility. */
	specificdata_setspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key, data);
}
3022:
3023: int
3024: VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
3025: {
3026: int error;
3027:
3028: KERNEL_LOCK(1, NULL);
3029: error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
3030: KERNEL_UNLOCK_ONE(NULL);
3031:
3032: return error;
3033: }
3034:
3035: int
3036: VFS_START(struct mount *mp, int a)
3037: {
3038: int error;
3039:
3040: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3041: KERNEL_LOCK(1, NULL);
3042: }
3043: error = (*(mp->mnt_op->vfs_start))(mp, a);
3044: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3045: KERNEL_UNLOCK_ONE(NULL);
3046: }
3047:
3048: return error;
3049: }
3050:
3051: int
3052: VFS_UNMOUNT(struct mount *mp, int a)
3053: {
3054: int error;
3055:
3056: KERNEL_LOCK(1, NULL);
3057: error = (*(mp->mnt_op->vfs_unmount))(mp, a);
3058: KERNEL_UNLOCK_ONE(NULL);
3059:
3060: return error;
3061: }
3062:
3063: int
3064: VFS_ROOT(struct mount *mp, struct vnode **a)
3065: {
3066: int error;
3067:
3068: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3069: KERNEL_LOCK(1, NULL);
3070: }
3071: error = (*(mp->mnt_op->vfs_root))(mp, a);
3072: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3073: KERNEL_UNLOCK_ONE(NULL);
3074: }
3075:
3076: return error;
3077: }
3078:
3079: int
3080: VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
3081: {
3082: int error;
3083:
3084: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3085: KERNEL_LOCK(1, NULL);
3086: }
3087: error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
3088: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3089: KERNEL_UNLOCK_ONE(NULL);
3090: }
3091:
3092: return error;
3093: }
3094:
3095: int
3096: VFS_STATVFS(struct mount *mp, struct statvfs *a)
3097: {
3098: int error;
3099:
3100: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3101: KERNEL_LOCK(1, NULL);
3102: }
3103: error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
3104: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3105: KERNEL_UNLOCK_ONE(NULL);
3106: }
3107:
3108: return error;
3109: }
3110:
3111: int
3112: VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
3113: {
3114: int error;
3115:
3116: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3117: KERNEL_LOCK(1, NULL);
3118: }
3119: error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
3120: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3121: KERNEL_UNLOCK_ONE(NULL);
3122: }
3123:
3124: return error;
3125: }
3126:
3127: int
3128: VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
3129: {
3130: int error;
3131:
3132: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3133: KERNEL_LOCK(1, NULL);
3134: }
3135: error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
3136: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3137: KERNEL_UNLOCK_ONE(NULL);
3138: }
3139:
3140: return error;
3141: }
3142:
3143: int
3144: VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
3145: {
3146: int error;
3147:
3148: if ((vp->v_vflag & VV_MPSAFE) == 0) {
3149: KERNEL_LOCK(1, NULL);
3150: }
3151: error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
3152: if ((vp->v_vflag & VV_MPSAFE) == 0) {
3153: KERNEL_UNLOCK_ONE(NULL);
3154: }
3155:
3156: return error;
3157: }
3158:
3159: int
3160: VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
3161: {
3162: int error;
3163:
3164: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3165: KERNEL_LOCK(1, NULL);
3166: }
3167: error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
3168: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3169: KERNEL_UNLOCK_ONE(NULL);
3170: }
3171:
3172: return error;
3173: }
3174:
3175: int
3176: VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
3177: {
3178: int error;
3179:
3180: KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
3181: error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
3182: KERNEL_UNLOCK_ONE(NULL); /* XXX */
3183:
3184: return error;
3185: }
3186:
3187: int
3188: VFS_SUSPENDCTL(struct mount *mp, int a)
3189: {
3190: int error;
3191:
3192: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3193: KERNEL_LOCK(1, NULL);
3194: }
3195: error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
3196: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3197: KERNEL_UNLOCK_ONE(NULL);
3198: }
3199:
3200: return error;
3201: }
3202:
1.378 pooka 3203: #if defined(DDB) || defined(DEBUGPRINT)
1.353 pooka 3204: static const char buf_flagbits[] = BUF_FLAGBITS;
3205:
3206: void
3207: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
3208: {
3209: char bf[1024];
3210:
3211: (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
3212: PRIx64 " dev 0x%x\n",
3213: bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
3214:
1.361 christos 3215: snprintb(bf, sizeof(bf),
3216: buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1.353 pooka 3217: (*pr)(" error %d flags 0x%s\n", bp->b_error, bf);
3218:
3219: (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3220: bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.369 ad 3221: (*pr)(" data %p saveaddr %p\n",
3222: bp->b_data, bp->b_saveaddr);
1.353 pooka 3223: (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
3224: }
3225:
3226:
/*
 * Print (via "pr") the interesting fields of a struct vnode for the
 * kernel debugger.  With "full" set, also walk and print the vnode's
 * clean and dirty buffer lists.
 */
void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	/* First dump the embedded UVM object backing the vnode. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	/* v_iflag, v_vflag and v_uflag share one flag-name table. */
	snprintb(bf, sizeof(bf),
	    vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);

	(*pr)("data %p writecount %ld holdcnt %ld\n",
	    vp->v_data, vp->v_writecount, vp->v_holdcnt);

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_mount, vp->v_mountedhere);

	(*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
3265:
3266: void
3267: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
3268: {
3269: char sbuf[256];
3270:
3271: (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3272: mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3273:
3274: (*pr)("fs_bshift %d dev_bshift = %d\n",
3275: mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3276:
1.361 christos 3277: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1.353 pooka 3278: (*pr)("flag = %s\n", sbuf);
3279:
1.361 christos 3280: snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1.353 pooka 3281: (*pr)("iflag = %s\n", sbuf);
3282:
3283: (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
3284: &mp->mnt_unmounting, &mp->mnt_updating);
3285:
3286: (*pr)("statvfs cache:\n");
3287: (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3288: (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3289: (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3290:
3291: (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3292: (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3293: (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3294: (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3295:
3296: (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3297: (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3298: (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3299: (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3300:
3301: (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3302: mp->mnt_stat.f_fsidx.__fsid_val[0],
3303: mp->mnt_stat.f_fsidx.__fsid_val[1]);
3304:
3305: (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3306: (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3307:
1.361 christos 3308: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
3309:
1.353 pooka 3310: (*pr)("\tflag = %s\n",sbuf);
3311: (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3312: (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3313: (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3314: (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3315: (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3316: (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3317: (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3318:
3319: {
3320: int cnt = 0;
3321: struct vnode *vp;
3322: (*pr)("locked vnodes =");
3323: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3324: if (VOP_ISLOCKED(vp)) {
3325: if ((++cnt % 6) == 0) {
3326: (*pr)(" %p,\n\t", vp);
3327: } else {
3328: (*pr)(" %p,", vp);
3329: }
3330: }
3331: }
3332: (*pr)("\n");
3333: }
3334:
3335: if (full) {
3336: int cnt = 0;
3337: struct vnode *vp;
3338: (*pr)("all vnodes =");
3339: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3340: if (!TAILQ_NEXT(vp, v_mntvnodes)) {
3341: (*pr)(" %p", vp);
3342: } else if ((++cnt % 6) == 0) {
3343: (*pr)(" %p,\n\t", vp);
3344: } else {
3345: (*pr)(" %p,", vp);
3346: }
3347: }
3348: (*pr)("\n", vp);
3349: }
3350: }
1.378 pooka 3351: #endif /* DDB || DEBUGPRINT */
1.372 elad 3352:
1.385 elad 3353: /*
3354: * Check if a device pointed to by vp is mounted.
3355: *
3356: * Returns:
3357: * EINVAL if it's not a disk
3358: * EBUSY if it's a disk and mounted
3359: * 0 if it's a disk and not mounted
3360: */
3361: int
3362: rawdev_mounted(struct vnode *vp, struct vnode **bvpp)
3363: {
3364: struct vnode *bvp;
3365: dev_t dev;
3366: int d_type;
3367:
3368: bvp = NULL;
3369: dev = vp->v_rdev;
3370: d_type = D_OTHER;
3371:
3372: if (iskmemvp(vp))
3373: return EINVAL;
3374:
3375: switch (vp->v_type) {
3376: case VCHR: {
3377: const struct cdevsw *cdev;
3378:
3379: cdev = cdevsw_lookup(dev);
3380: if (cdev != NULL) {
3381: dev_t blkdev;
3382:
3383: blkdev = devsw_chr2blk(dev);
3384: if (blkdev != NODEV) {
3385: vfinddev(blkdev, VBLK, &bvp);
3386: if (bvp != NULL)
3387: d_type = (cdev->d_flag & D_TYPEMASK);
3388: }
3389: }
3390:
3391: break;
3392: }
3393:
3394: case VBLK: {
3395: const struct bdevsw *bdev;
3396:
3397: bdev = bdevsw_lookup(dev);
3398: if (bdev != NULL)
3399: d_type = (bdev->d_flag & D_TYPEMASK);
3400:
3401: bvp = vp;
3402:
3403: break;
3404: }
3405:
3406: default:
3407: break;
3408: }
3409:
3410: if (d_type != D_DISK)
3411: return EINVAL;
3412:
3413: if (bvpp != NULL)
3414: *bvpp = bvp;
3415:
3416: /*
3417: * XXX: This is bogus. We should be failing the request
3418: * XXX: not only if this specific slice is mounted, but
3419: * XXX: if it's on a disk with any other mounted slice.
3420: */
3421: if (vfs_mountedon(bvp))
3422: return EBUSY;
3423:
3424: return 0;
3425: }
CVSweb <webmaster@jp.NetBSD.org>