Annotation of src/sys/kern/vfs_subr.c, Revision 1.357.2.3
1.357.2.3! skrll 1: /* $NetBSD: vfs_subr.c,v 1.357.2.2 2009/03/03 18:32:57 skrll Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302 ad 9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
1.32 cgd 32:
1.29 cgd 33: /*
1.30 mycroft 34: * Copyright (c) 1989, 1993
35: * The Regents of the University of California. All rights reserved.
1.29 cgd 36: * (c) UNIX System Laboratories, Inc.
37: * All or some portions of this file are derived from material licensed
38: * to the University of California by American Telephone and Telegraph
39: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40: * the permission of UNIX System Laboratories, Inc.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
1.204 agc 50: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 51: * may be used to endorse or promote products derived from this software
52: * without specific prior written permission.
53: *
54: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64: * SUCH DAMAGE.
65: *
1.32 cgd 66: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 67: */
68:
69: /*
1.346 ad 70: * Note on v_usecount and locking:
71: *
72: * At nearly all points it is known that v_usecount could be zero, the
73: * vnode interlock will be held.
74: *
75: * To change v_usecount away from zero, the interlock must be held. To
76: * change from a non-zero value to zero, again the interlock must be
77: * held.
78: *
79: * Changing the usecount from a non-zero value to a non-zero value can
80: * safely be done using atomic operations, without the interlock held.
1.29 cgd 81: */
1.162 lukem 82:
83: #include <sys/cdefs.h>
1.357.2.3! skrll 84: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.357.2.2 2009/03/03 18:32:57 skrll Exp $");
1.78 mrg 85:
1.125 chs 86: #include "opt_ddb.h"
1.95 thorpej 87: #include "opt_compat_netbsd.h"
1.97 christos 88: #include "opt_compat_43.h"
1.29 cgd 89:
90: #include <sys/param.h>
1.30 mycroft 91: #include <sys/systm.h>
1.357.2.1 skrll 92: #include <sys/conf.h>
1.29 cgd 93: #include <sys/proc.h>
1.138 bouyer 94: #include <sys/kernel.h>
1.29 cgd 95: #include <sys/mount.h>
1.46 mycroft 96: #include <sys/fcntl.h>
1.29 cgd 97: #include <sys/vnode.h>
1.30 mycroft 98: #include <sys/stat.h>
1.29 cgd 99: #include <sys/namei.h>
100: #include <sys/ucred.h>
101: #include <sys/buf.h>
102: #include <sys/errno.h>
1.357.2.1 skrll 103: #include <sys/kmem.h>
1.51 christos 104: #include <sys/syscallargs.h>
1.58 thorpej 105: #include <sys/device.h>
1.192 christos 106: #include <sys/filedesc.h>
1.266 elad 107: #include <sys/kauth.h>
1.307 ad 108: #include <sys/atomic.h>
1.309 ad 109: #include <sys/kthread.h>
1.355 simonb 110: #include <sys/wapbl.h>
1.50 christos 111:
1.30 mycroft 112: #include <miscfs/specfs/specdev.h>
1.113 fvdl 113: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 114:
1.125 chs 115: #include <uvm/uvm.h>
1.255 yamt 116: #include <uvm/uvm_readahead.h>
1.125 chs 117: #include <uvm/uvm_ddb.h>
1.129 mrg 118:
119: #include <sys/sysctl.h>
1.77 mrg 120:
1.353 pooka 121: const enum vtype iftovt_tab[16] = {
122: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
123: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
124: };
125: const int vttoif_tab[9] = {
126: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
127: S_IFSOCK, S_IFIFO, S_IFMT,
128: };
129:
130: /*
131: * Insq/Remq for the vnode usage lists.
132: */
133: #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
134: #define bufremvn(bp) { \
135: LIST_REMOVE(bp, b_vnbufs); \
136: (bp)->b_vnbufs.le_next = NOLIST; \
137: }
138:
139: int doforce = 1; /* 1 => permit forcible unmounting */
140: int prtactive = 0; /* 1 => print out reclaim of active vnodes */
141:
1.309 ad 142: static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
143: static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
144: static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
145:
1.353 pooka 146: struct mntlist mountlist = /* mounted filesystem list */
147: CIRCLEQ_HEAD_INITIALIZER(mountlist);
148:
149: u_int numvnodes;
150: static specificdata_domain_t mount_specificdata_domain;
151:
1.309 ad 152: static int vrele_pending;
1.351 ad 153: static int vrele_gen;
1.309 ad 154: static kmutex_t vrele_lock;
155: static kcondvar_t vrele_cv;
156: static lwp_t *vrele_lwp;
1.113 fvdl 157:
1.353 pooka 158: kmutex_t mountlist_lock;
159: kmutex_t mntid_lock;
160: kmutex_t mntvnode_lock;
161: kmutex_t vnode_free_list_lock;
162: kmutex_t vfs_list_lock;
163:
1.309 ad 164: static pool_cache_t vnode_cache;
1.186 thorpej 165:
1.89 kleink 166: /*
1.353 pooka 167: * These define the root filesystem and device.
168: */
169: struct vnode *rootvnode;
170: struct device *root_device; /* root device */
171:
172: /*
1.89 kleink 173: * Local declarations.
174: */
1.276 hannken 175:
1.309 ad 176: static void vrele_thread(void *);
177: static void insmntque(vnode_t *, struct mount *);
178: static int getdevvp(dev_t, vnode_t **, enum vtype);
1.357.2.1 skrll 179: static vnode_t *getcleanvnode(void);
1.309 ad 180: void vpanic(vnode_t *, const char *);
181:
1.353 pooka 182: #ifdef DEBUG
183: void printlockedvnodes(void);
184: #endif
185:
1.309 ad 186: #ifdef DIAGNOSTIC
187: void
188: vpanic(vnode_t *vp, const char *msg)
189: {
190:
191: vprint(NULL, vp);
192: panic("%s\n", msg);
193: }
194: #else
195: #define vpanic(vp, msg) /* nothing */
196: #endif
/*
 * One-time setup of the vnode allocator and the deferred-release
 * machinery: the vnode pool cache, the vrele lock/condvar, and the
 * "vrele" kernel thread that drains vrele_list.  Called from vntblinit().
 */
void
vn_init1(void)
{

	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	/* Create deferred release thread. */
	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vrele_cv, "vrele");
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele"))
		panic("fork vrele");
}
1.51 christos 213:
/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	/* Locks protecting the global mount and vnode lists. */
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	/* Domain used for per-mount specificdata (mount_initspecific()). */
	mount_specificdata_domain = specificdata_domain_create();

	/* Initialize the filesystem syncer. */
	vn_initialize_syncerd();
	/* Finally the vnode cache and the vrele thread. */
	vn_init1();
}
233:
1.202 yamt 234: int
1.256 christos 235: vfs_drainvnodes(long target, struct lwp *l)
1.202 yamt 236: {
237:
238: while (numvnodes > target) {
1.309 ad 239: vnode_t *vp;
1.202 yamt 240:
1.309 ad 241: mutex_enter(&vnode_free_list_lock);
242: vp = getcleanvnode();
1.202 yamt 243: if (vp == NULL)
244: return EBUSY; /* give up */
1.309 ad 245: ungetnewvnode(vp);
1.202 yamt 246: }
247:
248: return 0;
249: }
250:
251: /*
1.353 pooka 252: * Lookup a mount point by filesystem identifier.
253: *
254: * XXX Needs to add a reference to the mount point.
255: */
256: struct mount *
257: vfs_getvfs(fsid_t *fsid)
258: {
259: struct mount *mp;
260:
261: mutex_enter(&mountlist_lock);
262: CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
263: if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
264: mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
265: mutex_exit(&mountlist_lock);
266: return (mp);
267: }
268: }
269: mutex_exit(&mountlist_lock);
270: return ((struct mount *)0);
271: }
272:
/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_destroy(struct mount *mp)
{

	/*
	 * The cast to int makes the "still referenced" test robust
	 * against the unsigned result of atomic_dec_uint_nv().
	 */
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	rw_destroy(&mp->mnt_unmounting);
	mutex_destroy(&mp->mnt_updating);
	mutex_destroy(&mp->mnt_renamelock);
	/* mnt_op may be NULL for a mount that never got its vfsops. */
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	kmem_free(mp, sizeof(*mp));
}
298:
/*
 * grab a vnode from freelist and clean it.
 *
 * => Must be entered with vnode_free_list_lock held; the lock is
 *    always released before returning.
 * => Returns a reclaimed vnode with v_usecount == 1, or NULL if
 *    nothing on either free list could be reclaimed.
 */
vnode_t *
getcleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));

retry:
	/* Prefer the free list; fall back to the hold list below. */
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		if (vp->v_usecount != 0) {
			vpanic(vp, "free vnode isn't");
		}
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vpanic(vp, "clean vnode on freelist");
		}
		if (vp->v_freelisthd != listhd) {
			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
			vpanic(vp, "list head mismatch");
		}
		/* Skip vnodes whose interlock is contended. */
		if (!mutex_tryenter(&vp->v_interlock))
			continue;
		/*
		 * Our lwp might hold the underlying vnode
		 * locked, so don't try to reclaim a VI_LAYER
		 * node if it's locked.
		 */
		if ((vp->v_iflag & VI_XLOCK) == 0 &&
		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			break;
		}
		mutex_exit(&vp->v_interlock);
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			/* Free list exhausted - try the hold list. */
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return NULL;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before reusing it. We need to add a reference
	 * before doing this. If the vnode gains another reference while
	 * being cleaned out then we lose - retry.
	 */
	atomic_inc_uint(&vp->v_usecount);
	vclean(vp, DOCLOSE);
	if (vp->v_usecount == 1) {
		/* We're about to dirty it. */
		vp->v_iflag &= ~VI_CLEAN;
		mutex_exit(&vp->v_interlock);
		/* Device vnodes carry a specfs node that must go too. */
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vp->v_type = VNON;
	} else {
		/*
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		vrelel(vp, 0); /* releases vp->v_interlock */
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/* Sanity: a cleaned vnode has no data, pages, or pending I/O. */
	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
		vpanic(vp, "cleaned vnode isn't");
	}
	if (vp->v_numoutput != 0) {
		vpanic(vp, "clean vnode has pending I/O's");
	}
	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
		vpanic(vp, "clean vnode on syncer list");
	}

	return vp;
}
397:
/*
 * Mark a mount point as busy, and gain a new reference to it. Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 * => If nextp != NULL the caller holds mountlist_lock; on failure the
 *    lock is kept and *nextp is set to the next mount on the list, on
 *    success mountlist_lock is released.
 */
int
vfs_busy(struct mount *mp, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* Unmount holds mnt_unmounting as writer; fail if it does. */
	if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return EBUSY;
	}
	/* A file system that has completed unmount is gone for good. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		rw_exit(&mp->mnt_unmounting);
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return ENOENT;
	}
	if (nextp != NULL) {
		mutex_exit(&mountlist_lock);
	}
	/* Success: hold the read lock and gain a new reference. */
	atomic_inc_uint(&mp->mnt_refcnt);
	return 0;
}
432:
/*
 * Unbusy a busy filesystem.
 *
 * => If keepref is true, preserve reference added by vfs_busy().
 * => If nextp != NULL, acquire mountlist_lock and return the next
 *    mount on the list via *nextp (with the lock still held).
 *
 * NOTE(review): when keepref is false the vfs_busy() reference is
 * dropped here; mp stays valid because vfs_busy() required the caller
 * to already hold a separate reference.
 */
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (nextp != NULL) {
		mutex_enter(&mountlist_lock);
	}
	/* Release the read hold taken by vfs_busy(). */
	rw_exit(&mp->mnt_unmounting);
	if (!keepref) {
		vfs_destroy(mp);
	}
	if (nextp != NULL) {
		KASSERT(mutex_owned(&mountlist_lock));
		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
	}
}
457:
458: /*
1.80 fvdl 459: * Lookup a filesystem type, and if found allocate and initialize
460: * a mount structure for it.
461: *
462: * Devname is usually updated by mount(8) after booting.
1.29 cgd 463: */
1.50 christos 464: int
1.247 thorpej 465: vfs_rootmountalloc(const char *fstypename, const char *devname,
466: struct mount **mpp)
1.29 cgd 467: {
1.80 fvdl 468: struct vfsops *vfsp = NULL;
469: struct mount *mp;
1.29 cgd 470:
1.309 ad 471: mutex_enter(&vfs_list_lock);
1.152 jdolecek 472: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291 christos 473: if (!strncmp(vfsp->vfs_name, fstypename,
474: sizeof(mp->mnt_stat.f_fstypename)))
1.80 fvdl 475: break;
1.315 ad 476: if (vfsp == NULL) {
477: mutex_exit(&vfs_list_lock);
1.80 fvdl 478: return (ENODEV);
1.315 ad 479: }
1.309 ad 480: vfsp->vfs_refcount++;
481: mutex_exit(&vfs_list_lock);
482:
1.327 ad 483: mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
484: if (mp == NULL)
485: return ENOMEM;
486: mp->mnt_refcnt = 1;
1.344 ad 487: rw_init(&mp->mnt_unmounting);
488: mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
1.331 skrll 489: mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
1.344 ad 490: (void)vfs_busy(mp, NULL);
1.272 reinoud 491: TAILQ_INIT(&mp->mnt_vnodelist);
1.80 fvdl 492: mp->mnt_op = vfsp;
493: mp->mnt_flag = MNT_RDONLY;
1.309 ad 494: mp->mnt_vnodecovered = NULL;
1.291 christos 495: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
496: sizeof(mp->mnt_stat.f_fstypename));
1.80 fvdl 497: mp->mnt_stat.f_mntonname[0] = '/';
1.314 pooka 498: mp->mnt_stat.f_mntonname[1] = '\0';
1.291 christos 499: mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
500: '\0';
501: (void)copystr(devname, mp->mnt_stat.f_mntfromname,
502: sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.276 hannken 503: mount_initspecific(mp);
1.80 fvdl 504: *mpp = mp;
1.29 cgd 505: return (0);
506: }
507:
1.30 mycroft 508: /*
509: * Routines having to do with the management of the vnode table.
510: */
1.217 junyoung 511: extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 512:
1.29 cgd 513: /*
514: * Return the next vnode from the free list.
515: */
1.50 christos 516: int
1.247 thorpej 517: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
1.309 ad 518: vnode_t **vpp)
1.29 cgd 519: {
1.142 chs 520: struct uvm_object *uobj;
1.113 fvdl 521: static int toggle;
1.309 ad 522: vnode_t *vp;
1.153 thorpej 523: int error = 0, tryalloc;
1.158 chs 524:
1.159 enami 525: try_again:
1.327 ad 526: if (mp != NULL) {
1.103 sommerfe 527: /*
1.327 ad 528: * Mark filesystem busy while we're creating a
529: * vnode. If unmount is in progress, this will
1.342 ad 530: * fail.
1.103 sommerfe 531: */
1.344 ad 532: error = vfs_busy(mp, NULL);
1.327 ad 533: if (error)
1.103 sommerfe 534: return error;
535: }
1.29 cgd 536:
1.113 fvdl 537: /*
538: * We must choose whether to allocate a new vnode or recycle an
539: * existing one. The criterion for allocating a new one is that
540: * the total number of vnodes is less than the number desired or
541: * there are no vnodes on either free list. Generally we only
542: * want to recycle vnodes that have no buffers associated with
543: * them, so we look first on the vnode_free_list. If it is empty,
544: * we next consider vnodes with referencing buffers on the
545: * vnode_hold_list. The toggle ensures that half the time we
546: * will use a buffer from the vnode_hold_list, and half the time
547: * we will allocate a new one unless the list has grown to twice
548: * the desired size. We are reticent to recycle vnodes from the
549: * vnode_hold_list because we will lose the identity of all its
550: * referencing buffers.
551: */
1.142 chs 552:
1.153 thorpej 553: vp = NULL;
554:
1.309 ad 555: mutex_enter(&vnode_free_list_lock);
1.153 thorpej 556:
1.113 fvdl 557: toggle ^= 1;
558: if (numvnodes > 2 * desiredvnodes)
559: toggle = 0;
560:
1.153 thorpej 561: tryalloc = numvnodes < desiredvnodes ||
1.159 enami 562: (TAILQ_FIRST(&vnode_free_list) == NULL &&
563: (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153 thorpej 564:
1.309 ad 565: if (tryalloc) {
1.206 yamt 566: numvnodes++;
1.309 ad 567: mutex_exit(&vnode_free_list_lock);
1.310 pooka 568: if ((vp = vnalloc(NULL)) == NULL) {
1.309 ad 569: mutex_enter(&vnode_free_list_lock);
570: numvnodes--;
571: } else
572: vp->v_usecount = 1;
573: }
574:
575: if (vp == NULL) {
576: vp = getcleanvnode();
577: if (vp == NULL) {
1.327 ad 578: if (mp != NULL) {
1.339 ad 579: vfs_unbusy(mp, false, NULL);
1.327 ad 580: }
1.153 thorpej 581: if (tryalloc) {
582: printf("WARNING: unable to allocate new "
583: "vnode, retrying...\n");
1.345 ad 584: kpause("newvn", false, hz, NULL);
1.153 thorpej 585: goto try_again;
586: }
1.132 jdolecek 587: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 588: *vpp = 0;
589: return (ENFILE);
590: }
1.302 ad 591: vp->v_iflag = 0;
592: vp->v_vflag = 0;
593: vp->v_uflag = 0;
1.158 chs 594: vp->v_socket = NULL;
1.29 cgd 595: }
1.309 ad 596:
597: KASSERT(vp->v_usecount == 1);
598: KASSERT(vp->v_freelisthd == NULL);
599: KASSERT(LIST_EMPTY(&vp->v_nclist));
600: KASSERT(LIST_EMPTY(&vp->v_dnclist));
601:
1.29 cgd 602: vp->v_type = VNON;
1.104 wrstuden 603: vp->v_vnlock = &vp->v_lock;
1.29 cgd 604: vp->v_tag = tag;
605: vp->v_op = vops;
606: insmntque(vp, mp);
1.30 mycroft 607: *vpp = vp;
608: vp->v_data = 0;
1.142 chs 609:
610: /*
611: * initialize uvm_object within vnode.
612: */
613:
1.158 chs 614: uobj = &vp->v_uobj;
615: KASSERT(uobj->pgops == &uvm_vnodeops);
616: KASSERT(uobj->uo_npages == 0);
617: KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288 yamt 618: vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142 chs 619:
1.309 ad 620: if (mp != NULL) {
621: if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
622: vp->v_vflag |= VV_MPSAFE;
1.339 ad 623: vfs_unbusy(mp, true, NULL);
1.309 ad 624: }
625:
1.29 cgd 626: return (0);
1.130 fvdl 627: }
628:
/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(&vp->v_interlock);
	/*
	 * Mark it VI_CLEAN — presumably so the final vrelel() destroys
	 * rather than caches it (getcleanvnode() treats a VI_CLEAN vnode
	 * on the freelist as a panic condition) — TODO confirm.
	 */
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);	/* releases vp->v_interlock */
}
646:
647: /*
648: * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
649: * marker vnode and we are prepared to wait for the allocation.
650: */
651: vnode_t *
1.310 pooka 652: vnalloc(struct mount *mp)
1.130 fvdl 653: {
1.309 ad 654: vnode_t *vp;
655:
656: vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
657: if (vp == NULL) {
658: return NULL;
659: }
660:
661: memset(vp, 0, sizeof(*vp));
662: UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
663: cv_init(&vp->v_cv, "vnode");
664: /*
665: * done by memset() above.
666: * LIST_INIT(&vp->v_nclist);
667: * LIST_INIT(&vp->v_dnclist);
668: */
669:
670: if (mp != NULL) {
671: vp->v_mount = mp;
672: vp->v_type = VBAD;
673: vp->v_iflag = VI_MARKER;
674: } else {
1.326 ad 675: rw_init(&vp->v_lock.vl_lock);
1.309 ad 676: }
677:
678: return vp;
679: }
680:
681: /*
682: * Free an unused, unreferenced vnode.
683: */
684: void
1.310 pooka 685: vnfree(vnode_t *vp)
1.309 ad 686: {
687:
688: KASSERT(vp->v_usecount == 0);
689:
690: if ((vp->v_iflag & VI_MARKER) == 0) {
1.326 ad 691: rw_destroy(&vp->v_lock.vl_lock);
1.309 ad 692: mutex_enter(&vnode_free_list_lock);
693: numvnodes--;
694: mutex_exit(&vnode_free_list_lock);
695: }
696:
697: UVM_OBJ_DESTROY(&vp->v_uobj);
698: cv_destroy(&vp->v_cv);
699: pool_cache_put(vnode_cache, vp);
700: }
701:
/*
 * Remove a vnode from its freelist.
 *
 * => Caller holds the vnode interlock; v_usecount must still be zero.
 */
static inline void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	/* Held vnodes live on the hold list, unheld ones on the free list. */
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}
726:
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vnode_t *vp, struct mount *mp)
{
	struct mount *omp;

#ifdef DIAGNOSTIC
	/* Only VT_VFS vnodes may be attached to a dying file system. */
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	mutex_enter(&mntvnode_lock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL)
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available. The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL)
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
	mutex_exit(&mntvnode_lock);

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_destroy(omp);
	}
}
763:
764: /*
1.353 pooka 765: * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
766: * recycled.
767: */
768: void
769: vwait(vnode_t *vp, int flags)
770: {
771:
772: KASSERT(mutex_owned(&vp->v_interlock));
773: KASSERT(vp->v_usecount != 0);
774:
775: while ((vp->v_iflag & flags) != 0)
776: cv_wait(&vp->v_cv, &vp->v_interlock);
777: }
778:
779: /*
780: * Insert a marker vnode into a mount's vnode list, after the
781: * specified vnode. mntvnode_lock must be held.
782: */
783: void
784: vmark(vnode_t *mvp, vnode_t *vp)
785: {
786: struct mount *mp;
787:
788: mp = mvp->v_mount;
789:
790: KASSERT(mutex_owned(&mntvnode_lock));
791: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
792: KASSERT(vp->v_mount == mp);
793:
794: TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
795: }
796:
797: /*
798: * Remove a marker vnode from a mount's vnode list, and return
799: * a pointer to the next vnode in the list. mntvnode_lock must
800: * be held.
801: */
802: vnode_t *
803: vunmark(vnode_t *mvp)
804: {
805: vnode_t *vp;
806: struct mount *mp;
807:
808: mp = mvp->v_mount;
809:
810: KASSERT(mutex_owned(&mntvnode_lock));
811: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
812:
813: vp = TAILQ_NEXT(mvp, v_mntvnodes);
814: TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
815:
816: KASSERT(vp == NULL || vp->v_mount == mp);
817:
818: return vp;
819: }
820:
821: /*
822: * Update outstanding I/O count and do wakeup if requested.
823: */
824: void
825: vwakeup(struct buf *bp)
826: {
827: struct vnode *vp;
828:
829: if ((vp = bp->b_vp) == NULL)
830: return;
831:
832: KASSERT(bp->b_objlock == &vp->v_interlock);
833: KASSERT(mutex_owned(bp->b_objlock));
834:
835: if (--vp->v_numoutput < 0)
836: panic("vwakeup: neg numoutput, vp %p", vp);
837: if (vp->v_numoutput == 0)
838: cv_broadcast(&vp->v_cv);
839: }
840:
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * => With V_SAVE, dirty data is synced to disk first; otherwise it
 *    is discarded.
 * => Returns 0 on success or an error from the page flush, fsync,
 *    or bbusy().
 */
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
	  bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	mutex_enter(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		/* Push dirty data to disk before invalidating. */
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
		if (error)
			return (error);
		KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
	}

	mutex_enter(&bufcache_lock);
restart:
	/* Invalidate the dirty buffer list. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			/* EPASSTHROUGH: lists changed while we slept. */
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Then the clean buffer list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			VOP_BWRITE(bp);
			mutex_enter(&bufcache_lock);
			goto restart;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	mutex_exit(&bufcache_lock);

	return (0);
}
919:
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * => Pages and buffers with logical block number >= lbn are flushed
 *    and invalidated; earlier blocks are untouched.
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	mutex_enter(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	mutex_enter(&bufcache_lock);
restart:
	/* Invalidate dirty buffers at or past the truncation point. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			/* EPASSTHROUGH: lists changed while we slept. */
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Same for clean buffers. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}
	mutex_exit(&bufcache_lock);

	return (0);
}
972:
/*
 * Flush all dirty buffers from a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * => When 'sync' is non-zero, waits for all I/O to complete and loops
 *    until no dirty buffers remain.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	bool dirty;

	mutex_enter(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	mutex_enter(&bufcache_lock);
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_cflags & BC_BUSY))
			continue;
		if ((bp->b_oflags & BO_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
		mutex_exit(&bufcache_lock);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* The lists may have changed; rescan from the start. */
		goto loop;
	}
	mutex_exit(&bufcache_lock);

	if (sync == 0)
		return;

	/* Wait for outstanding writes, then re-check for dirty buffers. */
	mutex_enter(&vp->v_interlock);
	while (vp->v_numoutput != 0)
		cv_wait(&vp->v_cv, &vp->v_interlock);
	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
	mutex_exit(&vp->v_interlock);

	if (dirty) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
1024:
1025: /*
1.29 cgd 1026: * Create a vnode for a block device.
1.59 thorpej 1027: * Used for root filesystem and swap areas.
1.29 cgd 1028: * Also used for memory file system special devices.
1029: */
1.50 christos 1030: int
1.309 ad 1031: bdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1032: {
1.30 mycroft 1033:
1034: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 1035: }
1036:
1037: /*
1038: * Create a vnode for a character device.
1039: * Used for kernfs and some console handling.
1040: */
1.50 christos 1041: int
1.309 ad 1042: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1043: {
1.30 mycroft 1044:
1045: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 1046: }
1047:
1048: /*
1.353 pooka 1049: * Associate a buffer with a vnode. There must already be a hold on
1050: * the vnode.
1051: */
1052: void
1053: bgetvp(struct vnode *vp, struct buf *bp)
1054: {
1055: 
1056: 	KASSERT(bp->b_vp == NULL);
1057: 	KASSERT(bp->b_objlock == &buffer_lock);
1058: 	KASSERT(mutex_owned(&vp->v_interlock));
1059: 	KASSERT(mutex_owned(&bufcache_lock));
1060: 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1061: 	KASSERT(!cv_has_waiters(&bp->b_done));
1062: 
1063: 	vholdl(vp);
1064: 	bp->b_vp = vp;
	/* For device vnodes, record the device for I/O; NODEV otherwise. */
1065: 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1066: 		bp->b_dev = vp->v_rdev;
1067: 	else
1068: 		bp->b_dev = NODEV;
1069: 
1070: 	/*
1071: 	 * Insert onto list for new vnode.
1072: 	 */
1073: 	bufinsvn(bp, &vp->v_cleanblkhd);
	/* Switch the buffer's object lock from the global buffer_lock
	 * to this vnode's interlock. */
1074: 	bp->b_objlock = &vp->v_interlock;
1075: }
1076:
1077: /*
1078: * Disassociate a buffer from a vnode.
1079: */
1080: void
1081: brelvp(struct buf *bp)
1082: {
1083: 	struct vnode *vp = bp->b_vp;
1084: 
1085: 	KASSERT(vp != NULL);
1086: 	KASSERT(bp->b_objlock == &vp->v_interlock);
1087: 	KASSERT(mutex_owned(&vp->v_interlock));
1088: 	KASSERT(mutex_owned(&bufcache_lock));
1089: 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1090: 	KASSERT(!cv_has_waiters(&bp->b_done));
1091: 
1092: 	/*
1093: 	 * Delete from old vnode list, if on one.
1094: 	 */
1095: 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1096: 		bufremvn(bp);
1097: 
	/* If this was the last dirty buffer and no pages remain, the
	 * vnode no longer needs the syncer's attention. */
1098: 	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
1099: 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1100: 		vp->v_iflag &= ~VI_WRMAPDIRTY;
1101: 		vn_syncer_remove_from_worklist(vp);
1102: 	}
1103: 
	/* Revert the buffer's object lock to the global buffer_lock and
	 * drop the hold taken by bgetvp(). */
1104: 	bp->b_objlock = &buffer_lock;
1105: 	bp->b_vp = NULL;
1106: 	holdrelel(vp);
1107: }
1108:
1109: /*
1110: * Reassign a buffer from one vnode list to another.
1111: * The list reassignment must be within the same vnode.
1112: * Used to assign file specific control information
1113: * (indirect blocks) to the list to which they belong.
1114: */
1115: void
1116: reassignbuf(struct buf *bp, struct vnode *vp)
1117: {
1118: 	struct buflists *listheadp;
1119: 	int delayx;
1120: 
1121: 	KASSERT(mutex_owned(&bufcache_lock));
1122: 	KASSERT(bp->b_objlock == &vp->v_interlock);
1123: 	KASSERT(mutex_owned(&vp->v_interlock));
1124: 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1125: 
1126: 	/*
1127: 	 * Delete from old vnode list, if on one.
1128: 	 */
1129: 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1130: 		bufremvn(bp);
1131: 
1132: 	/*
1133: 	 * If dirty, put on list of dirty buffers;
1134: 	 * otherwise insert onto list of clean buffers.
1135: 	 */
1136: 	if ((bp->b_oflags & BO_DELWRI) == 0) {
1137: 		listheadp = &vp->v_cleanblkhd;
		/* Last dirty buffer gone and no pages left: take the
		 * vnode off the syncer worklist. */
1138: 		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
1139: 		    (vp->v_iflag & VI_ONWORKLST) &&
1140: 		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1141: 			vp->v_iflag &= ~VI_WRMAPDIRTY;
1142: 			vn_syncer_remove_from_worklist(vp);
1143: 		}
1144: 	} else {
1145: 		listheadp = &vp->v_dirtyblkhd;
1146: 		if ((vp->v_iflag & VI_ONWORKLST) == 0) {
			/* Choose the per-type syncer delay: directories,
			 * mounted block devices (metadata) and everything
			 * else each have their own writeback deadline. */
1147: 			switch (vp->v_type) {
1148: 			case VDIR:
1149: 				delayx = dirdelay;
1150: 				break;
1151: 			case VBLK:
1152: 				if (vp->v_specmountpoint != NULL) {
1153: 					delayx = metadelay;
1154: 					break;
1155: 				}
1156: 				/* fall through */
1157: 			default:
1158: 				delayx = filedelay;
1159: 				break;
1160: 			}
			/* MNT_ASYNC file systems never use the syncer. */
1161: 			if (!vp->v_mount ||
1162: 			    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1163: 				vn_syncer_add_to_worklist(vp, delayx);
1164: 		}
1165: 	}
1166: 	bufinsvn(bp, listheadp);
1167: }
1168:
1169: /*
1.29 cgd 1170: * Create a vnode for a device.
1171: * Used by bdevvp (block device) for root file system etc.,
1172: * and by cdevvp (character device) for console and kernfs.
1173: */
1.260 yamt 1174: static int
1.309 ad 1175: getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
1.29 cgd 1176: {
1.309 ad 1177: 	vnode_t *vp;
1178: 	vnode_t *nvp;
1.29 cgd 1179: 	int error;
1180: 
	/* NODEV: nothing to create; report success with *vpp = NULL. */
1.80 fvdl 1181: 	if (dev == NODEV) {
1.302 ad 1182: 		*vpp = NULL;
1.29 cgd 1183: 		return (0);
1.80 fvdl 1184: 	}
1.50 christos 1185: 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 1186: 	if (error) {
1.302 ad 1187: 		*vpp = NULL;
1.29 cgd 1188: 		return (error);
1189: 	}
1190: 	vp = nvp;
1191: 	vp->v_type = type;
1.309 ad 1192: 	vp->v_vflag |= VV_MPSAFE;
1.297 pooka 1193: 	uvm_vnp_setsize(vp, 0);
	/* Attach the specfs per-device state for this dev_t. */
1.318 ad 1194: 	spec_node_init(vp, dev);
1.29 cgd 1195: 	*vpp = vp;
1196: 	return (0);
1197: }
1198:
1199: /*
1.349 ad 1200: * Try to gain a reference to a vnode, without acquiring its interlock.
1201: * The caller must hold a lock that will prevent the vnode from being
1202: * recycled or freed.
1203: */
1204: bool
1205: vtryget(vnode_t *vp)
1206: {
1207: 	u_int use, next;
1208: 
1209: 	/*
1210: 	 * If the vnode is being freed, don't make life any harder
1211: 	 * for vclean() by adding another reference without waiting.
1212: 	 * This is not strictly necessary, but we'll do it anyway.
1213: 	 */
1214: 	if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
1215: 		return false;
1216: 	}
	/* Optimistic increment: CAS in (use + 1); if another thread
	 * changed v_usecount in the meantime, retry with the value the
	 * CAS observed. */
1217: 	for (use = vp->v_usecount;; use = next) {
1218: 		if (use == 0) {
1219: 			/* Need interlock held if first reference. */
1220: 			return false;
1221: 		}
1222: 		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
1223: 		if (__predict_true(next == use)) {
1224: 			return true;
1225: 		}
1226: 	}
1227: }
1228:
1229: /*
1.29 cgd 1230: * Grab a particular vnode from the free list, increment its
1.83 fvdl 1231: * reference count and lock it. If the vnode lock bit is set the
1232: * vnode is being eliminated in vgone. In that case, we can not
1233: * grab the vnode, so the process is awakened when the transition is
1234: * completed, and an error returned to indicate that the vnode is no
1235: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 1236: */
1.30 mycroft 1237: int
1.309 ad 1238: vget(vnode_t *vp, int flags)
1.29 cgd 1239: {
1.175 perseant 1240: 	int error;
1.29 cgd 1241: 
1.309 ad 1242: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1243: 
1244: 	if ((flags & LK_INTERLOCK) == 0)
1245: 		mutex_enter(&vp->v_interlock);
1246: 
1247: 	/*
1248: 	 * Before adding a reference, we must remove the vnode
1249: 	 * from its freelist.
1250: 	 */
	/* First reference: safe to store directly since the interlock
	 * is held and nobody else can reference an unused vnode. */
1251: 	if (vp->v_usecount == 0) {
1.350 ad 1252: 		vremfree(vp);
1.346 ad 1253: 		vp->v_usecount = 1;
1254: 	} else {
1255: 		atomic_inc_uint(&vp->v_usecount);
1.309 ad 1256: 	}
1257: 
1.30 mycroft 1258: 	/*
1259: 	 * If the vnode is in the process of being cleaned out for
1260: 	 * another use, we wait for the cleaning to finish and then
1.312 ad 1261: 	 * return failure.  Cleaning is determined by checking if
1262: 	 * the VI_XLOCK or VI_FREEING flags are set.
1.80 fvdl 1263: 	 */
1.312 ad 1264: 	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
1.313 ad 1265: 		if ((flags & LK_NOWAIT) != 0) {
1.324 pooka 1266: 			vrelel(vp, 0);
1.142 chs 1267: 			return EBUSY;
1268: 		}
		/* vwait() releases and re-takes the interlock; vrelel()
		 * consumes it and drops the reference we just gained. */
1.312 ad 1269: 		vwait(vp, VI_XLOCK | VI_FREEING);
1.324 pooka 1270: 		vrelel(vp, 0);
1.313 ad 1271: 		return ENOENT;
1.29 cgd 1272: 	}
	/* Caller also wants the vnode lock; vn_lock() consumes the
	 * interlock (LK_INTERLOCK). */
1.80 fvdl 1273: 	if (flags & LK_TYPE_MASK) {
1.313 ad 1274: 		error = vn_lock(vp, flags | LK_INTERLOCK);
1275: 		if (error != 0) {
1.257 yamt 1276: 			vrele(vp);
1.113 fvdl 1277: 		}
1.313 ad 1278: 		return error;
1.80 fvdl 1279: 	}
1.309 ad 1280: 	mutex_exit(&vp->v_interlock);
1.313 ad 1281: 	return 0;
1.29 cgd 1282: }
1283:
1284: /*
1285: * vput(), just unlock and vrele()
1286: */
1287: void
1.309 ad 1288: vput(vnode_t *vp)
1.29 cgd 1289: {
1.30 mycroft 1290:
1.309 ad 1291: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1292:
1293: VOP_UNLOCK(vp, 0);
1294: vrele(vp);
1.29 cgd 1295: }
1296:
1297: /*
1.346 ad 1298: * Try to drop reference on a vnode. Abort if we are releasing the
1.357.2.1 skrll 1299: * last reference. Note: this _must_ succeed if not the last reference.
1.346 ad 1300: */
1301: static inline bool
1302: vtryrele(vnode_t *vp)
1303: {
1304: 	u_int use, next;
1305: 
	/* CAS loop: decrement v_usecount unless that would drop the
	 * last reference (use == 1), which needs the interlocked slow
	 * path in vrelel(). */
1306: 	for (use = vp->v_usecount;; use = next) {
1307: 		if (use == 1) {
1308: 			return false;
1309: 		}
1310: 		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
1311: 		if (__predict_true(next == use)) {
1312: 			return true;
1313: 		}
1314: 	}
1315: }
1316:
1317: /*
1.309 ad 1318: * Vnode release. If reference count drops to zero, call inactive
1319: * routine and either return to freelist or free to the pool.
1.29 cgd 1320: */
1.309 ad 1321: void
1.324 pooka 1322: vrelel(vnode_t *vp, int flags)
1.29 cgd 1323: {
1.309 ad 1324: 	bool recycle, defer;
1325: 	int error;
1326: 
1327: 	KASSERT(mutex_owned(&vp->v_interlock));
1328: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.315 ad 1329: 	KASSERT(vp->v_freelisthd == NULL);
1.29 cgd 1330: 
1.357.2.1 skrll 1331: 	if (__predict_false(vp->v_op == dead_vnodeop_p &&
1332: 	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
1.309 ad 1333: 		vpanic(vp, "dead but not clean");
1334: 	}
1335: 
1336: 	/*
1337: 	 * If not the last reference, just drop the reference count
1338: 	 * and unlock.
1339: 	 */
1.346 ad 1340: 	if (vtryrele(vp)) {
1.309 ad 1341: 		vp->v_iflag |= VI_INACTREDO;
1342: 		mutex_exit(&vp->v_interlock);
1.29 cgd 1343: 		return;
1.80 fvdl 1344: 	}
1.309 ad 1345: 	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
1.357.2.1 skrll 1346: 		vpanic(vp, "vrelel: bad ref count");
1.29 cgd 1347: 	}
1.309 ad 1348: 
1.357.2.1 skrll 1349: 	KASSERT((vp->v_iflag & VI_XLOCK) == 0);
1350: 
1.30 mycroft 1351: 	/*
1.309 ad 1352: 	 * If not clean, deactivate the vnode, but preserve
1353: 	 * our reference across the call to VOP_INACTIVE().
1.30 mycroft 1354: 	 */
	/* Re-entered below if a new reference appeared during
	 * VOP_INACTIVE() (VI_INACTREDO). */
1.309 ad 1355:  retry:
1356: 	if ((vp->v_iflag & VI_CLEAN) == 0) {
1357: 		recycle = false;
		/* VI_INACTNOW tells vrele() that VOP_INACTIVE() is in
		 * progress, disabling its lock-free fast path. */
1.346 ad 1358: 		vp->v_iflag |= VI_INACTNOW;
1359: 
1.309 ad 1360: 		/*
1361: 		 * XXX This ugly block can be largely eliminated if
1362: 		 * locking is pushed down into the file systems.
1363: 		 */
1364: 		if (curlwp == uvm.pagedaemon_lwp) {
1365: 			/* The pagedaemon can't wait around; defer. */
1366: 			defer = true;
1367: 		} else if (curlwp == vrele_lwp) {
1368: 			/* We have to try harder. */
1369: 			vp->v_iflag &= ~VI_INACTREDO;
1370: 			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1371: 			    LK_RETRY);
1372: 			if (error != 0) {
1373: 				/* XXX */
1374: 				vpanic(vp, "vrele: unable to lock %p");
1375: 			}
1376: 			defer = false;
1377: 		} else if ((vp->v_iflag & VI_LAYER) != 0) {
1378: 			/*
1379: 			 * Acquiring the stack's lock in vclean() even
1380: 			 * for an honest vput/vrele is dangerous because
1381: 			 * our caller may hold other vnode locks; defer.
1382: 			 */
1383: 			defer = true;
1384: 		} else {
1385: 			/* If we can't acquire the lock, then defer. */
1386: 			vp->v_iflag &= ~VI_INACTREDO;
1387: 			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1388: 			    LK_NOWAIT);
1389: 			if (error != 0) {
1390: 				defer = true;
1391: 				mutex_enter(&vp->v_interlock);
1392: 			} else {
1393: 				defer = false;
1394: 			}
1395: 		}
1396: 
1397: 		if (defer) {
1398: 			/*
1399: 			 * Defer reclaim to the kthread; it's not safe to
1400: 			 * clean it here.  We donate it our last reference.
1401: 			 */
1402: 			KASSERT(mutex_owned(&vp->v_interlock));
1403: 			KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
1.346 ad 1404: 			vp->v_iflag &= ~VI_INACTNOW;
1.309 ad 1405: 			vp->v_iflag |= VI_INACTPEND;
1406: 			mutex_enter(&vrele_lock);
1407: 			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
1408: 			if (++vrele_pending > (desiredvnodes >> 8))
1409: 				cv_signal(&vrele_cv);
1410: 			mutex_exit(&vrele_lock);
1411: 			mutex_exit(&vp->v_interlock);
1412: 			return;
1413: 		}
1414: 
1.318 ad 1415: #ifdef DIAGNOSTIC
1.321 ad 1416: 		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1417: 		    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
1.318 ad 1418: 			vprint("vrelel: missing VOP_CLOSE()", vp);
1419: 		}
1420: #endif
1421: 
1.309 ad 1422: 		/*
1.312 ad 1423: 		 * The vnode can gain another reference while being
1424: 		 * deactivated.  If VOP_INACTIVE() indicates that
1425: 		 * the described file has been deleted, then recycle
1426: 		 * the vnode irrespective of additional references.
1427: 		 * Another thread may be waiting to re-use the on-disk
1428: 		 * inode.
1429: 		 *
1430: 		 * Note that VOP_INACTIVE() will drop the vnode lock.
1.309 ad 1431: 		 */
1432: 		VOP_INACTIVE(vp, &recycle);
1433: 		mutex_enter(&vp->v_interlock);
1.346 ad 1434: 		vp->v_iflag &= ~VI_INACTNOW;
1.312 ad 1435: 		if (!recycle) {
1.346 ad 1436: 			if (vtryrele(vp)) {
1.312 ad 1437: 				mutex_exit(&vp->v_interlock);
1438: 				return;
1439: 			}
1.309 ad 1440: 
1.312 ad 1441: 			/*
1442: 			 * If we grew another reference while
1443: 			 * VOP_INACTIVE() was underway, retry.
1444: 			 */
1445: 			if ((vp->v_iflag & VI_INACTREDO) != 0) {
1446: 				goto retry;
1447: 			}
1.309 ad 1448: 		}
1449: 
1450: 		/* Take care of space accounting. */
1451: 		if (vp->v_iflag & VI_EXECMAP) {
1452: 			atomic_add_int(&uvmexp.execpages,
1453: 			    -vp->v_uobj.uo_npages);
1454: 			atomic_add_int(&uvmexp.filepages,
1455: 			    vp->v_uobj.uo_npages);
1456: 		}
1.346 ad 1457: 		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
1.309 ad 1458: 		vp->v_vflag &= ~VV_MAPPED;
1459: 
1460: 		/*
1461: 		 * Recycle the vnode if the file is now unused (unlinked),
1462: 		 * otherwise just free it.
1463: 		 */
1464: 		if (recycle) {
1465: 			vclean(vp, DOCLOSE);
1466: 		}
1467: 		KASSERT(vp->v_usecount > 0);
1.298 pooka 1468: 	}
1.309 ad 1469: 
1.346 ad 1470: 	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
1.309 ad 1471: 		/* Gained another reference while being reclaimed. */
1472: 		mutex_exit(&vp->v_interlock);
1473: 		return;
1.147 chs 1474: 	}
1.298 pooka 1475: 
1.309 ad 1476: 	if ((vp->v_iflag & VI_CLEAN) != 0) {
1477: 		/*
1478: 		 * It's clean so destroy it.  It isn't referenced
1479: 		 * anywhere since it has been reclaimed.
1480: 		 */
1481: 		KASSERT(vp->v_holdcnt == 0);
1482: 		KASSERT(vp->v_writecount == 0);
1483: 		mutex_exit(&vp->v_interlock);
1484: 		insmntque(vp, NULL);
1.318 ad 1485: 		if (vp->v_type == VBLK || vp->v_type == VCHR) {
1486: 			spec_node_destroy(vp);
1487: 		}
1.310 pooka 1488: 		vnfree(vp);
1.298 pooka 1489: 	} else {
1.309 ad 1490: 		/*
1491: 		 * Otherwise, put it back onto the freelist.  It
1492: 		 * can't be destroyed while still associated with
1493: 		 * a file system.
1494: 		 */
1495: 		mutex_enter(&vnode_free_list_lock);
1496: 		if (vp->v_holdcnt > 0) {
1497: 			vp->v_freelisthd = &vnode_hold_list;
1498: 		} else {
1499: 			vp->v_freelisthd = &vnode_free_list;
1500: 		}
1501: 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1502: 		mutex_exit(&vnode_free_list_lock);
1503: 		mutex_exit(&vp->v_interlock);
1.298 pooka 1504: 	}
1505: }
1506:
1507: void
1.309 ad 1508: vrele(vnode_t *vp)
1.298 pooka 1509: {
1510: 
1.309 ad 1511: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1512: 
	/* Fast path: drop a non-final reference without the interlock,
	 * unless VOP_INACTIVE() is in progress (VI_INACTNOW). */
1.346 ad 1513: 	if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
1514: 		return;
1515: 	}
1.309 ad 1516: 	mutex_enter(&vp->v_interlock);
1.324 pooka 1517: 	vrelel(vp, 0);
1.298 pooka 1518: }
1519:
1.309 ad 1520: static void
1521: vrele_thread(void *cookie)
1.298 pooka 1522: {
1.309 ad 1523: 	vnode_t *vp;
1.298 pooka 1524: 
1.309 ad 1525: 	for (;;) {
1526: 		mutex_enter(&vrele_lock);
1527: 		while (TAILQ_EMPTY(&vrele_list)) {
			/* Bump the generation and wake waiters: vflush()
			 * sleeps on vrele_cv until the list drains. */
1.351 ad 1528: 			vrele_gen++;
1529: 			cv_broadcast(&vrele_cv);
1.309 ad 1530: 			cv_timedwait(&vrele_cv, &vrele_lock, hz);
1531: 		}
1532: 		vp = TAILQ_FIRST(&vrele_list);
1533: 		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1534: 		vrele_pending--;
1535: 		mutex_exit(&vrele_lock);
1536: 
1537: 		/*
1538: 		 * If not the last reference, then ignore the vnode
1539: 		 * and look for more work.
1540: 		 */
1541: 		mutex_enter(&vp->v_interlock);
1542: 		KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
1543: 		vp->v_iflag &= ~VI_INACTPEND;
1.324 pooka 1544: 		vrelel(vp, 0);
1.309 ad 1545: 	}
1.29 cgd 1546: }
1547:
1548: /*
1549: * Page or buffer structure gets a reference.
1.258 chs 1550: * Called with v_interlock held.
1.29 cgd 1551: */
1.30 mycroft 1552: void
1.309 ad 1553: vholdl(vnode_t *vp)
1.29 cgd 1554: {
1555: 
1.309 ad 1556: 	KASSERT(mutex_owned(&vp->v_interlock));
1557: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1558: 
	/* First hold on an unreferenced vnode: migrate it from the
	 * free list to the hold list so it is not recycled. */
1559: 	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1560: 		mutex_enter(&vnode_free_list_lock);
1561: 		KASSERT(vp->v_freelisthd == &vnode_free_list);
1562: 		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1563: 		vp->v_freelisthd = &vnode_hold_list;
1564: 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1565: 		mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1566: 	}
1.29 cgd 1567: }
1568:
1569: /*
1570: * Page or buffer structure frees a reference.
1.258 chs 1571: * Called with v_interlock held.
1.29 cgd 1572: */
1.30 mycroft 1573: void
1.309 ad 1574: holdrelel(vnode_t *vp)
1.29 cgd 1575: {
1576: 
1.309 ad 1577: 	KASSERT(mutex_owned(&vp->v_interlock));
1578: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.142 chs 1579: 
1.309 ad 1580: 	if (vp->v_holdcnt <= 0) {
1581: 		vpanic(vp, "holdrelel: holdcnt vp %p");
1582: 	}
1.142 chs 1583: 
1.309 ad 1584: 	vp->v_holdcnt--;
	/* Last hold on an unreferenced vnode: migrate it back from the
	 * hold list to the free list, making it recyclable again. */
1585: 	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1586: 		mutex_enter(&vnode_free_list_lock);
1587: 		KASSERT(vp->v_freelisthd == &vnode_hold_list);
1588: 		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1589: 		vp->v_freelisthd = &vnode_free_list;
1590: 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1591: 		mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1592: 	}
1.81 ross 1593: }
1594:
1595: /*
1.309 ad 1596: * Vnode reference, where a reference is already held by some other
1597: * object (for example, a file structure).
1.81 ross 1598: */
1599: void
1.309 ad 1600: vref(vnode_t *vp)
1.81 ross 1601: {
1602: 
1.309 ad 1603: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.346 ad 1604: 	KASSERT(vp->v_usecount != 0);
1.309 ad 1605: 
	/* Safe without the interlock: the caller's existing reference
	 * keeps v_usecount non-zero, so a plain atomic increment is
	 * sufficient. */
1.346 ad 1606: 	atomic_inc_uint(&vp->v_usecount);
1.29 cgd 1607: }
1608:
1609: /*
1610: * Remove any vnodes in the vnode table belonging to mount point mp.
1611: *
1.183 yamt 1612: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1613: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1614: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1615: * that are found.
1.183 yamt 1616: *
1617: * If WRITECLOSE is set, only flush out regular file vnodes open for
1618: * writing.
1619: *
1620: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1621: */
1.30 mycroft 1622: #ifdef DEBUG
/* When set, vflush() below prints each busy vnode it fails to flush. */
1623: int busyprt = 0;	/* print out busy vnodes */
1624: struct ctldebug debug1 = { "busyprt", &busyprt };
1625: #endif
1.29 cgd 1626:
1.334 ad 1627: static vnode_t *
1628: vflushnext(vnode_t *mvp, int *when)
1629: {
1630: 
	/* Periodically drop mntvnode_lock and yield so a long scan does
	 * not monopolize the lock or the CPU; *when is the next tick at
	 * which to yield again. */
1631: 	if (hardclock_ticks > *when) {
1632: 		mutex_exit(&mntvnode_lock);
1633: 		yield();
1634: 		mutex_enter(&mntvnode_lock);
1635: 		*when = hardclock_ticks + hz / 10;
1636: 	}
1637: 
	/* Continue the scan from the marker vnode's position. */
1638: 	return vunmark(mvp);
1639: }
1640:
1.50 christos 1641: int
1.309 ad 1642: vflush(struct mount *mp, vnode_t *skipvp, int flags)
1.29 cgd 1643: {
1.309 ad 1644: 	vnode_t *vp, *mvp;
1.351 ad 1645: 	int busy = 0, when = 0, gen;
1646: 
1647: 	/*
1648: 	 * First, flush out any vnode references from vrele_list.
1649: 	 */
1650: 	mutex_enter(&vrele_lock);
1651: 	gen = vrele_gen;
	/* Wait until vrele_thread() has processed the current backlog;
	 * a change in vrele_gen signals one full drain pass. */
1.352 pooka 1652: 	while (vrele_pending && gen == vrele_gen) {
1.351 ad 1653: 		cv_broadcast(&vrele_cv);
1654: 		cv_wait(&vrele_cv, &vrele_lock);
1.352 pooka 1655: 	}
1.351 ad 1656: 	mutex_exit(&vrele_lock);
1.29 cgd 1657: 
1.309 ad 1658: 	/* Allocate a marker vnode. */
1.310 pooka 1659: 	if ((mvp = vnalloc(mp)) == NULL)
1.309 ad 1660: 		return (ENOMEM);
1661: 
1.273 reinoud 1662: 	/*
1663: 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1664: 	 * and vclean() are called
1665: 	 */
1.351 ad 1666: 	mutex_enter(&mntvnode_lock);
1.334 ad 1667: 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1668: 	    vp = vflushnext(mvp, &when)) {
1669: 		vmark(mvp, vp);
		/* Skip vnodes that changed mounts and other markers. */
1670: 		if (vp->v_mount != mp || vismarker(vp))
1671: 			continue;
1.29 cgd 1672: 		/*
1673: 		 * Skip over a selected vnode.
1674: 		 */
1675: 		if (vp == skipvp)
1676: 			continue;
1.309 ad 1677: 		mutex_enter(&vp->v_interlock);
1.29 cgd 1678: 		/*
1.315 ad 1679: 		 * Ignore clean but still referenced vnodes.
1680: 		 */
1681: 		if ((vp->v_iflag & VI_CLEAN) != 0) {
1682: 			mutex_exit(&vp->v_interlock);
1683: 			continue;
1684: 		}
1685: 		/*
1.309 ad 1686: 		 * Skip over a vnodes marked VSYSTEM.
1.29 cgd 1687: 		 */
1.302 ad 1688: 		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1.309 ad 1689: 			mutex_exit(&vp->v_interlock);
1.29 cgd 1690: 			continue;
1.80 fvdl 1691: 		}
1.29 cgd 1692: 		/*
1.30 mycroft 1693: 		 * If WRITECLOSE is set, only flush out regular file
1694: 		 * vnodes open for writing.
1695: 		 */
1696: 		if ((flags & WRITECLOSE) &&
1.92 thorpej 1697: 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1.309 ad 1698: 			mutex_exit(&vp->v_interlock);
1.30 mycroft 1699: 			continue;
1.92 thorpej 1700: 		}
1.30 mycroft 1701: 		/*
1.29 cgd 1702: 		 * With v_usecount == 0, all we need to do is clear
1703: 		 * out the vnode data structures and we are done.
1704: 		 */
1705: 		if (vp->v_usecount == 0) {
1.309 ad 1706: 			mutex_exit(&mntvnode_lock);
1707: 			vremfree(vp);
1.350 ad 1708: 			vp->v_usecount = 1;
1.309 ad 1709: 			vclean(vp, DOCLOSE);
1.324 pooka 1710: 			vrelel(vp, 0);
1.309 ad 1711: 			mutex_enter(&mntvnode_lock);
1.29 cgd 1712: 			continue;
1713: 		}
1714: 		/*
1.30 mycroft 1715: 		 * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 1716: 		 * For block or character devices, revert to an
1.318 ad 1717: 		 * anonymous device.  For all other files, just
1718: 		 * kill them.
1.29 cgd 1719: 		 */
1720: 		if (flags & FORCECLOSE) {
1.309 ad 1721: 			mutex_exit(&mntvnode_lock);
1.346 ad 1722: 			atomic_inc_uint(&vp->v_usecount);
1.29 cgd 1723: 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.309 ad 1724: 				vclean(vp, DOCLOSE);
1.324 pooka 1725: 				vrelel(vp, 0);
1.29 cgd 1726: 			} else {
1.309 ad 1727: 				vclean(vp, 0);
1.318 ad 1728: 				vp->v_op = spec_vnodeop_p; /* XXXSMP */
1.320 ad 1729: 				mutex_exit(&vp->v_interlock);
1730: 				/*
1731: 				 * The vnode isn't clean, but still resides
1732: 				 * on the mount list.  Remove it. XXX This
1733: 				 * is a bit dodgy.
1734: 				 */
1735: 				insmntque(vp, NULL);
1736: 				vrele(vp);
1.29 cgd 1737: 			}
1.309 ad 1738: 			mutex_enter(&mntvnode_lock);
1.29 cgd 1739: 			continue;
1740: 		}
1.30 mycroft 1741: #ifdef DEBUG
1.29 cgd 1742: 		if (busyprt)
1743: 			vprint("vflush: busy vnode", vp);
1.30 mycroft 1744: #endif
1.309 ad 1745: 		mutex_exit(&vp->v_interlock);
1.29 cgd 1746: 		busy++;
1747: 	}
1.309 ad 1748: 	mutex_exit(&mntvnode_lock);
1.310 pooka 1749: 	vnfree(mvp);
1.29 cgd 1750: 	if (busy)
1751: 		return (EBUSY);
1752: 	return (0);
1753: }
1754:
1755: /*
1756: * Disassociate the underlying file system from a vnode.
1.309 ad 1757: *
1758: * Must be called with the interlock held, and will return with it held.
1.29 cgd 1759: */
1.309 ad 1760: void
1761: vclean(vnode_t *vp, int flags)
1.29 cgd 1762: {
1.309 ad 1763: 	lwp_t *l = curlwp;
1764: 	bool recycle, active;
1.318 ad 1765: 	int error;
1.29 cgd 1766: 
1.309 ad 1767: 	KASSERT(mutex_owned(&vp->v_interlock));
1768: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1769: 	KASSERT(vp->v_usecount != 0);
1.166 chs 1770: 
1.309 ad 1771: 	/* If cleaning is already in progress wait until done and return. */
1772: 	if (vp->v_iflag & VI_XLOCK) {
1773: 		vwait(vp, VI_XLOCK);
1774: 		return;
1775: 	}
1.166 chs 1776: 
1.309 ad 1777: 	/* If already clean, nothing to do. */
1778: 	if ((vp->v_iflag & VI_CLEAN) != 0) {
1779: 		return;
1.112 mycroft 1780: 	}
1.87 pk 1781: 
1.29 cgd 1782: 	/*
1.309 ad 1783: 	 * Prevent the vnode from being recycled or brought into use
1784: 	 * while we clean it out.
1.29 cgd 1785: 	 */
1.302 ad 1786: 	vp->v_iflag |= VI_XLOCK;
1787: 	if (vp->v_iflag & VI_EXECMAP) {
1.307 ad 1788: 		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1789: 		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1.147 chs 1790: 	}
1.302 ad 1791: 	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	/* Our caller holds one reference; any more mean the file is
	 * actively in use and must be closed/deactivated. */
1.309 ad 1792: 	active = (vp->v_usecount > 1);
1.142 chs 1793: 
1.309 ad 1794: 	/* XXXAD should not lock vnode under layer */
1795: 	VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1.80 fvdl 1796: 
1.98 wrstuden 1797: 	/*
1.142 chs 1798: 	 * Clean out any cached data associated with the vnode.
1.318 ad 1799: 	 * If purging an active vnode, it must be closed and
1800: 	 * deactivated before being reclaimed. Note that the
1801: 	 * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1802: 	 */
1.166 chs 1803: 	if (flags & DOCLOSE) {
1.256 christos 1804: 		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.355 simonb 1805: 		if (error != 0) {
1806: 			/* XXX, fix vn_start_write's grab of mp and use that. */
1807: 
			/* Flushing failed; discard any journal data for
			 * the vnode and retry without saving. */
1808: 			if (wapbl_vphaswapbl(vp))
1809: 				WAPBL_DISCARD(wapbl_vptomp(vp));
1.256 christos 1810: 			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.355 simonb 1811: 		}
1.211 dbj 1812: 		KASSERT(error == 0);
1.302 ad 1813: 		KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.318 ad 1814: 		if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1815: 			spec_node_revoke(vp);
1.231 mycroft 1816: 		}
1.166 chs 1817: 	}
1.29 cgd 1818: 	if (active) {
1.309 ad 1819: 		VOP_INACTIVE(vp, &recycle);
1.80 fvdl 1820: 	} else {
1821: 		/*
1822: 		 * Any other processes trying to obtain this lock must first
1.302 ad 1823: 		 * wait for VI_XLOCK to clear, then call the new lock operation.
1.80 fvdl 1824: 		 */
1825: 		VOP_UNLOCK(vp, 0);
1.29 cgd 1826: 	}
1.142 chs 1827: 
1.309 ad 1828: 	/* Disassociate the underlying file system from the vnode. */
1829: 	if (VOP_RECLAIM(vp)) {
1830: 		vpanic(vp, "vclean: cannot reclaim");
1.87 pk 1831: 	}
1.30 mycroft 1832: 
1.169 chs 1833: 	KASSERT(vp->v_uobj.uo_npages == 0);
1.255 yamt 1834: 	if (vp->v_type == VREG && vp->v_ractx != NULL) {
1835: 		uvm_ra_freectx(vp->v_ractx);
1836: 		vp->v_ractx = NULL;
1837: 	}
1.80 fvdl 1838: 	cache_purge(vp);
1839: 
1.309 ad 1840: 	/* Done with purge, notify sleepers of the grim news. */
1.357.2.1 skrll 1841: 	mutex_enter(&vp->v_interlock);
1.30 mycroft 1842: 	vp->v_op = dead_vnodeop_p;
1843: 	vp->v_tag = VT_NON;
1.309 ad 1844: 	vp->v_vnlock = &vp->v_lock;
1.332 ad 1845: 	KNOTE(&vp->v_klist, NOTE_REVOKE);
1.312 ad 1846: 	vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1.304 ad 1847: 	vp->v_vflag &= ~VV_LOCKSWORK;
	/* VI_CLEAN is only set on a full DOCLOSE clean; the non-DOCLOSE
	 * path (forced device close) leaves the vnode "dead but not
	 * clean" for the caller to dispose of. */
1.319 ad 1848: 	if ((flags & DOCLOSE) != 0) {
1.318 ad 1849: 		vp->v_iflag |= VI_CLEAN;
1850: 	}
1.309 ad 1851: 	cv_broadcast(&vp->v_cv);
1852: 
1853: 	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.29 cgd 1854: }
1855:
1856: /*
1.80 fvdl 1857: * Recycle an unused vnode to the front of the free list.
1858: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1859: */
1.80 fvdl 1860: int
1.309 ad 1861: vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1.217 junyoung 1862: {
1863: 
1.309 ad 1864: 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1865: 
1866: 	mutex_enter(&vp->v_interlock);
	/* Only an unreferenced vnode may be recycled; return 0 if busy. */
1867: 	if (vp->v_usecount != 0) {
1868: 		mutex_exit(&vp->v_interlock);
1869: 		return (0);
1.29 cgd 1870: 	}
1.309 ad 1871: 	if (inter_lkp)
1872: 		mutex_exit(inter_lkp);
	/* Take the vnode off the freelist, give it a reference for the
	 * clean, then drop it; returns 1 to report the recycle. */
1873: 	vremfree(vp);
1.350 ad 1874: 	vp->v_usecount = 1;
1.309 ad 1875: 	vclean(vp, DOCLOSE);
1.324 pooka 1876: 	vrelel(vp, 0);
1.309 ad 1877: 	return (1);
1.29 cgd 1878: }
1879:
1880: /*
1.309 ad 1881: * Eliminate all activity associated with a vnode in preparation for
1882: * reuse. Drops a reference from the vnode.
1.29 cgd 1883: */
1884: void
1.309 ad 1885: vgone(vnode_t *vp)
1.80 fvdl 1886: {
1.166 chs 1887: 
	/* Take the interlock, fully clean the vnode, then drop the
	 * caller's reference (vrelel() consumes the interlock). */
1.309 ad 1888: 	mutex_enter(&vp->v_interlock);
1889: 	vclean(vp, DOCLOSE);
1.324 pooka 1890: 	vrelel(vp, 0);
1.29 cgd 1891: }
1892:
1893: /*
1894: * Lookup a vnode by device number.
1895: */
1.50 christos 1896: int
1.309 ad 1897: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 1898: {
1.309 ad 1899: 	vnode_t *vp;
1.80 fvdl 1900: 	int rc = 0;
1.29 cgd 1901: 
	/* Walk the specfs hash chain for this device; returns non-zero
	 * and sets *vpp if a matching vnode exists.  No reference is
	 * taken on the returned vnode. */
1.357.2.1 skrll 1902: 	mutex_enter(&device_lock);
1.318 ad 1903: 	for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29 cgd 1904: 		if (dev != vp->v_rdev || type != vp->v_type)
1905: 			continue;
1906: 		*vpp = vp;
1.80 fvdl 1907: 		rc = 1;
1908: 		break;
1.29 cgd 1909: 	}
1.357.2.1 skrll 1910: 	mutex_exit(&device_lock);
1.80 fvdl 1911: 	return (rc);
1.96 thorpej 1912: }
1913:
1914: /*
1915: * Revoke all the vnodes corresponding to the specified minor number
1916: * range (endpoints inclusive) of the specified major.
1917: */
1918: void
1.247 thorpej 1919: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1920: {
1.316 ad 1921: 	vnode_t *vp, **vpp;
1922: 	dev_t dev;
1.96 thorpej 1923: 	int mn;
1924: 
1.274 mrg 1925: 	vp = NULL;	/* XXX gcc */
1926: 
1.357.2.1 skrll 1927: 	mutex_enter(&device_lock);
1.316 ad 1928: 	for (mn = minl; mn <= minh; mn++) {
1929: 		dev = makedev(maj, mn);
1.318 ad 1930: 		vpp = &specfs_hash[SPECHASH(dev)];
1.316 ad 1931: 		for (vp = *vpp; vp != NULL;) {
1932: 			mutex_enter(&vp->v_interlock);
1933: 			if ((vp->v_iflag & VI_CLEAN) != 0 ||
1934: 			    dev != vp->v_rdev || type != vp->v_type) {
1935: 				mutex_exit(&vp->v_interlock);
1936: 				vp = vp->v_specnext;
1937: 				continue;
1938: 			}
1.357.2.1 skrll 1939: 			mutex_exit(&device_lock);
1.316 ad 1940: 			if (vget(vp, LK_INTERLOCK) == 0) {
1941: 				VOP_REVOKE(vp, REVOKEALL);
1942: 				vrele(vp);
1943: 			}
1.357.2.1 skrll 1944: 			mutex_enter(&device_lock);
			/* device_lock was dropped for the revoke; the
			 * hash chain may have changed, so restart from
			 * its head. */
1.316 ad 1945: 			vp = *vpp;
1946: 		}
1947: 	}
1.357.2.1 skrll 1948: 	mutex_exit(&device_lock);
1.29 cgd 1949: }
1950:
1951: /*
1952: * Calculate the total number of references to a special device.
1953: */
1.30 mycroft 1954: int
1.309 ad 1955: vcount(vnode_t *vp)
1.29 cgd 1956: {
1957: 	int count;
1958: 
1.357.2.1 skrll 1959: 	mutex_enter(&device_lock);
1.309 ad 1960: 	mutex_enter(&vp->v_interlock);
	/* Non-device vnode: report its own use count, discounting a
	 * reference held only by the deferred-vrele list (VI_INACTPEND,
	 * donated in vrelel()). */
1.318 ad 1961: 	if (vp->v_specnode == NULL) {
1.309 ad 1962: 		count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1963: 		mutex_exit(&vp->v_interlock);
1.357.2.1 skrll 1964: 		mutex_exit(&device_lock);
1.309 ad 1965: 		return (count);
1966: 	}
1967: 	mutex_exit(&vp->v_interlock);
	/* Device vnode: the per-device open count covers all aliases. */
1.318 ad 1968: 	count = vp->v_specnode->sn_dev->sd_opencnt;
1.357.2.1 skrll 1969: 	mutex_exit(&device_lock);
1.29 cgd 1970: 	return (count);
1971: }
1972:
1.101 mrg 1973: /*
1.316 ad 1974: * Eliminate all activity associated with the requested vnode
1975: * and with all vnodes aliased to the requested vnode.
1976: */
1977: void
1978: vrevoke(vnode_t *vp)
1979: {
1980: 	vnode_t *vq, **vpp;
1981: 	enum vtype type;
1982: 	dev_t dev;
1983: 
1984: 	KASSERT(vp->v_usecount > 0);
1985: 
1986: 	mutex_enter(&vp->v_interlock);
1987: 	if ((vp->v_iflag & VI_CLEAN) != 0) {
1988: 		mutex_exit(&vp->v_interlock);
1989: 		return;
	/* Non-device vnode: nothing is aliased, so just clean it.  Take
	 * an extra reference for vrelel() to consume. */
1.357.2.2 skrll 1990: 	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
1991: 		atomic_inc_uint(&vp->v_usecount);
1992: 		vclean(vp, DOCLOSE);
1993: 		vrelel(vp, 0);
1994: 		return;
1.316 ad 1995: 	} else {
1996: 		dev = vp->v_rdev;
1997: 		type = vp->v_type;
1998: 		mutex_exit(&vp->v_interlock);
1999: 	}
2000: 
	/* Device vnode: clean every alias of (dev, type) found on the
	 * specfs hash chain, including vp itself. */
1.318 ad 2001: 	vpp = &specfs_hash[SPECHASH(dev)];
1.357.2.1 skrll 2002: 	mutex_enter(&device_lock);
1.316 ad 2003: 	for (vq = *vpp; vq != NULL;) {
1.333 ad 2004: 		/* If clean or being cleaned, then ignore it. */
2005: 		mutex_enter(&vq->v_interlock);
2006: 		if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1.317 ad 2007: 		    vq->v_rdev != dev || vq->v_type != type) {
1.333 ad 2008: 			mutex_exit(&vq->v_interlock);
1.316 ad 2009: 			vq = vq->v_specnext;
2010: 			continue;
2011: 		}
1.357.2.1 skrll 2012: 		mutex_exit(&device_lock);
1.350 ad 2013: 		if (vq->v_usecount == 0) {
1.317 ad 2014: 			vremfree(vq);
1.350 ad 2015: 			vq->v_usecount = 1;
2016: 		} else {
2017: 			atomic_inc_uint(&vq->v_usecount);
1.316 ad 2018: 		}
2019: 		vclean(vq, DOCLOSE);
1.324 pooka 2020: 		vrelel(vq, 0);
		/* device_lock was dropped for the clean; restart from
		 * the head of the (possibly changed) hash chain. */
1.357.2.1 skrll 2021: 		mutex_enter(&device_lock);
1.316 ad 2022: 		vq = *vpp;
2023: 	}
1.357.2.1 skrll 2024: 	mutex_exit(&device_lock);
1.316 ad 2025: }
2026:
2027: /*
1.220 lukem 2028: * sysctl helper routine to return list of supported fstypes
2029: */
1.357.2.1 skrll 2030: int
1.220 lukem 2031: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2032: {
1.291 christos 2033: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 2034: char *where = oldp;
2035: struct vfsops *v;
2036: size_t needed, left, slen;
2037: int error, first;
2038:
2039: if (newp != NULL)
2040: return (EPERM);
2041: if (namelen != 0)
2042: return (EINVAL);
2043:
2044: first = 1;
2045: error = 0;
2046: needed = 0;
2047: left = *oldlenp;
2048:
1.311 ad 2049: sysctl_unlock();
1.302 ad 2050: mutex_enter(&vfs_list_lock);
1.220 lukem 2051: LIST_FOREACH(v, &vfs_list, vfs_list) {
2052: if (where == NULL)
2053: needed += strlen(v->vfs_name) + 1;
2054: else {
1.245 christos 2055: memset(bf, 0, sizeof(bf));
1.220 lukem 2056: if (first) {
1.245 christos 2057: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 2058: first = 0;
2059: } else {
1.245 christos 2060: bf[0] = ' ';
2061: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 2062: }
1.245 christos 2063: bf[sizeof(bf)-1] = '\0';
2064: slen = strlen(bf);
1.220 lukem 2065: if (left < slen + 1)
2066: break;
1.302 ad 2067: v->vfs_refcount++;
2068: mutex_exit(&vfs_list_lock);
1.354 pooka 2069: /* +1 to copy out the trailing NUL byte */
1.245 christos 2070: error = copyout(bf, where, slen + 1);
1.302 ad 2071: mutex_enter(&vfs_list_lock);
2072: v->vfs_refcount--;
1.220 lukem 2073: if (error)
2074: break;
2075: where += slen;
2076: needed += slen;
2077: left -= slen;
2078: }
2079: }
1.302 ad 2080: mutex_exit(&vfs_list_lock);
1.311 ad 2081: sysctl_relock();
1.220 lukem 2082: *oldlenp = needed;
2083: return (error);
2084: }
2085:
1.212 atatat 2086:
1.29 cgd 2087: int kinfo_vdebug = 1;
2088: int kinfo_vgetfailed;
2089: #define KINFO_VNODESLOP 10
2090: /*
2091: * Dump vnode list (via sysctl).
2092: * Copyout address of vnode followed by vnode.
2093: */
2094: /* ARGSUSED */
1.50 christos 2095: int
1.212 atatat 2096: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 2097: {
1.212 atatat 2098: char *where = oldp;
2099: size_t *sizep = oldlenp;
1.80 fvdl 2100: struct mount *mp, *nmp;
1.311 ad 2101: vnode_t *vp, *mvp, vbuf;
1.80 fvdl 2102: char *bp = where, *savebp;
1.29 cgd 2103: char *ewhere;
2104: int error;
1.212 atatat 2105:
2106: if (namelen != 0)
2107: return (EOPNOTSUPP);
2108: if (newp != NULL)
2109: return (EPERM);
1.29 cgd 2110:
1.309 ad 2111: #define VPTRSZ sizeof(vnode_t *)
2112: #define VNODESZ sizeof(vnode_t)
1.29 cgd 2113: if (where == NULL) {
2114: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2115: return (0);
2116: }
2117: ewhere = where + *sizep;
1.80 fvdl 2118:
1.311 ad 2119: sysctl_unlock();
1.302 ad 2120: mutex_enter(&mountlist_lock);
1.177 matt 2121: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2122: mp = nmp) {
1.344 ad 2123: if (vfs_busy(mp, &nmp)) {
1.29 cgd 2124: continue;
1.80 fvdl 2125: }
1.29 cgd 2126: savebp = bp;
1.309 ad 2127: /* Allocate a marker vnode. */
1.357.2.3! skrll 2128: mvp = vnalloc(mp);
! 2129: /* Should never fail for mp != NULL */
! 2130: KASSERT(mvp != NULL);
1.309 ad 2131: mutex_enter(&mntvnode_lock);
2132: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
2133: vmark(mvp, vp);
1.29 cgd 2134: /*
2135: * Check that the vp is still associated with
2136: * this filesystem. RACE: could have been
2137: * recycled onto the same filesystem.
2138: */
1.309 ad 2139: if (vp->v_mount != mp || vismarker(vp))
2140: continue;
1.29 cgd 2141: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309 ad 2142: (void)vunmark(mvp);
2143: mutex_exit(&mntvnode_lock);
1.310 pooka 2144: vnfree(mvp);
1.311 ad 2145: sysctl_relock();
1.29 cgd 2146: *sizep = bp - where;
2147: return (ENOMEM);
2148: }
1.311 ad 2149: memcpy(&vbuf, vp, VNODESZ);
1.309 ad 2150: mutex_exit(&mntvnode_lock);
1.357.2.2 skrll 2151: if ((error = copyout(&vp, bp, VPTRSZ)) ||
1.311 ad 2152: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309 ad 2153: mutex_enter(&mntvnode_lock);
2154: (void)vunmark(mvp);
2155: mutex_exit(&mntvnode_lock);
1.310 pooka 2156: vnfree(mvp);
1.311 ad 2157: sysctl_relock();
1.29 cgd 2158: return (error);
1.309 ad 2159: }
1.29 cgd 2160: bp += VPTRSZ + VNODESZ;
1.309 ad 2161: mutex_enter(&mntvnode_lock);
1.29 cgd 2162: }
1.309 ad 2163: mutex_exit(&mntvnode_lock);
1.344 ad 2164: vnfree(mvp);
1.339 ad 2165: vfs_unbusy(mp, false, &nmp);
1.29 cgd 2166: }
1.302 ad 2167: mutex_exit(&mountlist_lock);
1.311 ad 2168: sysctl_relock();
1.29 cgd 2169:
2170: *sizep = bp - where;
2171: return (0);
1.30 mycroft 2172: }
2173:
2174: /*
1.309 ad 2175: * Remove clean vnodes from a mountpoint's vnode list.
2176: */
2177: void
2178: vfs_scrubvnlist(struct mount *mp)
2179: {
2180: vnode_t *vp, *nvp;
2181:
1.327 ad 2182: retry:
1.309 ad 2183: mutex_enter(&mntvnode_lock);
2184: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
2185: nvp = TAILQ_NEXT(vp, v_mntvnodes);
2186: mutex_enter(&vp->v_interlock);
1.315 ad 2187: if ((vp->v_iflag & VI_CLEAN) != 0) {
1.309 ad 2188: TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.315 ad 2189: vp->v_mount = NULL;
1.327 ad 2190: mutex_exit(&mntvnode_lock);
2191: mutex_exit(&vp->v_interlock);
1.344 ad 2192: vfs_destroy(mp);
1.327 ad 2193: goto retry;
1.315 ad 2194: }
1.309 ad 2195: mutex_exit(&vp->v_interlock);
2196: }
2197: mutex_exit(&mntvnode_lock);
2198: }
2199:
2200: /*
1.30 mycroft 2201: * Check to see if a filesystem is mounted on a block device.
2202: */
2203: int
1.309 ad 2204: vfs_mountedon(vnode_t *vp)
1.30 mycroft 2205: {
1.309 ad 2206: vnode_t *vq;
1.80 fvdl 2207: int error = 0;
1.30 mycroft 2208:
1.261 reinoud 2209: if (vp->v_type != VBLK)
2210: return ENOTBLK;
1.113 fvdl 2211: if (vp->v_specmountpoint != NULL)
1.30 mycroft 2212: return (EBUSY);
1.357.2.1 skrll 2213: mutex_enter(&device_lock);
1.318 ad 2214: for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
2215: vq = vq->v_specnext) {
2216: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
2217: continue;
2218: if (vq->v_specmountpoint != NULL) {
2219: error = EBUSY;
2220: break;
1.30 mycroft 2221: }
2222: }
1.357.2.1 skrll 2223: mutex_exit(&device_lock);
1.80 fvdl 2224: return (error);
1.30 mycroft 2225: }
2226:
1.35 ws 2227: /*
1.39 mycroft 2228: * Unmount all file systems.
2229: * We traverse the list in reverse order under the assumption that doing so
2230: * will avoid needing to worry about dependencies.
2231: */
1.357.2.3! skrll 2232: bool
1.256 christos 2233: vfs_unmountall(struct lwp *l)
1.39 mycroft 2234: {
1.123 augustss 2235: struct mount *mp, *nmp;
1.357.2.3! skrll 2236: bool any_error, progress;
! 2237: int error;
1.39 mycroft 2238:
1.235 lukem 2239: printf("unmounting file systems...");
1.357.2.3! skrll 2240: for (any_error = false, mp = CIRCLEQ_LAST(&mountlist);
1.325 dyoung 2241: !CIRCLEQ_EMPTY(&mountlist);
2242: mp = nmp) {
2243: nmp = CIRCLEQ_PREV(mp, mnt_list);
1.54 jtk 2244: #ifdef DEBUG
1.235 lukem 2245: printf("\nunmounting %s (%s)...",
1.56 christos 2246: mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 2247: #endif
1.344 ad 2248: atomic_inc_uint(&mp->mnt_refcnt);
1.357.2.3! skrll 2249: if ((error = dounmount(mp, MNT_FORCE, l)) == 0)
! 2250: progress = true;
! 2251: else {
1.57 christos 2252: printf("unmount of %s failed with error %d\n",
1.40 mycroft 2253: mp->mnt_stat.f_mntonname, error);
1.357.2.3! skrll 2254: any_error = true;
1.40 mycroft 2255: }
1.39 mycroft 2256: }
1.235 lukem 2257: printf(" done\n");
1.357.2.3! skrll 2258: if (any_error)
1.57 christos 2259: printf("WARNING: some file systems would not unmount\n");
1.357.2.3! skrll 2260: return progress;
1.40 mycroft 2261: }
2262:
2263: /*
2264: * Sync and unmount file systems before shutting down.
2265: */
void
vfs_shutdown(void)
{
	struct lwp *l;

	/* XXX we're certainly not running in lwp0's context! */
	l = curlwp;
	if (l == NULL)
		l = &lwp0;

	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	/* Push all dirty data to disk. */
	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		/* Buffers still busy: proceed with shutdown anyway. */
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems.  (Result is deliberately ignored here.) */
	vfs_unmountall(l);
}
2311:
2312: /*
2313: * Mount the root file system. If the operator didn't specify a
2314: * file system to use, try all possible file systems until one
2315: * succeeds.
2316: */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check rootdev against the class of the root device. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		/* Network root: there must be no block device number. */
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		/* Disk root: open the device and keep rootvp for it. */
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
	        if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it.  Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			/* Drop the reference vfs_getopsbyname() took. */
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/* Pin v across the unlocked mountroot attempt. */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	if (v == NULL) {
		/* Every configured file system declined. */
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, release the disk device opened above. */
	if (error && device_class(root_device) == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		vrele(rootvp);
	}
	return (error);
}
1.326 ad 2408:
2409: /*
1.353 pooka 2410: * Get a new unique fsid
1.326 ad 2411: */
1.353 pooka 2412: void
2413: vfs_getnewfsid(struct mount *mp)
1.326 ad 2414: {
1.353 pooka 2415: static u_short xxxfs_mntid;
2416: fsid_t tfsid;
2417: int mtype;
1.326 ad 2418:
1.353 pooka 2419: mutex_enter(&mntid_lock);
2420: mtype = makefstype(mp->mnt_op->vfs_name);
2421: mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
2422: mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
2423: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2424: if (xxxfs_mntid == 0)
2425: ++xxxfs_mntid;
2426: tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
2427: tfsid.__fsid_val[1] = mtype;
2428: if (!CIRCLEQ_EMPTY(&mountlist)) {
2429: while (vfs_getvfs(&tfsid)) {
2430: tfsid.__fsid_val[0]++;
2431: xxxfs_mntid++;
2432: }
2433: }
2434: mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2435: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2436: mutex_exit(&mntid_lock);
2437: }
1.326 ad 2438:
1.353 pooka 2439: /*
2440: * Make a 'unique' number from a mount type name.
2441: */
/*
 * Make a 'unique' number from a mount type name: fold each byte of
 * the name into a shift-and-xor hash.
 */
long
makefstype(const char *type)
{
	long hash = 0;
	const char *p;

	for (p = type; *p != '\0'; p++)
		hash = (hash << 2) ^ *p;
	return hash;
}
2453:
2454: /*
2455: * Set vnode attributes to VNOVAL
2456: */
2457: void
2458: vattr_null(struct vattr *vap)
2459: {
2460:
2461: vap->va_type = VNON;
2462:
2463: /*
2464: * Assign individually so that it is safe even if size and
2465: * sign of each member are varied.
2466: */
2467: vap->va_mode = VNOVAL;
2468: vap->va_nlink = VNOVAL;
2469: vap->va_uid = VNOVAL;
2470: vap->va_gid = VNOVAL;
2471: vap->va_fsid = VNOVAL;
2472: vap->va_fileid = VNOVAL;
2473: vap->va_size = VNOVAL;
2474: vap->va_blocksize = VNOVAL;
2475: vap->va_atime.tv_sec =
2476: vap->va_mtime.tv_sec =
2477: vap->va_ctime.tv_sec =
2478: vap->va_birthtime.tv_sec = VNOVAL;
2479: vap->va_atime.tv_nsec =
2480: vap->va_mtime.tv_nsec =
2481: vap->va_ctime.tv_nsec =
2482: vap->va_birthtime.tv_nsec = VNOVAL;
2483: vap->va_gen = VNOVAL;
2484: vap->va_flags = VNOVAL;
2485: vap->va_rdev = VNOVAL;
2486: vap->va_bytes = VNOVAL;
2487: vap->va_vaflags = 0;
2488: }
2489:
/* Element count of a statically-sized array. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
/* Bounds-checked lookup in the name tables below. */
#define ARRAY_PRINT(idx, arr) \
    ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

/* Printable names for vnode tags, types and flag bits (see vprint()). */
const char * const vnode_tags[] = { VNODE_TAGS };
const char * const vnode_types[] = { VNODE_TYPES };
const char vnode_flagbits[] = VNODE_FLAGBITS;
2497:
2498: /*
2499: * Print out a description of a vnode.
2500: */
void
vprint(const char *label, struct vnode *vp)
{
	struct vnlock *vl;
	char bf[96];
	int flag;

	/* Prefer the stacked lock (v_vnlock) over the private one. */
	vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
	/* Format the union of all three flag words symbolically. */
	flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
	snprintb(bf, sizeof(bf), vnode_flagbits, flag);

	if (label != NULL)
		printf("%s: ", label);
	printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
	    "usecount %d, writecount %d, holdcount %d\n"
	    "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
	    vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
	    vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
	/* Let the file system print its own per-vnode details. */
	if (vp->v_data != NULL) {
		printf("\t");
		VOP_PRINT(vp);
	}
}
2526:
2527: #ifdef DEBUG
2528: /*
2529: * List all of the locked vnodes in the system.
2530: * Called when debugging the kernel.
2531: */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	mutex_enter(&mountlist_lock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, &nmp)) {
			continue;
		}
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		/*
		 * NOTE(review): mountlist_lock is re-taken here before
		 * vfs_unbusy(), but sysctl_kern_vnode() calls
		 * vfs_unbusy(mp, false, &nmp) without it -- confirm
		 * which lock protocol vfs_unbusy() expects.
		 */
		mutex_enter(&mountlist_lock);
		vfs_unbusy(mp, false, &nmp);
	}
	mutex_exit(&mountlist_lock);
}
2554: #endif
2555:
2556: /*
2557: * Do the usual access checking.
2558: * file_mode, uid and gid are from the vnode in question,
2559: * while acc_mode and cred are from the VOP_ACCESS parameter list
2560: */
2561: int
2562: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2563: mode_t acc_mode, kauth_cred_t cred)
2564: {
2565: mode_t mask;
2566: int error, ismember;
2567:
2568: /*
2569: * Super-user always gets read/write access, but execute access depends
2570: * on at least one execute bit being set.
2571: */
2572: if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
2573: if ((acc_mode & VEXEC) && type != VDIR &&
2574: (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2575: return (EACCES);
2576: return (0);
2577: }
2578:
2579: mask = 0;
2580:
2581: /* Otherwise, check the owner. */
2582: if (kauth_cred_geteuid(cred) == uid) {
2583: if (acc_mode & VEXEC)
2584: mask |= S_IXUSR;
2585: if (acc_mode & VREAD)
2586: mask |= S_IRUSR;
2587: if (acc_mode & VWRITE)
2588: mask |= S_IWUSR;
2589: return ((file_mode & mask) == mask ? 0 : EACCES);
2590: }
2591:
2592: /* Otherwise, check the groups. */
2593: error = kauth_cred_ismember_gid(cred, gid, &ismember);
2594: if (error)
2595: return (error);
2596: if (kauth_cred_getegid(cred) == gid || ismember) {
2597: if (acc_mode & VEXEC)
2598: mask |= S_IXGRP;
2599: if (acc_mode & VREAD)
2600: mask |= S_IRGRP;
2601: if (acc_mode & VWRITE)
2602: mask |= S_IWGRP;
2603: return ((file_mode & mask) == mask ? 0 : EACCES);
2604: }
2605:
2606: /* Otherwise, check everyone else. */
2607: if (acc_mode & VEXEC)
2608: mask |= S_IXOTH;
2609: if (acc_mode & VREAD)
2610: mask |= S_IROTH;
2611: if (acc_mode & VWRITE)
2612: mask |= S_IWOTH;
2613: return ((file_mode & mask) == mask ? 0 : EACCES);
2614: }
2615:
2616: /*
2617: * Given a file system name, look up the vfsops for that
2618: * file system, or return NULL if file system isn't present
2619: * in the kernel.
2620: */
2621: struct vfsops *
2622: vfs_getopsbyname(const char *name)
2623: {
2624: struct vfsops *v;
2625:
2626: mutex_enter(&vfs_list_lock);
2627: LIST_FOREACH(v, &vfs_list, vfs_list) {
2628: if (strcmp(v->vfs_name, name) == 0)
2629: break;
2630: }
2631: if (v != NULL)
2632: v->vfs_refcount++;
2633: mutex_exit(&vfs_list_lock);
2634:
2635: return (v);
2636: }
2637:
2638: void
2639: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2640: {
2641: const struct statvfs *mbp;
2642:
2643: if (sbp == (mbp = &mp->mnt_stat))
2644: return;
2645:
2646: (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2647: sbp->f_fsid = mbp->f_fsid;
2648: sbp->f_owner = mbp->f_owner;
2649: sbp->f_flag = mbp->f_flag;
2650: sbp->f_syncwrites = mbp->f_syncwrites;
2651: sbp->f_asyncwrites = mbp->f_asyncwrites;
2652: sbp->f_syncreads = mbp->f_syncreads;
2653: sbp->f_asyncreads = mbp->f_asyncreads;
2654: (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2655: (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2656: sizeof(sbp->f_fstypename));
2657: (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2658: sizeof(sbp->f_mntonname));
2659: (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2660: sizeof(sbp->f_mntfromname));
2661: sbp->f_namemax = mbp->f_namemax;
2662: }
2663:
int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    const char *vfsname, struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	/* copystr for kernel-space strings, copyinstr for user-space. */
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			/*
			 * Caller is chrooted: prefix f_mntonname with
			 * the path from the real root to the chroot
			 * directory, built right-to-left in a pathname
			 * buffer.
			 */
			size_t len;
			char *bp;
			char *path = PNBUF_GET();

			bp = path + MAXPATHLEN;
			*--bp = '\0';
			rw_enter(&cwdi->cwdi_lock, RW_READER);
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			rw_exit(&cwdi->cwdi_lock);
			if (error) {
				PNBUF_PUT(path);
				return error;
			}

			/* Clamp the prefix to the f_mntonname field. */
			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			PNBUF_PUT(path);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append onp after the chroot prefix. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			/* No chroot: copy onp directly. */
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero-fill the unused tail of the field. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
2731:
/* Produce a file system timestamp; currently just wraps nanotime(). */
void
vfs_timestamp(struct timespec *ts)
{

	nanotime(ts);
}
2738:
time_t rootfstime;			/* recorded root fs time, if known */
/* Record the root file system's notion of the time (see rootfstime). */
void
setrootfstime(time_t t)
{
	rootfstime = t;
}
2745:
2746: /*
2747: * Sham lock manager for vnodes. This is a temporary measure.
2748: */
int
vlockmgr(struct vnlock *vl, int flags)
{

	/* Only these flags are understood by the sham lock manager. */
	KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		/* Fast path: uncontended read lock. */
		if (rw_tryenter(&vl->vl_lock, RW_READER)) {
			return 0;
		}
		if ((flags & LK_NOWAIT) != 0) {
			return EBUSY;
		}
		rw_enter(&vl->vl_lock, RW_READER);
		return 0;

	case LK_EXCLUSIVE:
		/* Fast path: uncontended write lock. */
		if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
			return 0;
		}
		/*
		 * Already write-locked by us and recursion allowed:
		 * count the recursion instead of deadlocking.
		 */
		if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
		    rw_write_held(&vl->vl_lock)) {
			vl->vl_recursecnt++;
			return 0;
		}
		if ((flags & LK_NOWAIT) != 0) {
			return EBUSY;
		}
		rw_enter(&vl->vl_lock, RW_WRITER);
		return 0;

	case LK_RELEASE:
		/* Unwind a recursive hold before dropping the lock. */
		if (vl->vl_recursecnt != 0) {
			KASSERT(rw_write_held(&vl->vl_lock));
			vl->vl_recursecnt--;
			return 0;
		}
		rw_exit(&vl->vl_lock);
		return 0;

	default:
		panic("vlockmgr: flags %x", flags);
	}
}
2794:
2795: int
2796: vlockstatus(struct vnlock *vl)
2797: {
2798:
2799: if (rw_write_held(&vl->vl_lock)) {
2800: return LK_EXCLUSIVE;
2801: }
2802: if (rw_read_held(&vl->vl_lock)) {
2803: return LK_SHARED;
2804: }
2805: return 0;
2806: }
1.353 pooka 2807:
2808: /*
2809: * mount_specific_key_create --
2810: * Create a key for subsystem mount-specific data.
2811: */
/* Thin wrapper over specificdata_key_create() for the mount domain. */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
}
2818:
2819: /*
2820: * mount_specific_key_delete --
2821: * Delete a key for subsystem mount-specific data.
2822: */
/* Thin wrapper over specificdata_key_delete() for the mount domain. */
void
mount_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(mount_specificdata_domain, key);
}
2829:
2830: /*
2831: * mount_initspecific --
2832: * Initialize a mount's specificdata container.
2833: */
void
mount_initspecific(struct mount *mp)
{
	int error;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	/* Failure is only caught under DIAGNOSTIC; assumed impossible. */
	KASSERT(error == 0);
}
2843:
2844: /*
2845: * mount_finispecific --
2846: * Finalize a mount's specificdata container.
2847: */
/* Tear down a mount's specificdata container (pairs with initspecific). */
void
mount_finispecific(struct mount *mp)
{

	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}
2854:
2855: /*
2856: * mount_getspecific --
2857: * Return mount-specific data corresponding to the specified key.
2858: */
/* Fetch the mount-specific datum stored under key (NULL if unset). */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

	return (specificdata_getspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key));
}
2866:
2867: /*
2868: * mount_setspecific --
2869: * Set mount-specific data corresponding to the specified key.
2870: */
/* Store a mount-specific datum under key. */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

	specificdata_setspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key, data);
}
2878:
/* Call the fs-specific mount routine, always under KERNEL_LOCK. */
int
VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
{
	int error;

	KERNEL_LOCK(1, NULL);
	error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
	KERNEL_UNLOCK_ONE(NULL);

	return error;
}
2890:
/*
 * Call the fs-specific start routine; KERNEL_LOCK'ed unless the
 * file system is marked IMNT_MPSAFE.  mnt_iflag is intentionally
 * re-read after the call, so this wrapper is not restructured.
 */
int
VFS_START(struct mount *mp, int a)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_start))(mp, a);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2906:
/* Call the fs-specific unmount routine, always under KERNEL_LOCK. */
int
VFS_UNMOUNT(struct mount *mp, int a)
{
	int error;

	KERNEL_LOCK(1, NULL);
	error = (*(mp->mnt_op->vfs_unmount))(mp, a);
	KERNEL_UNLOCK_ONE(NULL);

	return error;
}
2918:
/* Get the root vnode; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_ROOT(struct mount *mp, struct vnode **a)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_root))(mp, a);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2934:
/* Quota control; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2950:
/* Fill in statvfs info; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_STATVFS(struct mount *mp, struct statvfs *a)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2966:
/* Sync the file system; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2982:
/* File handle to vnode; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
2998:
/* Vnode to file handle; note this gates on the vnode's VV_MPSAFE flag. */
int
VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
{
	int error;

	if ((vp->v_vflag & VV_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
	if ((vp->v_vflag & VV_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
3014:
/* Take a snapshot; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
3030:
/* Extended attribute control, unconditionally under KERNEL_LOCK. */
int
VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
{
	int error;

	KERNEL_LOCK(1, NULL);	/* XXXSMP check ffs */
	error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
	KERNEL_UNLOCK_ONE(NULL);	/* XXX */

	return error;
}
3042:
/* Suspend/resume control; KERNEL_LOCK'ed unless the fs is IMNT_MPSAFE. */
int
VFS_SUSPENDCTL(struct mount *mp, int a)
{
	int error;

	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_LOCK(1, NULL);
	}
	error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
		KERNEL_UNLOCK_ONE(NULL);
	}

	return error;
}
3058:
3059: #ifdef DDB
/* Bit names used by snprintb() when formatting buf flags below. */
static const char buf_flagbits[] = BUF_FLAGBITS;
3061:
/* DDB helper: print one buf through the supplied printf-like pr(). */
void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
	char bf[1024];

	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
	    PRIx64 " dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);

	/* Decode the union of all three flag words symbolically. */
	snprintb(bf, sizeof(bf),
	    buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
	(*pr)("  error %d flags 0x%s\n", bp->b_error, bf);

	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p\n",
	    bp->b_data, bp->b_saveaddr);
	(*pr)("  iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
}
3081:
3082:
/*
 * DDB helper: print one vnode through pr(); with full != 0 also dump
 * the clean and dirty buffer lists attached to it.
 */
void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	/* Start with the embedded UVM object's own printout. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	snprintb(bf, sizeof(bf),
	    vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);

	(*pr)("data %p writecount %ld holdcnt %ld\n",
	    vp->v_data, vp->v_writecount, vp->v_holdcnt);

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_mount, vp->v_mountedhere);

	(*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
3121:
3122: void
3123: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
3124: {
3125: char sbuf[256];
3126:
3127: (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3128: mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3129:
3130: (*pr)("fs_bshift %d dev_bshift = %d\n",
3131: mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3132:
1.357.2.1 skrll 3133: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1.353 pooka 3134: (*pr)("flag = %s\n", sbuf);
3135:
1.357.2.1 skrll 3136: snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1.353 pooka 3137: (*pr)("iflag = %s\n", sbuf);
3138:
3139: (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
3140: &mp->mnt_unmounting, &mp->mnt_updating);
3141:
3142: (*pr)("statvfs cache:\n");
3143: (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3144: (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3145: (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3146:
3147: (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3148: (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3149: (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3150: (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3151:
3152: (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3153: (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3154: (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3155: (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3156:
3157: (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3158: mp->mnt_stat.f_fsidx.__fsid_val[0],
3159: mp->mnt_stat.f_fsidx.__fsid_val[1]);
3160:
3161: (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3162: (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3163:
1.357.2.1 skrll 3164: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
3165:
1.353 pooka 3166: (*pr)("\tflag = %s\n",sbuf);
3167: (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3168: (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3169: (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3170: (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3171: (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3172: (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3173: (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3174:
3175: {
3176: int cnt = 0;
3177: struct vnode *vp;
3178: (*pr)("locked vnodes =");
3179: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3180: if (VOP_ISLOCKED(vp)) {
3181: if ((++cnt % 6) == 0) {
3182: (*pr)(" %p,\n\t", vp);
3183: } else {
3184: (*pr)(" %p,", vp);
3185: }
3186: }
3187: }
3188: (*pr)("\n");
3189: }
3190:
3191: if (full) {
3192: int cnt = 0;
3193: struct vnode *vp;
3194: (*pr)("all vnodes =");
3195: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3196: if (!TAILQ_NEXT(vp, v_mntvnodes)) {
3197: (*pr)(" %p", vp);
3198: } else if ((++cnt % 6) == 0) {
3199: (*pr)(" %p,\n\t", vp);
3200: } else {
3201: (*pr)(" %p,", vp);
3202: }
3203: }
3204: (*pr)("\n", vp);
3205: }
3206: }
3207: #endif /* DDB */
1.357.2.3! skrll 3208:
CVSweb <webmaster@jp.NetBSD.org>