Annotation of src/sys/kern/vfs_subr.c, Revision 1.378
1.378 ! pooka 1: /* $NetBSD: vfs_subr.c,v 1.377 2009/04/29 15:44:55 dyoung Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302 ad 9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30: * POSSIBILITY OF SUCH DAMAGE.
31: */
1.32 cgd 32:
1.29 cgd 33: /*
1.30 mycroft 34: * Copyright (c) 1989, 1993
35: * The Regents of the University of California. All rights reserved.
1.29 cgd 36: * (c) UNIX System Laboratories, Inc.
37: * All or some portions of this file are derived from material licensed
38: * to the University of California by American Telephone and Telegraph
39: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40: * the permission of UNIX System Laboratories, Inc.
41: *
42: * Redistribution and use in source and binary forms, with or without
43: * modification, are permitted provided that the following conditions
44: * are met:
45: * 1. Redistributions of source code must retain the above copyright
46: * notice, this list of conditions and the following disclaimer.
47: * 2. Redistributions in binary form must reproduce the above copyright
48: * notice, this list of conditions and the following disclaimer in the
49: * documentation and/or other materials provided with the distribution.
1.204 agc 50: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 51: * may be used to endorse or promote products derived from this software
52: * without specific prior written permission.
53: *
54: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64: * SUCH DAMAGE.
65: *
1.32 cgd 66: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 67: */
68:
69: /*
1.346 ad 70: * Note on v_usecount and locking:
71: *
 72: * At nearly all points where it is known that v_usecount could be zero,
 73: * the vnode interlock will be held.
74: *
75: * To change v_usecount away from zero, the interlock must be held. To
76: * change from a non-zero value to zero, again the interlock must be
77: * held.
78: *
79: * Changing the usecount from a non-zero value to a non-zero value can
80: * safely be done using atomic operations, without the interlock held.
1.29 cgd 81: */
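/*
 * [Editorial illustration -- not part of the original source.]  A
 * minimal sketch of the discipline described above, modelled on the
 * body of vget() further down in this file; the helper name
 * example_vnode_ref() is hypothetical.  Transitions of v_usecount to
 * or from zero are made with v_interlock held, while nonzero ->
 * nonzero transitions may use atomic operations (see vtryget() and
 * vtryrele() below).
 */
#if 0	/* illustrative only */
static void
example_vnode_ref(vnode_t *vp)
{

	mutex_enter(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		/* 0 -> 1: the interlock must be held (cf. vget()). */
		vp->v_usecount = 1;
	} else {
		/* nonzero -> nonzero: an atomic increment suffices. */
		atomic_inc_uint(&vp->v_usecount);
	}
	mutex_exit(&vp->v_interlock);
}
#endif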
1.162 lukem 82:
83: #include <sys/cdefs.h>
1.378 ! pooka 84: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.377 2009/04/29 15:44:55 dyoung Exp $");
1.78 mrg 85:
1.125 chs 86: #include "opt_ddb.h"
1.95 thorpej 87: #include "opt_compat_netbsd.h"
1.97 christos 88: #include "opt_compat_43.h"
1.29 cgd 89:
90: #include <sys/param.h>
1.30 mycroft 91: #include <sys/systm.h>
1.363 pooka 92: #include <sys/conf.h>
1.29 cgd 93: #include <sys/proc.h>
1.138 bouyer 94: #include <sys/kernel.h>
1.29 cgd 95: #include <sys/mount.h>
1.46 mycroft 96: #include <sys/fcntl.h>
1.29 cgd 97: #include <sys/vnode.h>
1.30 mycroft 98: #include <sys/stat.h>
1.29 cgd 99: #include <sys/namei.h>
100: #include <sys/ucred.h>
101: #include <sys/buf.h>
102: #include <sys/errno.h>
1.366 yamt 103: #include <sys/kmem.h>
1.51 christos 104: #include <sys/syscallargs.h>
1.58 thorpej 105: #include <sys/device.h>
1.192 christos 106: #include <sys/filedesc.h>
1.266 elad 107: #include <sys/kauth.h>
1.307 ad 108: #include <sys/atomic.h>
1.309 ad 109: #include <sys/kthread.h>
1.355 simonb 110: #include <sys/wapbl.h>
1.50 christos 111:
1.30 mycroft 112: #include <miscfs/specfs/specdev.h>
1.113 fvdl 113: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 114:
1.125 chs 115: #include <uvm/uvm.h>
1.255 yamt 116: #include <uvm/uvm_readahead.h>
1.125 chs 117: #include <uvm/uvm_ddb.h>
1.129 mrg 118:
119: #include <sys/sysctl.h>
1.77 mrg 120:
1.353 pooka 121: const enum vtype iftovt_tab[16] = {
122: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
123: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
124: };
125: const int vttoif_tab[9] = {
126: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
127: S_IFSOCK, S_IFIFO, S_IFMT,
128: };
129:
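/*
 * [Editorial note -- not part of the original source.]  iftovt_tab is
 * indexed by the S_IFMT file-type bits of a mode_t shifted right by 12,
 * and vttoif_tab by an enum vtype value; in NetBSD they back the
 * IFTOVT() and VTTOIF() macros from <sys/vnode.h>.  For example,
 * IFTOVT(S_IFREG) yields VREG, and VTTOIF(VREG) yields S_IFREG.
 */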
130: /*
131: * Insq/Remq for the vnode usage lists.
132: */
133: #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
134: #define bufremvn(bp) { \
135: LIST_REMOVE(bp, b_vnbufs); \
136: (bp)->b_vnbufs.le_next = NOLIST; \
137: }
138:
139: int doforce = 1; /* 1 => permit forcible unmounting */
140: int prtactive = 0; /* 1 => print out reclaim of active vnodes */
141:
1.309 ad 142: static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
143: static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
144: static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
145:
1.353 pooka 146: struct mntlist mountlist = /* mounted filesystem list */
147: CIRCLEQ_HEAD_INITIALIZER(mountlist);
148:
149: u_int numvnodes;
150: static specificdata_domain_t mount_specificdata_domain;
151:
1.309 ad 152: static int vrele_pending;
1.351 ad 153: static int vrele_gen;
1.309 ad 154: static kmutex_t vrele_lock;
155: static kcondvar_t vrele_cv;
156: static lwp_t *vrele_lwp;
1.113 fvdl 157:
1.353 pooka 158: kmutex_t mountlist_lock;
159: kmutex_t mntid_lock;
160: kmutex_t mntvnode_lock;
161: kmutex_t vnode_free_list_lock;
162: kmutex_t vfs_list_lock;
163:
1.309 ad 164: static pool_cache_t vnode_cache;
1.186 thorpej 165:
1.89 kleink 166: /*
1.353 pooka 167: * These define the root filesystem and device.
168: */
169: struct vnode *rootvnode;
170: struct device *root_device; /* root device */
171:
172: /*
1.89 kleink 173: * Local declarations.
174: */
1.276 hannken 175:
1.309 ad 176: static void vrele_thread(void *);
177: static void insmntque(vnode_t *, struct mount *);
178: static int getdevvp(dev_t, vnode_t **, enum vtype);
1.364 yamt 179: static vnode_t *getcleanvnode(void);
1.309 ad 180: void vpanic(vnode_t *, const char *);
181:
1.353 pooka 182: #ifdef DEBUG
183: void printlockedvnodes(void);
184: #endif
185:
1.309 ad 186: #ifdef DIAGNOSTIC
187: void
188: vpanic(vnode_t *vp, const char *msg)
189: {
190:
191: vprint(NULL, vp);
192: panic("%s\n", msg);
193: }
194: #else
195: #define vpanic(vp, msg) /* nothing */
196: #endif
197:
198: void
199: vn_init1(void)
200: {
201:
202: vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
203: NULL, IPL_NONE, NULL, NULL, NULL);
204: KASSERT(vnode_cache != NULL);
205:
206: /* Create deferred release thread. */
207: mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
208: cv_init(&vrele_cv, "vrele");
209: if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
210: NULL, &vrele_lwp, "vrele"))
211: panic("fork vrele");
212: }
1.51 christos 213:
1.353 pooka 214: /*
215: * Initialize the vnode management data structures.
216: */
217: void
218: vntblinit(void)
219: {
220:
221: mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
222: mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
223: mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
224: mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
225: mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
226:
227: mount_specificdata_domain = specificdata_domain_create();
228:
229: /* Initialize the filesystem syncer. */
230: vn_initialize_syncerd();
231: vn_init1();
232: }
233:
1.202 yamt 234: int
1.256 christos 235: vfs_drainvnodes(long target, struct lwp *l)
1.202 yamt 236: {
237:
238: while (numvnodes > target) {
1.309 ad 239: vnode_t *vp;
1.202 yamt 240:
1.309 ad 241: mutex_enter(&vnode_free_list_lock);
242: vp = getcleanvnode();
1.202 yamt 243: if (vp == NULL)
244: return EBUSY; /* give up */
1.309 ad 245: ungetnewvnode(vp);
1.202 yamt 246: }
247:
248: return 0;
249: }
250:
251: /*
1.353 pooka 252: * Lookup a mount point by filesystem identifier.
253: *
254: * XXX Needs to add a reference to the mount point.
255: */
256: struct mount *
257: vfs_getvfs(fsid_t *fsid)
258: {
259: struct mount *mp;
260:
261: mutex_enter(&mountlist_lock);
262: CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
263: if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
264: mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
265: mutex_exit(&mountlist_lock);
266: return (mp);
267: }
268: }
269: mutex_exit(&mountlist_lock);
270: return ((struct mount *)0);
271: }
272:
273: /*
274: * Drop a reference to a mount structure, freeing if the last reference.
275: */
276: void
277: vfs_destroy(struct mount *mp)
278: {
279:
1.357 ad 280: if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
1.353 pooka 281: return;
282: }
283:
284: /*
285: * Nothing else has visibility of the mount: we can now
286: * free the data structures.
287: */
1.357 ad 288: KASSERT(mp->mnt_refcnt == 0);
1.353 pooka 289: specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
290: rw_destroy(&mp->mnt_unmounting);
291: mutex_destroy(&mp->mnt_updating);
292: mutex_destroy(&mp->mnt_renamelock);
293: if (mp->mnt_op != NULL) {
294: vfs_delref(mp->mnt_op);
295: }
296: kmem_free(mp, sizeof(*mp));
297: }
298:
299: /*
1.202 yamt 300: * Grab a vnode from the freelist and clean it.
301: */
1.309 ad 302: vnode_t *
303: getcleanvnode(void)
1.202 yamt 304: {
1.309 ad 305: vnode_t *vp;
306: vnodelst_t *listhd;
1.202 yamt 307:
1.309 ad 308: KASSERT(mutex_owned(&vnode_free_list_lock));
1.229 yamt 309:
1.309 ad 310: retry:
1.229 yamt 311: listhd = &vnode_free_list;
312: try_nextlist:
313: TAILQ_FOREACH(vp, listhd, v_freelist) {
1.309 ad 314: /*
315: * It's safe to test v_usecount and v_iflag
316: * without holding the interlock here, since
 317: * vnodes in those states should never appear
 318: * on the free lists.
319: */
320: if (vp->v_usecount != 0) {
321: vpanic(vp, "free vnode isn't");
322: }
323: if ((vp->v_iflag & VI_CLEAN) != 0) {
324: vpanic(vp, "clean vnode on freelist");
325: }
326: if (vp->v_freelisthd != listhd) {
327: printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
328: vpanic(vp, "list head mismatch");
329: }
330: if (!mutex_tryenter(&vp->v_interlock))
1.208 hannken 331: continue;
1.227 yamt 332: /*
1.309 ad 333: * Our lwp might hold the underlying vnode
334: * locked, so don't try to reclaim a VI_LAYER
335: * node if it's locked.
1.227 yamt 336: */
1.302 ad 337: if ((vp->v_iflag & VI_XLOCK) == 0 &&
338: ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
1.285 hannken 339: break;
1.202 yamt 340: }
1.309 ad 341: mutex_exit(&vp->v_interlock);
1.202 yamt 342: }
343:
1.309 ad 344: if (vp == NULL) {
1.229 yamt 345: if (listhd == &vnode_free_list) {
346: listhd = &vnode_hold_list;
347: goto try_nextlist;
348: }
1.309 ad 349: mutex_exit(&vnode_free_list_lock);
350: return NULL;
1.202 yamt 351: }
352:
1.309 ad 353: /* Remove it from the freelist. */
1.202 yamt 354: TAILQ_REMOVE(listhd, vp, v_freelist);
1.309 ad 355: vp->v_freelisthd = NULL;
356: mutex_exit(&vnode_free_list_lock);
357:
358: /*
359: * The vnode is still associated with a file system, so we must
360: * clean it out before reusing it. We need to add a reference
361: * before doing this. If the vnode gains another reference while
362: * being cleaned out then we lose - retry.
363: */
1.346 ad 364: atomic_inc_uint(&vp->v_usecount);
1.309 ad 365: vclean(vp, DOCLOSE);
366: if (vp->v_usecount == 1) {
367: /* We're about to dirty it. */
368: vp->v_iflag &= ~VI_CLEAN;
369: mutex_exit(&vp->v_interlock);
1.318 ad 370: if (vp->v_type == VBLK || vp->v_type == VCHR) {
371: spec_node_destroy(vp);
372: }
373: vp->v_type = VNON;
1.309 ad 374: } else {
375: /*
376: * Don't return to freelist - the holder of the last
377: * reference will destroy it.
378: */
1.348 ad 379: vrelel(vp, 0); /* releases vp->v_interlock */
1.309 ad 380: mutex_enter(&vnode_free_list_lock);
381: goto retry;
382: }
383:
384: if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
385: !TAILQ_EMPTY(&vp->v_uobj.memq)) {
386: vpanic(vp, "cleaned vnode isn't");
387: }
388: if (vp->v_numoutput != 0) {
389: vpanic(vp, "clean vnode has pending I/O's");
390: }
391: if ((vp->v_iflag & VI_ONWORKLST) != 0) {
392: vpanic(vp, "clean vnode on syncer list");
393: }
1.202 yamt 394:
395: return vp;
396: }
397:
1.327 ad 398: /*
1.338 ad 399: * Mark a mount point as busy, and gain a new reference to it. Used to
1.344 ad 400: * prevent the file system from being unmounted during critical sections.
1.338 ad 401: *
1.339 ad 402: * => The caller must hold a pre-existing reference to the mount.
1.344 ad 403: * => Will fail if the file system is being unmounted, or is unmounted.
1.29 cgd 404: */
1.50 christos 405: int
1.344 ad 406: vfs_busy(struct mount *mp, struct mount **nextp)
1.29 cgd 407: {
408:
1.344 ad 409: KASSERT(mp->mnt_refcnt > 0);
1.338 ad 410:
1.344 ad 411: if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
412: if (nextp != NULL) {
413: KASSERT(mutex_owned(&mountlist_lock));
1.339 ad 414: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
415: }
1.344 ad 416: return EBUSY;
1.339 ad 417: }
1.344 ad 418: if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
419: rw_exit(&mp->mnt_unmounting);
1.338 ad 420: if (nextp != NULL) {
1.344 ad 421: KASSERT(mutex_owned(&mountlist_lock));
422: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
1.338 ad 423: }
1.344 ad 424: return ENOENT;
1.327 ad 425: }
1.344 ad 426: if (nextp != NULL) {
427: mutex_exit(&mountlist_lock);
428: }
429: atomic_inc_uint(&mp->mnt_refcnt);
430: return 0;
1.29 cgd 431: }
432:
433: /*
1.344 ad 434: * Unbusy a busy filesystem.
1.339 ad 435: *
1.344 ad 436: * => If keepref is true, preserve reference added by vfs_busy().
437: * => If nextp != NULL, acquire mountlist_lock.
1.29 cgd 438: */
439: void
1.339 ad 440: vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
1.29 cgd 441: {
442:
1.327 ad 443: KASSERT(mp->mnt_refcnt > 0);
444:
1.344 ad 445: if (nextp != NULL) {
446: mutex_enter(&mountlist_lock);
447: }
448: rw_exit(&mp->mnt_unmounting);
449: if (!keepref) {
450: vfs_destroy(mp);
1.327 ad 451: }
1.339 ad 452: if (nextp != NULL) {
1.344 ad 453: KASSERT(mutex_owned(&mountlist_lock));
1.339 ad 454: *nextp = CIRCLEQ_NEXT(mp, mnt_list);
455: }
1.29 cgd 456: }
457:
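/*
 * [Editorial illustration -- not part of the original source.]  A
 * sketch of the usual vfs_busy()/vfs_unbusy() pairing for a caller that
 * already holds a reference to the mount, mirroring the pattern used by
 * getnewvnode() below; example_mount_op() is hypothetical.
 */
#if 0	/* illustrative only */
static int
example_mount_op(struct mount *mp)
{
	int error;

	/* Fails (EBUSY/ENOENT) if an unmount is in progress or done. */
	error = vfs_busy(mp, NULL);
	if (error != 0)
		return error;
	/* ... the file system cannot be unmounted in this window ... */
	vfs_unbusy(mp, false, NULL);	/* keepref=false drops the busy ref */
	return 0;
}
#endif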
1.376 dyoung 458: struct mount *
459: vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp)
460: {
461: int error;
462: struct mount *mp;
463:
464: mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
465: if (mp == NULL)
466: return NULL;
467:
468: mp->mnt_op = vfsops;
469: mp->mnt_refcnt = 1;
470: TAILQ_INIT(&mp->mnt_vnodelist);
471: rw_init(&mp->mnt_unmounting);
472: mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
473: mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
474: error = vfs_busy(mp, NULL);
475: KASSERT(error == 0);
476: mp->mnt_vnodecovered = vp;
477: mount_initspecific(mp);
478:
479: return mp;
480: }
481:
1.29 cgd 482: /*
1.80 fvdl 483: * Lookup a filesystem type, and if found allocate and initialize
484: * a mount structure for it.
485: *
486: * Devname is usually updated by mount(8) after booting.
1.29 cgd 487: */
1.50 christos 488: int
1.247 thorpej 489: vfs_rootmountalloc(const char *fstypename, const char *devname,
490: struct mount **mpp)
1.29 cgd 491: {
1.80 fvdl 492: struct vfsops *vfsp = NULL;
493: struct mount *mp;
1.29 cgd 494:
1.309 ad 495: mutex_enter(&vfs_list_lock);
1.152 jdolecek 496: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291 christos 497: if (!strncmp(vfsp->vfs_name, fstypename,
498: sizeof(mp->mnt_stat.f_fstypename)))
1.80 fvdl 499: break;
1.315 ad 500: if (vfsp == NULL) {
501: mutex_exit(&vfs_list_lock);
1.80 fvdl 502: return (ENODEV);
1.315 ad 503: }
1.309 ad 504: vfsp->vfs_refcount++;
505: mutex_exit(&vfs_list_lock);
506:
1.376 dyoung 507: if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
1.327 ad 508: return ENOMEM;
1.80 fvdl 509: mp->mnt_flag = MNT_RDONLY;
1.291 christos 510: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
511: sizeof(mp->mnt_stat.f_fstypename));
1.80 fvdl 512: mp->mnt_stat.f_mntonname[0] = '/';
1.314 pooka 513: mp->mnt_stat.f_mntonname[1] = '\0';
1.291 christos 514: mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
515: '\0';
516: (void)copystr(devname, mp->mnt_stat.f_mntfromname,
517: sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.80 fvdl 518: *mpp = mp;
1.29 cgd 519: return (0);
520: }
521:
1.30 mycroft 522: /*
523: * Routines having to do with the management of the vnode table.
524: */
1.217 junyoung 525: extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 526:
1.29 cgd 527: /*
528: * Return the next vnode from the free list.
529: */
1.50 christos 530: int
1.247 thorpej 531: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
1.309 ad 532: vnode_t **vpp)
1.29 cgd 533: {
1.142 chs 534: struct uvm_object *uobj;
1.113 fvdl 535: static int toggle;
1.309 ad 536: vnode_t *vp;
1.153 thorpej 537: int error = 0, tryalloc;
1.158 chs 538:
1.159 enami 539: try_again:
1.327 ad 540: if (mp != NULL) {
1.103 sommerfe 541: /*
1.327 ad 542: * Mark filesystem busy while we're creating a
543: * vnode. If unmount is in progress, this will
1.342 ad 544: * fail.
1.103 sommerfe 545: */
1.344 ad 546: error = vfs_busy(mp, NULL);
1.327 ad 547: if (error)
1.103 sommerfe 548: return error;
549: }
1.29 cgd 550:
1.113 fvdl 551: /*
552: * We must choose whether to allocate a new vnode or recycle an
553: * existing one. The criterion for allocating a new one is that
554: * the total number of vnodes is less than the number desired or
555: * there are no vnodes on either free list. Generally we only
556: * want to recycle vnodes that have no buffers associated with
557: * them, so we look first on the vnode_free_list. If it is empty,
558: * we next consider vnodes with referencing buffers on the
559: * vnode_hold_list. The toggle ensures that half the time we
 560: * will use a vnode from the vnode_hold_list, and half the time
 561: * we will allocate a new one unless the list has grown to twice
 562: * the desired size. We are reluctant to recycle vnodes from the
 563: * vnode_hold_list because we would lose the identity of all their
 564: * referencing buffers.
565: */
1.142 chs 566:
1.153 thorpej 567: vp = NULL;
568:
1.309 ad 569: mutex_enter(&vnode_free_list_lock);
1.153 thorpej 570:
1.113 fvdl 571: toggle ^= 1;
572: if (numvnodes > 2 * desiredvnodes)
573: toggle = 0;
574:
1.153 thorpej 575: tryalloc = numvnodes < desiredvnodes ||
1.159 enami 576: (TAILQ_FIRST(&vnode_free_list) == NULL &&
577: (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153 thorpej 578:
1.309 ad 579: if (tryalloc) {
1.206 yamt 580: numvnodes++;
1.309 ad 581: mutex_exit(&vnode_free_list_lock);
1.310 pooka 582: if ((vp = vnalloc(NULL)) == NULL) {
1.309 ad 583: mutex_enter(&vnode_free_list_lock);
584: numvnodes--;
585: } else
586: vp->v_usecount = 1;
587: }
588:
589: if (vp == NULL) {
590: vp = getcleanvnode();
591: if (vp == NULL) {
1.327 ad 592: if (mp != NULL) {
1.339 ad 593: vfs_unbusy(mp, false, NULL);
1.327 ad 594: }
1.153 thorpej 595: if (tryalloc) {
596: printf("WARNING: unable to allocate new "
597: "vnode, retrying...\n");
1.345 ad 598: kpause("newvn", false, hz, NULL);
1.153 thorpej 599: goto try_again;
600: }
1.132 jdolecek 601: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 602: *vpp = 0;
603: return (ENFILE);
604: }
1.302 ad 605: vp->v_iflag = 0;
606: vp->v_vflag = 0;
607: vp->v_uflag = 0;
1.158 chs 608: vp->v_socket = NULL;
1.29 cgd 609: }
1.309 ad 610:
611: KASSERT(vp->v_usecount == 1);
612: KASSERT(vp->v_freelisthd == NULL);
613: KASSERT(LIST_EMPTY(&vp->v_nclist));
614: KASSERT(LIST_EMPTY(&vp->v_dnclist));
615:
1.29 cgd 616: vp->v_type = VNON;
1.104 wrstuden 617: vp->v_vnlock = &vp->v_lock;
1.29 cgd 618: vp->v_tag = tag;
619: vp->v_op = vops;
620: insmntque(vp, mp);
1.30 mycroft 621: *vpp = vp;
622: vp->v_data = 0;
1.142 chs 623:
624: /*
625: * initialize uvm_object within vnode.
626: */
627:
1.158 chs 628: uobj = &vp->v_uobj;
629: KASSERT(uobj->pgops == &uvm_vnodeops);
630: KASSERT(uobj->uo_npages == 0);
631: KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288 yamt 632: vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142 chs 633:
1.309 ad 634: if (mp != NULL) {
635: if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
636: vp->v_vflag |= VV_MPSAFE;
1.339 ad 637: vfs_unbusy(mp, true, NULL);
1.309 ad 638: }
639:
1.29 cgd 640: return (0);
1.130 fvdl 641: }
642:
643: /*
644: * This is really just the reverse of getnewvnode(). Needed for
 645: * VFS_VGET functions that may need to push back a vnode in case
646: * of a locking race.
647: */
648: void
1.309 ad 649: ungetnewvnode(vnode_t *vp)
650: {
651:
652: KASSERT(vp->v_usecount == 1);
653: KASSERT(vp->v_data == NULL);
654: KASSERT(vp->v_freelisthd == NULL);
655:
656: mutex_enter(&vp->v_interlock);
657: vp->v_iflag |= VI_CLEAN;
1.324 pooka 658: vrelel(vp, 0);
1.309 ad 659: }
660:
661: /*
662: * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
663: * marker vnode and we are prepared to wait for the allocation.
664: */
665: vnode_t *
1.310 pooka 666: vnalloc(struct mount *mp)
1.130 fvdl 667: {
1.309 ad 668: vnode_t *vp;
669:
670: vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
671: if (vp == NULL) {
672: return NULL;
673: }
674:
675: memset(vp, 0, sizeof(*vp));
676: UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
677: cv_init(&vp->v_cv, "vnode");
678: /*
679: * done by memset() above.
680: * LIST_INIT(&vp->v_nclist);
681: * LIST_INIT(&vp->v_dnclist);
682: */
683:
684: if (mp != NULL) {
685: vp->v_mount = mp;
686: vp->v_type = VBAD;
687: vp->v_iflag = VI_MARKER;
688: } else {
1.326 ad 689: rw_init(&vp->v_lock.vl_lock);
1.309 ad 690: }
691:
692: return vp;
693: }
694:
695: /*
696: * Free an unused, unreferenced vnode.
697: */
698: void
1.310 pooka 699: vnfree(vnode_t *vp)
1.309 ad 700: {
701:
702: KASSERT(vp->v_usecount == 0);
703:
704: if ((vp->v_iflag & VI_MARKER) == 0) {
1.326 ad 705: rw_destroy(&vp->v_lock.vl_lock);
1.309 ad 706: mutex_enter(&vnode_free_list_lock);
707: numvnodes--;
708: mutex_exit(&vnode_free_list_lock);
709: }
710:
711: UVM_OBJ_DESTROY(&vp->v_uobj);
712: cv_destroy(&vp->v_cv);
713: pool_cache_put(vnode_cache, vp);
714: }
715:
716: /*
717: * Remove a vnode from its freelist.
718: */
719: static inline void
720: vremfree(vnode_t *vp)
721: {
722:
723: KASSERT(mutex_owned(&vp->v_interlock));
1.350 ad 724: KASSERT(vp->v_usecount == 0);
1.130 fvdl 725:
1.217 junyoung 726: /*
1.309 ad 727: * Note that the reference count must not change until
728: * the vnode is removed.
1.130 fvdl 729: */
1.309 ad 730: mutex_enter(&vnode_free_list_lock);
731: if (vp->v_holdcnt > 0) {
732: KASSERT(vp->v_freelisthd == &vnode_hold_list);
733: } else {
734: KASSERT(vp->v_freelisthd == &vnode_free_list);
735: }
736: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
737: vp->v_freelisthd = NULL;
738: mutex_exit(&vnode_free_list_lock);
1.29 cgd 739: }
740:
741: /*
742: * Move a vnode from one mount queue to another.
743: */
1.260 yamt 744: static void
1.309 ad 745: insmntque(vnode_t *vp, struct mount *mp)
1.29 cgd 746: {
1.327 ad 747: struct mount *omp;
1.29 cgd 748:
1.103 sommerfe 749: #ifdef DIAGNOSTIC
750: if ((mp != NULL) &&
1.207 dbj 751: (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113 fvdl 752: vp->v_tag != VT_VFS) {
1.103 sommerfe 753: panic("insmntque into dying filesystem");
754: }
755: #endif
1.217 junyoung 756:
1.309 ad 757: mutex_enter(&mntvnode_lock);
1.29 cgd 758: /*
759: * Delete from old mount point vnode list, if on one.
760: */
1.327 ad 761: if ((omp = vp->v_mount) != NULL)
1.272 reinoud 762: TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29 cgd 763: /*
1.327 ad 764: * Insert into list of vnodes for the new mount point, if
765: * available. The caller must take a reference on the mount
 766: * structure and donate it to the vnode.
1.29 cgd 767: */
1.279 pooka 768: if ((vp->v_mount = mp) != NULL)
769: TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.309 ad 770: mutex_exit(&mntvnode_lock);
1.327 ad 771:
772: if (omp != NULL) {
773: /* Release reference to old mount. */
1.344 ad 774: vfs_destroy(omp);
1.327 ad 775: }
1.29 cgd 776: }
777:
778: /*
1.353 pooka 779: * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
780: * recycled.
781: */
782: void
783: vwait(vnode_t *vp, int flags)
784: {
785:
786: KASSERT(mutex_owned(&vp->v_interlock));
787: KASSERT(vp->v_usecount != 0);
788:
789: while ((vp->v_iflag & flags) != 0)
790: cv_wait(&vp->v_cv, &vp->v_interlock);
791: }
792:
793: /*
794: * Insert a marker vnode into a mount's vnode list, after the
795: * specified vnode. mntvnode_lock must be held.
796: */
797: void
798: vmark(vnode_t *mvp, vnode_t *vp)
799: {
800: struct mount *mp;
801:
802: mp = mvp->v_mount;
803:
804: KASSERT(mutex_owned(&mntvnode_lock));
805: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
806: KASSERT(vp->v_mount == mp);
807:
808: TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
809: }
810:
811: /*
812: * Remove a marker vnode from a mount's vnode list, and return
813: * a pointer to the next vnode in the list. mntvnode_lock must
814: * be held.
815: */
816: vnode_t *
817: vunmark(vnode_t *mvp)
818: {
819: vnode_t *vp;
820: struct mount *mp;
821:
822: mp = mvp->v_mount;
823:
824: KASSERT(mutex_owned(&mntvnode_lock));
825: KASSERT((mvp->v_iflag & VI_MARKER) != 0);
826:
827: vp = TAILQ_NEXT(mvp, v_mntvnodes);
828: TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
829:
830: KASSERT(vp == NULL || vp->v_mount == mp);
831:
832: return vp;
833: }
834:
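/*
 * [Editorial illustration -- not part of the original source.]  A
 * sketch of iterating over a mount's vnode list with a marker vnode,
 * in the style of vflush() below; example_iterate() is hypothetical.
 */
#if 0	/* illustrative only */
static void
example_iterate(struct mount *mp)
{
	vnode_t *vp, *mvp;

	if ((mvp = vnalloc(mp)) == NULL)	/* allocate a marker */
		return;
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
	    vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * ... examine vp; mntvnode_lock may be dropped here
		 * while the marker holds our place in the list ...
		 */
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
}
#endif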
835: /*
 836: * Update the outstanding I/O count and wake any waiters once it drops to zero.
837: */
838: void
839: vwakeup(struct buf *bp)
840: {
841: struct vnode *vp;
842:
843: if ((vp = bp->b_vp) == NULL)
844: return;
845:
846: KASSERT(bp->b_objlock == &vp->v_interlock);
847: KASSERT(mutex_owned(bp->b_objlock));
848:
849: if (--vp->v_numoutput < 0)
850: panic("vwakeup: neg numoutput, vp %p", vp);
851: if (vp->v_numoutput == 0)
852: cv_broadcast(&vp->v_cv);
853: }
854:
855: /*
856: * Flush out and invalidate all buffers associated with a vnode.
857: * Called with the underlying vnode locked, which should prevent new dirty
858: * buffers from being queued.
859: */
860: int
861: vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
862: bool catch, int slptimeo)
863: {
864: struct buf *bp, *nbp;
865: int error;
866: int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
867: (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
868:
869: /* XXXUBC this doesn't look at flags or slp* */
870: mutex_enter(&vp->v_interlock);
871: error = VOP_PUTPAGES(vp, 0, 0, flushflags);
872: if (error) {
873: return error;
874: }
875:
876: if (flags & V_SAVE) {
877: error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
878: if (error)
879: return (error);
880: KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
881: }
882:
883: mutex_enter(&bufcache_lock);
884: restart:
885: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
886: nbp = LIST_NEXT(bp, b_vnbufs);
887: error = bbusy(bp, catch, slptimeo, NULL);
888: if (error != 0) {
889: if (error == EPASSTHROUGH)
890: goto restart;
891: mutex_exit(&bufcache_lock);
892: return (error);
893: }
894: brelsel(bp, BC_INVAL | BC_VFLUSH);
895: }
896:
897: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
898: nbp = LIST_NEXT(bp, b_vnbufs);
899: error = bbusy(bp, catch, slptimeo, NULL);
900: if (error != 0) {
901: if (error == EPASSTHROUGH)
902: goto restart;
903: mutex_exit(&bufcache_lock);
904: return (error);
905: }
906: /*
907: * XXX Since there are no node locks for NFS, I believe
908: * there is a slight chance that a delayed write will
909: * occur while sleeping just above, so check for it.
910: */
911: if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
912: #ifdef DEBUG
913: printf("buffer still DELWRI\n");
914: #endif
915: bp->b_cflags |= BC_BUSY | BC_VFLUSH;
916: mutex_exit(&bufcache_lock);
917: VOP_BWRITE(bp);
918: mutex_enter(&bufcache_lock);
919: goto restart;
920: }
921: brelsel(bp, BC_INVAL | BC_VFLUSH);
922: }
923:
924: #ifdef DIAGNOSTIC
925: if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
926: panic("vinvalbuf: flush failed, vp %p", vp);
927: #endif
928:
929: mutex_exit(&bufcache_lock);
930:
931: return (0);
932: }
933:
934: /*
935: * Destroy any in core blocks past the truncation length.
936: * Called with the underlying vnode locked, which should prevent new dirty
937: * buffers from being queued.
938: */
939: int
940: vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
941: {
942: struct buf *bp, *nbp;
943: int error;
944: voff_t off;
945:
946: off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
947: mutex_enter(&vp->v_interlock);
948: error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
949: if (error) {
950: return error;
951: }
952:
953: mutex_enter(&bufcache_lock);
954: restart:
955: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
956: nbp = LIST_NEXT(bp, b_vnbufs);
957: if (bp->b_lblkno < lbn)
958: continue;
959: error = bbusy(bp, catch, slptimeo, NULL);
960: if (error != 0) {
961: if (error == EPASSTHROUGH)
962: goto restart;
963: mutex_exit(&bufcache_lock);
964: return (error);
965: }
966: brelsel(bp, BC_INVAL | BC_VFLUSH);
967: }
968:
969: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
970: nbp = LIST_NEXT(bp, b_vnbufs);
971: if (bp->b_lblkno < lbn)
972: continue;
973: error = bbusy(bp, catch, slptimeo, NULL);
974: if (error != 0) {
975: if (error == EPASSTHROUGH)
976: goto restart;
977: mutex_exit(&bufcache_lock);
978: return (error);
979: }
980: brelsel(bp, BC_INVAL | BC_VFLUSH);
981: }
982: mutex_exit(&bufcache_lock);
983:
984: return (0);
985: }
986:
987: /*
988: * Flush all dirty buffers from a vnode.
989: * Called with the underlying vnode locked, which should prevent new dirty
990: * buffers from being queued.
991: */
992: void
993: vflushbuf(struct vnode *vp, int sync)
994: {
995: struct buf *bp, *nbp;
996: int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
997: bool dirty;
998:
999: mutex_enter(&vp->v_interlock);
1000: (void) VOP_PUTPAGES(vp, 0, 0, flags);
1001:
1002: loop:
1003: mutex_enter(&bufcache_lock);
1004: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1005: nbp = LIST_NEXT(bp, b_vnbufs);
1006: if ((bp->b_cflags & BC_BUSY))
1007: continue;
1008: if ((bp->b_oflags & BO_DELWRI) == 0)
1009: panic("vflushbuf: not dirty, bp %p", bp);
1010: bp->b_cflags |= BC_BUSY | BC_VFLUSH;
1011: mutex_exit(&bufcache_lock);
1012: /*
1013: * Wait for I/O associated with indirect blocks to complete,
1014: * since there is no way to quickly wait for them below.
1015: */
1016: if (bp->b_vp == vp || sync == 0)
1017: (void) bawrite(bp);
1018: else
1019: (void) bwrite(bp);
1020: goto loop;
1021: }
1022: mutex_exit(&bufcache_lock);
1023:
1024: if (sync == 0)
1025: return;
1026:
1027: mutex_enter(&vp->v_interlock);
1028: while (vp->v_numoutput != 0)
1029: cv_wait(&vp->v_cv, &vp->v_interlock);
1030: dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
1031: mutex_exit(&vp->v_interlock);
1032:
1033: if (dirty) {
1034: vprint("vflushbuf: dirty", vp);
1035: goto loop;
1036: }
1037: }
1038:
1039: /*
1.29 cgd 1040: * Create a vnode for a block device.
1.59 thorpej 1041: * Used for root filesystem and swap areas.
1.29 cgd 1042: * Also used for memory file system special devices.
1043: */
1.50 christos 1044: int
1.309 ad 1045: bdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1046: {
1.30 mycroft 1047:
1048: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 1049: }
1050:
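/*
 * [Editorial note -- not part of the original source.]  Illustrative
 * use during root file system setup, assuming the rootdev/rootvp
 * globals set up elsewhere in the kernel:
 *
 *	if (bdevvp(rootdev, &rootvp) != 0)
 *		panic("cannot obtain root vnode");
 */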
1051: /*
1052: * Create a vnode for a character device.
1053: * Used for kernfs and some console handling.
1054: */
1.50 christos 1055: int
1.309 ad 1056: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 1057: {
1.30 mycroft 1058:
1059: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 1060: }
1061:
1062: /*
1.353 pooka 1063: * Associate a buffer with a vnode. There must already be a hold on
1064: * the vnode.
1065: */
1066: void
1067: bgetvp(struct vnode *vp, struct buf *bp)
1068: {
1069:
1070: KASSERT(bp->b_vp == NULL);
1071: KASSERT(bp->b_objlock == &buffer_lock);
1072: KASSERT(mutex_owned(&vp->v_interlock));
1073: KASSERT(mutex_owned(&bufcache_lock));
1074: KASSERT((bp->b_cflags & BC_BUSY) != 0);
1075: KASSERT(!cv_has_waiters(&bp->b_done));
1076:
1077: vholdl(vp);
1078: bp->b_vp = vp;
1079: if (vp->v_type == VBLK || vp->v_type == VCHR)
1080: bp->b_dev = vp->v_rdev;
1081: else
1082: bp->b_dev = NODEV;
1083:
1084: /*
1085: * Insert onto list for new vnode.
1086: */
1087: bufinsvn(bp, &vp->v_cleanblkhd);
1088: bp->b_objlock = &vp->v_interlock;
1089: }
1090:
1091: /*
1092: * Disassociate a buffer from a vnode.
1093: */
1094: void
1095: brelvp(struct buf *bp)
1096: {
1097: struct vnode *vp = bp->b_vp;
1098:
1099: KASSERT(vp != NULL);
1100: KASSERT(bp->b_objlock == &vp->v_interlock);
1101: KASSERT(mutex_owned(&vp->v_interlock));
1102: KASSERT(mutex_owned(&bufcache_lock));
1103: KASSERT((bp->b_cflags & BC_BUSY) != 0);
1104: KASSERT(!cv_has_waiters(&bp->b_done));
1105:
1106: /*
1107: * Delete from old vnode list, if on one.
1108: */
1109: if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1110: bufremvn(bp);
1111:
1112: if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
1113: LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1114: vp->v_iflag &= ~VI_WRMAPDIRTY;
1115: vn_syncer_remove_from_worklist(vp);
1116: }
1117:
1118: bp->b_objlock = &buffer_lock;
1119: bp->b_vp = NULL;
1120: holdrelel(vp);
1121: }
1122:
1123: /*
1124: * Reassign a buffer from one vnode list to another.
1125: * The list reassignment must be within the same vnode.
1126: * Used to assign file specific control information
1127: * (indirect blocks) to the list to which they belong.
1128: */
1129: void
1130: reassignbuf(struct buf *bp, struct vnode *vp)
1131: {
1132: struct buflists *listheadp;
1133: int delayx;
1134:
1135: KASSERT(mutex_owned(&bufcache_lock));
1136: KASSERT(bp->b_objlock == &vp->v_interlock);
1137: KASSERT(mutex_owned(&vp->v_interlock));
1138: KASSERT((bp->b_cflags & BC_BUSY) != 0);
1139:
1140: /*
1141: * Delete from old vnode list, if on one.
1142: */
1143: if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1144: bufremvn(bp);
1145:
1146: /*
1147: * If dirty, put on list of dirty buffers;
1148: * otherwise insert onto list of clean buffers.
1149: */
1150: if ((bp->b_oflags & BO_DELWRI) == 0) {
1151: listheadp = &vp->v_cleanblkhd;
1152: if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
1153: (vp->v_iflag & VI_ONWORKLST) &&
1154: LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1155: vp->v_iflag &= ~VI_WRMAPDIRTY;
1156: vn_syncer_remove_from_worklist(vp);
1157: }
1158: } else {
1159: listheadp = &vp->v_dirtyblkhd;
1160: if ((vp->v_iflag & VI_ONWORKLST) == 0) {
1161: switch (vp->v_type) {
1162: case VDIR:
1163: delayx = dirdelay;
1164: break;
1165: case VBLK:
1166: if (vp->v_specmountpoint != NULL) {
1167: delayx = metadelay;
1168: break;
1169: }
1170: /* fall through */
1171: default:
1172: delayx = filedelay;
1173: break;
1174: }
1175: if (!vp->v_mount ||
1176: (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1177: vn_syncer_add_to_worklist(vp, delayx);
1178: }
1179: }
1180: bufinsvn(bp, listheadp);
1181: }
1182:
1183: /*
1.29 cgd 1184: * Create a vnode for a device.
1185: * Used by bdevvp (block device) for root file system etc.,
1186: * and by cdevvp (character device) for console and kernfs.
1187: */
1.260 yamt 1188: static int
1.309 ad 1189: getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
1.29 cgd 1190: {
1.309 ad 1191: vnode_t *vp;
1192: vnode_t *nvp;
1.29 cgd 1193: int error;
1194:
1.80 fvdl 1195: if (dev == NODEV) {
1.302 ad 1196: *vpp = NULL;
1.29 cgd 1197: return (0);
1.80 fvdl 1198: }
1.50 christos 1199: error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 1200: if (error) {
1.302 ad 1201: *vpp = NULL;
1.29 cgd 1202: return (error);
1203: }
1204: vp = nvp;
1205: vp->v_type = type;
1.309 ad 1206: vp->v_vflag |= VV_MPSAFE;
1.297 pooka 1207: uvm_vnp_setsize(vp, 0);
1.318 ad 1208: spec_node_init(vp, dev);
1.29 cgd 1209: *vpp = vp;
1210: return (0);
1211: }
1212:
1213: /*
1.349 ad 1214: * Try to gain a reference to a vnode, without acquiring its interlock.
1215: * The caller must hold a lock that will prevent the vnode from being
1216: * recycled or freed.
1217: */
1218: bool
1219: vtryget(vnode_t *vp)
1220: {
1221: u_int use, next;
1222:
1223: /*
1224: * If the vnode is being freed, don't make life any harder
1225: * for vclean() by adding another reference without waiting.
1226: * This is not strictly necessary, but we'll do it anyway.
1227: */
1228: if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
1229: return false;
1230: }
1231: for (use = vp->v_usecount;; use = next) {
1232: if (use == 0) {
1233: /* Need interlock held if first reference. */
1234: return false;
1235: }
1236: next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
1237: if (__predict_true(next == use)) {
1238: return true;
1239: }
1240: }
1241: }
1242:
1243: /*
1.29 cgd 1244: * Grab a particular vnode from the free list, increment its
1.83 fvdl 1245: * reference count and lock it. If the vnode lock bit is set the
 1246: * vnode is being eliminated in vgone. In that case, we cannot
1247: * grab the vnode, so the process is awakened when the transition is
1248: * completed, and an error returned to indicate that the vnode is no
1249: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 1250: */
1.30 mycroft 1251: int
1.309 ad 1252: vget(vnode_t *vp, int flags)
1.29 cgd 1253: {
1.175 perseant 1254: int error;
1.29 cgd 1255:
1.309 ad 1256: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1257:
1258: if ((flags & LK_INTERLOCK) == 0)
1259: mutex_enter(&vp->v_interlock);
1260:
1261: /*
1262: * Before adding a reference, we must remove the vnode
1263: * from its freelist.
1264: */
1265: if (vp->v_usecount == 0) {
1.350 ad 1266: vremfree(vp);
1.346 ad 1267: vp->v_usecount = 1;
1268: } else {
1269: atomic_inc_uint(&vp->v_usecount);
1.309 ad 1270: }
1271:
1.30 mycroft 1272: /*
1273: * If the vnode is in the process of being cleaned out for
1274: * another use, we wait for the cleaning to finish and then
1.312 ad 1275: * return failure. Cleaning is determined by checking if
1276: * the VI_XLOCK or VI_FREEING flags are set.
1.80 fvdl 1277: */
1.312 ad 1278: if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
1.313 ad 1279: if ((flags & LK_NOWAIT) != 0) {
1.324 pooka 1280: vrelel(vp, 0);
1.142 chs 1281: return EBUSY;
1282: }
1.312 ad 1283: vwait(vp, VI_XLOCK | VI_FREEING);
1.324 pooka 1284: vrelel(vp, 0);
1.313 ad 1285: return ENOENT;
1.29 cgd 1286: }
1.80 fvdl 1287: if (flags & LK_TYPE_MASK) {
1.313 ad 1288: error = vn_lock(vp, flags | LK_INTERLOCK);
1289: if (error != 0) {
1.257 yamt 1290: vrele(vp);
1.113 fvdl 1291: }
1.313 ad 1292: return error;
1.80 fvdl 1293: }
1.309 ad 1294: mutex_exit(&vp->v_interlock);
1.313 ad 1295: return 0;
1.29 cgd 1296: }
1297:
1298: /*
1299: * vput(), just unlock and vrele()
1300: */
1301: void
1.309 ad 1302: vput(vnode_t *vp)
1.29 cgd 1303: {
1.30 mycroft 1304:
1.309 ad 1305: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1306:
1307: VOP_UNLOCK(vp, 0);
1308: vrele(vp);
1.29 cgd 1309: }
1310:
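/*
 * [Editorial illustration -- not part of the original source.]  A
 * sketch of the common vget()/vput() pairing; example_use_vnode() is
 * hypothetical, and the caller is assumed to have ensured that vp
 * cannot be reclaimed before vget() gains its reference (for example
 * by holding v_interlock and passing LK_INTERLOCK).
 */
#if 0	/* illustrative only */
static int
example_use_vnode(vnode_t *vp)
{
	int error;

	/* Take a reference and an exclusive vnode lock. */
	error = vget(vp, LK_EXCLUSIVE);
	if (error != 0)
		return error;	/* e.g. ENOENT if the vnode is being cleaned */
	/* ... operate on the locked vnode ... */
	vput(vp);		/* unlock and drop the reference */
	return 0;
}
#endif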
1311: /*
1.346 ad 1312: * Try to drop a reference on a vnode. Abort if we are releasing the
1.359 ad 1313: * last reference. Note: this _must_ succeed if not the last reference.
1.346 ad 1314: */
1315: static inline bool
1316: vtryrele(vnode_t *vp)
1317: {
1318: u_int use, next;
1319:
1320: for (use = vp->v_usecount;; use = next) {
1321: if (use == 1) {
1322: return false;
1323: }
1324: next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
1325: if (__predict_true(next == use)) {
1326: return true;
1327: }
1328: }
1329: }
1330:
1331: /*
1.309 ad 1332: * Vnode release. If the reference count drops to zero, call the inactive
 1333: * routine and either return the vnode to the freelist or free it to the pool.
1.29 cgd 1334: */
1.309 ad 1335: void
1.324 pooka 1336: vrelel(vnode_t *vp, int flags)
1.29 cgd 1337: {
1.309 ad 1338: bool recycle, defer;
1339: int error;
1340:
1341: KASSERT(mutex_owned(&vp->v_interlock));
1342: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.315 ad 1343: KASSERT(vp->v_freelisthd == NULL);
1.29 cgd 1344:
1.359 ad 1345: if (__predict_false(vp->v_op == dead_vnodeop_p &&
1346: (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
1.309 ad 1347: vpanic(vp, "dead but not clean");
1348: }
1349:
1350: /*
1351: * If not the last reference, just drop the reference count
1352: * and unlock.
1353: */
1.346 ad 1354: if (vtryrele(vp)) {
1.309 ad 1355: vp->v_iflag |= VI_INACTREDO;
1356: mutex_exit(&vp->v_interlock);
1.29 cgd 1357: return;
1.80 fvdl 1358: }
1.309 ad 1359: if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
1.359 ad 1360: vpanic(vp, "vrelel: bad ref count");
1.29 cgd 1361: }
1.309 ad 1362:
1.359 ad 1363: KASSERT((vp->v_iflag & VI_XLOCK) == 0);
1364:
1.30 mycroft 1365: /*
1.309 ad 1366: * If not clean, deactivate the vnode, but preserve
1367: * our reference across the call to VOP_INACTIVE().
1.30 mycroft 1368: */
1.309 ad 1369: retry:
1370: if ((vp->v_iflag & VI_CLEAN) == 0) {
1371: recycle = false;
1.346 ad 1372: vp->v_iflag |= VI_INACTNOW;
1373:
1.309 ad 1374: /*
1375: * XXX This ugly block can be largely eliminated if
1376: * locking is pushed down into the file systems.
1377: */
1378: if (curlwp == uvm.pagedaemon_lwp) {
1379: /* The pagedaemon can't wait around; defer. */
1380: defer = true;
1381: } else if (curlwp == vrele_lwp) {
1382: /* We have to try harder. */
1383: vp->v_iflag &= ~VI_INACTREDO;
1384: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1385: LK_RETRY);
1386: if (error != 0) {
1387: /* XXX */
1388: vpanic(vp, "vrele: unable to lock %p");
1389: }
1390: defer = false;
1391: } else if ((vp->v_iflag & VI_LAYER) != 0) {
1392: /*
1393: * Acquiring the stack's lock in vclean() even
1394: * for an honest vput/vrele is dangerous because
1395: * our caller may hold other vnode locks; defer.
1396: */
1397: defer = true;
1398: } else {
1399: /* If we can't acquire the lock, then defer. */
1400: vp->v_iflag &= ~VI_INACTREDO;
1401: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1402: LK_NOWAIT);
1403: if (error != 0) {
1404: defer = true;
1405: mutex_enter(&vp->v_interlock);
1406: } else {
1407: defer = false;
1408: }
1409: }
1410:
1411: if (defer) {
1412: /*
1413: * Defer reclaim to the kthread; it's not safe to
1414: * clean it here. We donate it our last reference.
1415: */
1416: KASSERT(mutex_owned(&vp->v_interlock));
1417: KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
1.346 ad 1418: vp->v_iflag &= ~VI_INACTNOW;
1.309 ad 1419: vp->v_iflag |= VI_INACTPEND;
1420: mutex_enter(&vrele_lock);
1421: TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
1422: if (++vrele_pending > (desiredvnodes >> 8))
1423: cv_signal(&vrele_cv);
1424: mutex_exit(&vrele_lock);
1425: mutex_exit(&vp->v_interlock);
1426: return;
1427: }
1428:
1.318 ad 1429: #ifdef DIAGNOSTIC
1.321 ad 1430: if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1431: vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
1.318 ad 1432: vprint("vrelel: missing VOP_CLOSE()", vp);
1433: }
1434: #endif
1435:
1.309 ad 1436: /*
1.312 ad 1437: * The vnode can gain another reference while being
1438: * deactivated. If VOP_INACTIVE() indicates that
1439: * the described file has been deleted, then recycle
1440: * the vnode irrespective of additional references.
1441: * Another thread may be waiting to re-use the on-disk
1442: * inode.
1443: *
1444: * Note that VOP_INACTIVE() will drop the vnode lock.
1.309 ad 1445: */
1446: VOP_INACTIVE(vp, &recycle);
1447: mutex_enter(&vp->v_interlock);
1.346 ad 1448: vp->v_iflag &= ~VI_INACTNOW;
1.312 ad 1449: if (!recycle) {
1.346 ad 1450: if (vtryrele(vp)) {
1.312 ad 1451: mutex_exit(&vp->v_interlock);
1452: return;
1453: }
1.309 ad 1454:
1.312 ad 1455: /*
1456: * If we grew another reference while
1457: * VOP_INACTIVE() was underway, retry.
1458: */
1459: if ((vp->v_iflag & VI_INACTREDO) != 0) {
1460: goto retry;
1461: }
1.309 ad 1462: }
1463:
1464: /* Take care of space accounting. */
1465: if (vp->v_iflag & VI_EXECMAP) {
1466: atomic_add_int(&uvmexp.execpages,
1467: -vp->v_uobj.uo_npages);
1468: atomic_add_int(&uvmexp.filepages,
1469: vp->v_uobj.uo_npages);
1470: }
1.346 ad 1471: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
1.309 ad 1472: vp->v_vflag &= ~VV_MAPPED;
1473:
1474: /*
1475: * Recycle the vnode if the file is now unused (unlinked),
1476: * otherwise just free it.
1477: */
1478: if (recycle) {
1479: vclean(vp, DOCLOSE);
1480: }
1481: KASSERT(vp->v_usecount > 0);
1.298 pooka 1482: }
1.309 ad 1483:
1.346 ad 1484: if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
1.309 ad 1485: /* Gained another reference while being reclaimed. */
1486: mutex_exit(&vp->v_interlock);
1487: return;
1.147 chs 1488: }
1.298 pooka 1489:
1.309 ad 1490: if ((vp->v_iflag & VI_CLEAN) != 0) {
1491: /*
1492: * It's clean so destroy it. It isn't referenced
1493: * anywhere since it has been reclaimed.
1494: */
1495: KASSERT(vp->v_holdcnt == 0);
1496: KASSERT(vp->v_writecount == 0);
1497: mutex_exit(&vp->v_interlock);
1498: insmntque(vp, NULL);
1.318 ad 1499: if (vp->v_type == VBLK || vp->v_type == VCHR) {
1500: spec_node_destroy(vp);
1501: }
1.310 pooka 1502: vnfree(vp);
1.298 pooka 1503: } else {
1.309 ad 1504: /*
1505: * Otherwise, put it back onto the freelist. It
1506: * can't be destroyed while still associated with
1507: * a file system.
1508: */
1509: mutex_enter(&vnode_free_list_lock);
1510: if (vp->v_holdcnt > 0) {
1511: vp->v_freelisthd = &vnode_hold_list;
1512: } else {
1513: vp->v_freelisthd = &vnode_free_list;
1514: }
1515: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1516: mutex_exit(&vnode_free_list_lock);
1517: mutex_exit(&vp->v_interlock);
1.298 pooka 1518: }
1519: }
1520:
1521: void
1.309 ad 1522: vrele(vnode_t *vp)
1.298 pooka 1523: {
1524:
1.309 ad 1525: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1526:
1.346 ad 1527: if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
1528: return;
1529: }
1.309 ad 1530: mutex_enter(&vp->v_interlock);
1.324 pooka 1531: vrelel(vp, 0);
1.298 pooka 1532: }
1533:
1.309 ad 1534: static void
1535: vrele_thread(void *cookie)
1.298 pooka 1536: {
1.309 ad 1537: vnode_t *vp;
1.298 pooka 1538:
1.309 ad 1539: for (;;) {
1540: mutex_enter(&vrele_lock);
1541: while (TAILQ_EMPTY(&vrele_list)) {
1.351 ad 1542: vrele_gen++;
1543: cv_broadcast(&vrele_cv);
1.309 ad 1544: cv_timedwait(&vrele_cv, &vrele_lock, hz);
1545: }
1546: vp = TAILQ_FIRST(&vrele_list);
1547: TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1548: vrele_pending--;
1549: mutex_exit(&vrele_lock);
1550:
1551: /*
1552: * If not the last reference, then ignore the vnode
1553: * and look for more work.
1554: */
1555: mutex_enter(&vp->v_interlock);
1556: KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
1557: vp->v_iflag &= ~VI_INACTPEND;
1.324 pooka 1558: vrelel(vp, 0);
1.309 ad 1559: }
1.29 cgd 1560: }
1561:
1562: /*
1563: * Page or buffer structure gets a reference.
1.258 chs 1564: * Called with v_interlock held.
1.29 cgd 1565: */
1.30 mycroft 1566: void
1.309 ad 1567: vholdl(vnode_t *vp)
1.29 cgd 1568: {
1569:
1.309 ad 1570: KASSERT(mutex_owned(&vp->v_interlock));
1571: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1572:
1573: if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1574: mutex_enter(&vnode_free_list_lock);
1575: KASSERT(vp->v_freelisthd == &vnode_free_list);
1576: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1577: vp->v_freelisthd = &vnode_hold_list;
1578: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1579: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1580: }
1.29 cgd 1581: }
1582:
1583: /*
1584: * Page or buffer structure frees a reference.
1.258 chs 1585: * Called with v_interlock held.
1.29 cgd 1586: */
1.30 mycroft 1587: void
1.309 ad 1588: holdrelel(vnode_t *vp)
1.29 cgd 1589: {
1590:
1.309 ad 1591: KASSERT(mutex_owned(&vp->v_interlock));
1592: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.142 chs 1593:
1.309 ad 1594: if (vp->v_holdcnt <= 0) {
1595: vpanic(vp, "holdrelel: holdcnt vp %p");
1596: }
1.142 chs 1597:
1.309 ad 1598: vp->v_holdcnt--;
1599: if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1600: mutex_enter(&vnode_free_list_lock);
1601: KASSERT(vp->v_freelisthd == &vnode_hold_list);
1602: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1603: vp->v_freelisthd = &vnode_free_list;
1604: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1605: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1606: }
1.81 ross 1607: }
1608:
1609: /*
1.309 ad 1610: * Vnode reference, where a reference is already held by some other
1611: * object (for example, a file structure).
1.81 ross 1612: */
1613: void
1.309 ad 1614: vref(vnode_t *vp)
1.81 ross 1615: {
1616:
1.309 ad 1617: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.346 ad 1618: KASSERT(vp->v_usecount != 0);
1.309 ad 1619:
1.346 ad 1620: atomic_inc_uint(&vp->v_usecount);
1.29 cgd 1621: }
1622:
1623: /*
1624: * Remove any vnodes in the vnode table belonging to mount point mp.
1625: *
1.183 yamt 1626: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1627: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1628: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1629: * that are found.
1.183 yamt 1630: *
1631: * If WRITECLOSE is set, only flush out regular file vnodes open for
1632: * writing.
1633: *
1634: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1635: */
1.30 mycroft 1636: #ifdef DEBUG
1637: int busyprt = 0; /* print out busy vnodes */
1638: struct ctldebug debug1 = { "busyprt", &busyprt };
1639: #endif
1.29 cgd 1640:
1.334 ad 1641: static vnode_t *
1642: vflushnext(vnode_t *mvp, int *when)
1643: {
1644:
1645: if (hardclock_ticks > *when) {
1646: mutex_exit(&mntvnode_lock);
1647: yield();
1648: mutex_enter(&mntvnode_lock);
1649: *when = hardclock_ticks + hz / 10;
1650: }
1651:
1652: return vunmark(mvp);
1653: }
1654:
1.50 christos 1655: int
1.309 ad 1656: vflush(struct mount *mp, vnode_t *skipvp, int flags)
1.29 cgd 1657: {
1.309 ad 1658: vnode_t *vp, *mvp;
1.351 ad 1659: int busy = 0, when = 0, gen;
1660:
1661: /*
1662: * First, flush out any vnode references from vrele_list.
1663: */
1664: mutex_enter(&vrele_lock);
1665: gen = vrele_gen;
1.352 pooka 1666: while (vrele_pending && gen == vrele_gen) {
1.351 ad 1667: cv_broadcast(&vrele_cv);
1668: cv_wait(&vrele_cv, &vrele_lock);
1.352 pooka 1669: }
1.351 ad 1670: mutex_exit(&vrele_lock);
1.29 cgd 1671:
1.309 ad 1672: /* Allocate a marker vnode. */
1.310 pooka 1673: if ((mvp = vnalloc(mp)) == NULL)
1.309 ad 1674: return (ENOMEM);
1675:
1.273 reinoud 1676: /*
1677: * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1678: * and vclean() are called
1679: */
1.351 ad 1680: mutex_enter(&mntvnode_lock);
1.334 ad 1681: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1682: vp = vflushnext(mvp, &when)) {
1.309 ad 1683: vmark(mvp, vp);
1684: if (vp->v_mount != mp || vismarker(vp))
1685: continue;
1.29 cgd 1686: /*
1687: * Skip over a selected vnode.
1688: */
1689: if (vp == skipvp)
1690: continue;
1.309 ad 1691: mutex_enter(&vp->v_interlock);
1.29 cgd 1692: /*
1.315 ad 1693: * Ignore clean but still referenced vnodes.
1694: */
1695: if ((vp->v_iflag & VI_CLEAN) != 0) {
1696: mutex_exit(&vp->v_interlock);
1697: continue;
1698: }
1699: /*
1.309 ad 1700: * Skip over vnodes marked VV_SYSTEM.
1.29 cgd 1701: */
1.302 ad 1702: if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1.309 ad 1703: mutex_exit(&vp->v_interlock);
1.29 cgd 1704: continue;
1.80 fvdl 1705: }
1.29 cgd 1706: /*
1.30 mycroft 1707: * If WRITECLOSE is set, only flush out regular file
1708: * vnodes open for writing.
1709: */
1710: if ((flags & WRITECLOSE) &&
1.92 thorpej 1711: (vp->v_writecount == 0 || vp->v_type != VREG)) {
1.309 ad 1712: mutex_exit(&vp->v_interlock);
1.30 mycroft 1713: continue;
1.92 thorpej 1714: }
1.30 mycroft 1715: /*
1.29 cgd 1716: * With v_usecount == 0, all we need to do is clear
1717: * out the vnode data structures and we are done.
1718: */
1719: if (vp->v_usecount == 0) {
1.309 ad 1720: mutex_exit(&mntvnode_lock);
1721: vremfree(vp);
1.350 ad 1722: vp->v_usecount = 1;
1.309 ad 1723: vclean(vp, DOCLOSE);
1.324 pooka 1724: vrelel(vp, 0);
1.309 ad 1725: mutex_enter(&mntvnode_lock);
1.29 cgd 1726: continue;
1727: }
1728: /*
1.30 mycroft 1729: * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 1730: * For block or character devices, revert to an
1.318 ad 1731: * anonymous device. For all other files, just
1732: * kill them.
1.29 cgd 1733: */
1734: if (flags & FORCECLOSE) {
1.309 ad 1735: mutex_exit(&mntvnode_lock);
1.346 ad 1736: atomic_inc_uint(&vp->v_usecount);
1.29 cgd 1737: if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.309 ad 1738: vclean(vp, DOCLOSE);
1.324 pooka 1739: vrelel(vp, 0);
1.29 cgd 1740: } else {
1.309 ad 1741: vclean(vp, 0);
1.318 ad 1742: vp->v_op = spec_vnodeop_p; /* XXXSMP */
1.320 ad 1743: mutex_exit(&vp->v_interlock);
1744: /*
1745: * The vnode isn't clean, but still resides
1746: * on the mount list. Remove it. XXX This
1747: * is a bit dodgy.
1748: */
1749: insmntque(vp, NULL);
1750: vrele(vp);
1.29 cgd 1751: }
1.309 ad 1752: mutex_enter(&mntvnode_lock);
1.29 cgd 1753: continue;
1754: }
1.30 mycroft 1755: #ifdef DEBUG
1.29 cgd 1756: if (busyprt)
1757: vprint("vflush: busy vnode", vp);
1.30 mycroft 1758: #endif
1.309 ad 1759: mutex_exit(&vp->v_interlock);
1.29 cgd 1760: busy++;
1761: }
1.309 ad 1762: mutex_exit(&mntvnode_lock);
1.310 pooka 1763: vnfree(mvp);
1.29 cgd 1764: if (busy)
1765: return (EBUSY);
1766: return (0);
1767: }
1768:
1769: /*
1770: * Disassociate the underlying file system from a vnode.
1.309 ad 1771: *
1772: * Must be called with the interlock held, and will return with it held.
1.29 cgd 1773: */
1.309 ad 1774: void
1775: vclean(vnode_t *vp, int flags)
1.29 cgd 1776: {
1.309 ad 1777: lwp_t *l = curlwp;
1778: bool recycle, active;
1.318 ad 1779: int error;
1.29 cgd 1780:
1.309 ad 1781: KASSERT(mutex_owned(&vp->v_interlock));
1782: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1783: KASSERT(vp->v_usecount != 0);
1.166 chs 1784:
1.309 ad 1785: /* If cleaning is already in progress wait until done and return. */
1786: if (vp->v_iflag & VI_XLOCK) {
1787: vwait(vp, VI_XLOCK);
1788: return;
1789: }
1.166 chs 1790:
1.309 ad 1791: /* If already clean, nothing to do. */
1792: if ((vp->v_iflag & VI_CLEAN) != 0) {
1793: return;
1.112 mycroft 1794: }
1.87 pk 1795:
1.29 cgd 1796: /*
1.309 ad 1797: * Prevent the vnode from being recycled or brought into use
1798: * while we clean it out.
1.29 cgd 1799: */
1.302 ad 1800: vp->v_iflag |= VI_XLOCK;
1801: if (vp->v_iflag & VI_EXECMAP) {
1.307 ad 1802: atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1803: atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1.147 chs 1804: }
1.302 ad 1805: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1.309 ad 1806: active = (vp->v_usecount > 1);
1.142 chs 1807:
1.309 ad 1808: /* XXXAD should not lock vnode under layer */
1809: VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1.80 fvdl 1810:
1.98 wrstuden 1811: /*
1.142 chs 1812: * Clean out any cached data associated with the vnode.
1.318 ad 1813: * If purging an active vnode, it must be closed and
1814: * deactivated before being reclaimed. Note that the
1815: * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1816: */
1.166 chs 1817: if (flags & DOCLOSE) {
1.256 christos 1818: error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.355 simonb 1819: if (error != 0) {
1820: /* XXX, fix vn_start_write's grab of mp and use that. */
1821:
1822: if (wapbl_vphaswapbl(vp))
1823: WAPBL_DISCARD(wapbl_vptomp(vp));
1.256 christos 1824: error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.355 simonb 1825: }
1.211 dbj 1826: KASSERT(error == 0);
1.302 ad 1827: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.318 ad 1828: if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1829: spec_node_revoke(vp);
1.231 mycroft 1830: }
1.166 chs 1831: }
1.29 cgd 1832: if (active) {
1.309 ad 1833: VOP_INACTIVE(vp, &recycle);
1.80 fvdl 1834: } else {
1835: /*
1836: * Any other processes trying to obtain this lock must first
1.302 ad 1837: * wait for VI_XLOCK to clear, then call the new lock operation.
1.80 fvdl 1838: */
1839: VOP_UNLOCK(vp, 0);
1.29 cgd 1840: }
1.142 chs 1841:
1.309 ad 1842: /* Disassociate the underlying file system from the vnode. */
1843: if (VOP_RECLAIM(vp)) {
1844: vpanic(vp, "vclean: cannot reclaim");
1.87 pk 1845: }
1.30 mycroft 1846:
1.169 chs 1847: KASSERT(vp->v_uobj.uo_npages == 0);
1.255 yamt 1848: if (vp->v_type == VREG && vp->v_ractx != NULL) {
1849: uvm_ra_freectx(vp->v_ractx);
1850: vp->v_ractx = NULL;
1851: }
1.80 fvdl 1852: cache_purge(vp);
1853:
1.309 ad 1854: /* Done with purge, notify sleepers of the grim news. */
1.360 ad 1855: mutex_enter(&vp->v_interlock);
1.30 mycroft 1856: vp->v_op = dead_vnodeop_p;
1857: vp->v_tag = VT_NON;
1.309 ad 1858: vp->v_vnlock = &vp->v_lock;
1.332 ad 1859: KNOTE(&vp->v_klist, NOTE_REVOKE);
1.312 ad 1860: vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1.304 ad 1861: vp->v_vflag &= ~VV_LOCKSWORK;
1.319 ad 1862: if ((flags & DOCLOSE) != 0) {
1.318 ad 1863: vp->v_iflag |= VI_CLEAN;
1864: }
1.309 ad 1865: cv_broadcast(&vp->v_cv);
1866:
1867: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.29 cgd 1868: }
1869:
1870: /*
1.80 fvdl 1871: * Recycle an unused vnode to the front of the free list.
1872: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1873: */
1.80 fvdl 1874: int
1.309 ad 1875: vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1.217 junyoung 1876: {
1877:
1.309 ad 1878: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1879:
1880: mutex_enter(&vp->v_interlock);
1881: if (vp->v_usecount != 0) {
1882: mutex_exit(&vp->v_interlock);
1883: return (0);
1.29 cgd 1884: }
1.309 ad 1885: if (inter_lkp)
1886: mutex_exit(inter_lkp);
1887: vremfree(vp);
1.350 ad 1888: vp->v_usecount = 1;
1.309 ad 1889: vclean(vp, DOCLOSE);
1.324 pooka 1890: vrelel(vp, 0);
1.309 ad 1891: return (1);
1.29 cgd 1892: }
1893:
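/*
 * Example (sketch): a file system that caches vnodes privately can use
 * vrecycle() to retire one that is no longer needed.  "foofs_try_recycle"
 * is a hypothetical helper, not part of this file.
 *
 *	static void
 *	foofs_try_recycle(vnode_t *vp)
 *	{
 *		// vrecycle() checks v_usecount under v_interlock; if the
 *		// vnode is unused it is cleaned with DOCLOSE, released,
 *		// and 1 is returned.  No extra interlock is passed (NULL).
 *		if (vrecycle(vp, NULL, curlwp) == 0)
 *			printf("foofs: vnode %p still in use\n", vp);
 *	}
 */
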
1894: /*
1.309 ad 1895: * Eliminate all activity associated with a vnode in preparation for
1896: * reuse. Drops a reference from the vnode.
1.29 cgd 1897: */
1898: void
1.309 ad 1899: vgone(vnode_t *vp)
1.80 fvdl 1900: {
1.166 chs 1901:
1.309 ad 1902: mutex_enter(&vp->v_interlock);
1903: vclean(vp, DOCLOSE);
1.324 pooka 1904: vrelel(vp, 0);
1.29 cgd 1905: }
1906:
1907: /*
1908: * Lookup a vnode by device number.
1909: */
1.50 christos 1910: int
1.309 ad 1911: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 1912: {
1.309 ad 1913: vnode_t *vp;
1.80 fvdl 1914: int rc = 0;
1.29 cgd 1915:
1.363 pooka 1916: mutex_enter(&device_lock);
1.318 ad 1917: for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29 cgd 1918: if (dev != vp->v_rdev || type != vp->v_type)
1919: continue;
1920: *vpp = vp;
1.80 fvdl 1921: rc = 1;
1922: break;
1.29 cgd 1923: }
1.363 pooka 1924: mutex_exit(&device_lock);
1.80 fvdl 1925: return (rc);
1.96 thorpej 1926: }
1927:
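/*
 * Example (sketch): query whether an active vnode currently exists for
 * a device.  Note that vfinddev() only reports the vnode and takes no
 * reference, so a caller that wants to use it must still vget() it.
 * The device numbers below are made up for illustration.
 *
 *	vnode_t *vp;
 *	dev_t dev = makedev(0, 0);		// hypothetical block device
 *
 *	if (vfinddev(dev, VBLK, &vp))
 *		printf("dev 0x%llx has vnode %p\n",
 *		    (unsigned long long)dev, vp);
 */
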
1928: /*
1929: * Revoke all the vnodes corresponding to the specified minor number
1930: * range (endpoints inclusive) of the specified major.
1931: */
1932: void
1.247 thorpej 1933: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1934: {
1.316 ad 1935: vnode_t *vp, **vpp;
1936: dev_t dev;
1.96 thorpej 1937: int mn;
1938:
1.274 mrg 1939: vp = NULL; /* XXX gcc */
1940:
1.363 pooka 1941: mutex_enter(&device_lock);
1.316 ad 1942: for (mn = minl; mn <= minh; mn++) {
1943: dev = makedev(maj, mn);
1.318 ad 1944: vpp = &specfs_hash[SPECHASH(dev)];
1.316 ad 1945: for (vp = *vpp; vp != NULL;) {
1946: mutex_enter(&vp->v_interlock);
1947: if ((vp->v_iflag & VI_CLEAN) != 0 ||
1948: dev != vp->v_rdev || type != vp->v_type) {
1949: mutex_exit(&vp->v_interlock);
1950: vp = vp->v_specnext;
1951: continue;
1952: }
1.363 pooka 1953: mutex_exit(&device_lock);
1.316 ad 1954: if (vget(vp, LK_INTERLOCK) == 0) {
1955: VOP_REVOKE(vp, REVOKEALL);
1956: vrele(vp);
1957: }
1.363 pooka 1958: mutex_enter(&device_lock);
1.316 ad 1959: vp = *vpp;
1960: }
1961: }
1.363 pooka 1962: mutex_exit(&device_lock);
1.29 cgd 1963: }
1964:
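/*
 * Example (sketch): a disk driver's detach routine typically calls
 * vdevgone() for both its block and character majors so that any open
 * vnodes are revoked before the softc goes away.  "FOO_BMAJOR",
 * "FOO_CMAJOR" and "unit" are hypothetical.
 *
 *	int mn = unit * MAXPARTITIONS;
 *
 *	// revoke every partition of this unit, both device types
 *	vdevgone(FOO_BMAJOR, mn, mn + MAXPARTITIONS - 1, VBLK);
 *	vdevgone(FOO_CMAJOR, mn, mn + MAXPARTITIONS - 1, VCHR);
 */
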
1965: /*
1966: * Calculate the total number of references to a special device.
1967: */
1.30 mycroft 1968: int
1.309 ad 1969: vcount(vnode_t *vp)
1.29 cgd 1970: {
1971: int count;
1972:
1.363 pooka 1973: mutex_enter(&device_lock);
1.309 ad 1974: mutex_enter(&vp->v_interlock);
1.318 ad 1975: if (vp->v_specnode == NULL) {
1.309 ad 1976: count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1977: mutex_exit(&vp->v_interlock);
1.363 pooka 1978: mutex_exit(&device_lock);
1.309 ad 1979: return (count);
1980: }
1981: mutex_exit(&vp->v_interlock);
1.318 ad 1982: count = vp->v_specnode->sn_dev->sd_opencnt;
1.363 pooka 1983: mutex_exit(&device_lock);
1.29 cgd 1984: return (count);
1985: }
1986:
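/*
 * Example (sketch): inside a hypothetical device close routine, vcount()
 * tells whether other opens of the same device remain through any of its
 * aliased vnodes, which is the usual "is this the last close?" test.
 *
 *	if (vcount(vp) > 1)
 *		return 0;		// other opens remain, keep state
 *	// last close of the device: tear down driver state here
 */
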
1.101 mrg 1987: /*
1.316 ad 1988: * Eliminate all activity associated with the requested vnode
1989: * and with all vnodes aliased to the requested vnode.
1990: */
1991: void
1992: vrevoke(vnode_t *vp)
1993: {
1994: vnode_t *vq, **vpp;
1995: enum vtype type;
1996: dev_t dev;
1997:
1998: KASSERT(vp->v_usecount > 0);
1999:
2000: mutex_enter(&vp->v_interlock);
2001: if ((vp->v_iflag & VI_CLEAN) != 0) {
2002: mutex_exit(&vp->v_interlock);
2003: return;
1.368 enami 2004: } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
2005: atomic_inc_uint(&vp->v_usecount);
2006: vclean(vp, DOCLOSE);
2007: vrelel(vp, 0);
2008: return;
1.316 ad 2009: } else {
2010: dev = vp->v_rdev;
2011: type = vp->v_type;
2012: mutex_exit(&vp->v_interlock);
2013: }
2014:
1.318 ad 2015: vpp = &specfs_hash[SPECHASH(dev)];
1.363 pooka 2016: mutex_enter(&device_lock);
1.316 ad 2017: for (vq = *vpp; vq != NULL;) {
1.333 ad 2018: /* If clean or being cleaned, then ignore it. */
2019: mutex_enter(&vq->v_interlock);
2020: if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1.317 ad 2021: vq->v_rdev != dev || vq->v_type != type) {
1.333 ad 2022: mutex_exit(&vq->v_interlock);
1.316 ad 2023: vq = vq->v_specnext;
2024: continue;
2025: }
1.363 pooka 2026: mutex_exit(&device_lock);
1.350 ad 2027: if (vq->v_usecount == 0) {
1.317 ad 2028: vremfree(vq);
1.350 ad 2029: vq->v_usecount = 1;
2030: } else {
2031: atomic_inc_uint(&vq->v_usecount);
1.316 ad 2032: }
2033: vclean(vq, DOCLOSE);
1.324 pooka 2034: vrelel(vq, 0);
1.363 pooka 2035: mutex_enter(&device_lock);
1.316 ad 2036: vq = *vpp;
2037: }
1.363 pooka 2038: mutex_exit(&device_lock);
1.316 ad 2039: }
2040:
2041: /*
1.220 lukem 2042: * sysctl helper routine to return list of supported fstypes
2043: */
1.358 pooka 2044: int
1.220 lukem 2045: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2046: {
1.291 christos 2047: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 2048: char *where = oldp;
2049: struct vfsops *v;
2050: size_t needed, left, slen;
2051: int error, first;
2052:
2053: if (newp != NULL)
2054: return (EPERM);
2055: if (namelen != 0)
2056: return (EINVAL);
2057:
2058: first = 1;
2059: error = 0;
2060: needed = 0;
2061: left = *oldlenp;
2062:
1.311 ad 2063: sysctl_unlock();
1.302 ad 2064: mutex_enter(&vfs_list_lock);
1.220 lukem 2065: LIST_FOREACH(v, &vfs_list, vfs_list) {
2066: if (where == NULL)
2067: needed += strlen(v->vfs_name) + 1;
2068: else {
1.245 christos 2069: memset(bf, 0, sizeof(bf));
1.220 lukem 2070: if (first) {
1.245 christos 2071: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 2072: first = 0;
2073: } else {
1.245 christos 2074: bf[0] = ' ';
2075: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 2076: }
1.245 christos 2077: bf[sizeof(bf)-1] = '\0';
2078: slen = strlen(bf);
1.220 lukem 2079: if (left < slen + 1)
2080: break;
1.302 ad 2081: v->vfs_refcount++;
2082: mutex_exit(&vfs_list_lock);
1.354 pooka 2083: /* +1 to copy out the trailing NUL byte */
1.245 christos 2084: error = copyout(bf, where, slen + 1);
1.302 ad 2085: mutex_enter(&vfs_list_lock);
2086: v->vfs_refcount--;
1.220 lukem 2087: if (error)
2088: break;
2089: where += slen;
2090: needed += slen;
2091: left -= slen;
2092: }
2093: }
1.302 ad 2094: mutex_exit(&vfs_list_lock);
1.311 ad 2095: sysctl_relock();
1.220 lukem 2096: *oldlenp = needed;
2097: return (error);
2098: }
2099:
1.212 atatat 2100:
1.29 cgd 2101: int kinfo_vdebug = 1;
2102: int kinfo_vgetfailed;
2103: #define KINFO_VNODESLOP 10
2104: /*
2105: * Dump vnode list (via sysctl).
2106: * Copyout address of vnode followed by vnode.
2107: */
2108: /* ARGSUSED */
1.50 christos 2109: int
1.212 atatat 2110: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 2111: {
1.212 atatat 2112: char *where = oldp;
2113: size_t *sizep = oldlenp;
1.80 fvdl 2114: struct mount *mp, *nmp;
1.311 ad 2115: vnode_t *vp, *mvp, vbuf;
1.80 fvdl 2116: char *bp = where, *savebp;
1.29 cgd 2117: char *ewhere;
2118: int error;
1.212 atatat 2119:
2120: if (namelen != 0)
2121: return (EOPNOTSUPP);
2122: if (newp != NULL)
2123: return (EPERM);
1.29 cgd 2124:
1.309 ad 2125: #define VPTRSZ sizeof(vnode_t *)
2126: #define VNODESZ sizeof(vnode_t)
1.29 cgd 2127: if (where == NULL) {
2128: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2129: return (0);
2130: }
2131: ewhere = where + *sizep;
1.80 fvdl 2132:
1.311 ad 2133: sysctl_unlock();
1.302 ad 2134: mutex_enter(&mountlist_lock);
1.177 matt 2135: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2136: mp = nmp) {
1.344 ad 2137: if (vfs_busy(mp, &nmp)) {
1.29 cgd 2138: continue;
1.80 fvdl 2139: }
1.29 cgd 2140: savebp = bp;
1.309 ad 2141: /* Allocate a marker vnode. */
1.375 elad 2142: mvp = vnalloc(mp);
2143: /* Should never fail for mp != NULL */
2144: KASSERT(mvp != NULL);
1.309 ad 2145: mutex_enter(&mntvnode_lock);
2146: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
2147: vmark(mvp, vp);
1.29 cgd 2148: /*
2149: * Check that the vp is still associated with
2150: * this filesystem. RACE: could have been
2151: * recycled onto the same filesystem.
2152: */
1.309 ad 2153: if (vp->v_mount != mp || vismarker(vp))
2154: continue;
1.29 cgd 2155: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309 ad 2156: (void)vunmark(mvp);
2157: mutex_exit(&mntvnode_lock);
1.310 pooka 2158: vnfree(mvp);
1.311 ad 2159: sysctl_relock();
1.29 cgd 2160: *sizep = bp - where;
2161: return (ENOMEM);
2162: }
1.311 ad 2163: memcpy(&vbuf, vp, VNODESZ);
1.309 ad 2164: mutex_exit(&mntvnode_lock);
1.367 enami 2165: if ((error = copyout(&vp, bp, VPTRSZ)) ||
1.311 ad 2166: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309 ad 2167: mutex_enter(&mntvnode_lock);
2168: (void)vunmark(mvp);
2169: mutex_exit(&mntvnode_lock);
1.310 pooka 2170: vnfree(mvp);
1.311 ad 2171: sysctl_relock();
1.29 cgd 2172: return (error);
1.309 ad 2173: }
1.29 cgd 2174: bp += VPTRSZ + VNODESZ;
1.309 ad 2175: mutex_enter(&mntvnode_lock);
1.29 cgd 2176: }
1.309 ad 2177: mutex_exit(&mntvnode_lock);
1.344 ad 2178: vnfree(mvp);
1.339 ad 2179: vfs_unbusy(mp, false, &nmp);
1.29 cgd 2180: }
1.302 ad 2181: mutex_exit(&mountlist_lock);
1.311 ad 2182: sysctl_relock();
1.29 cgd 2183:
2184: *sizep = bp - where;
2185: return (0);
1.30 mycroft 2186: }
2187:
2188: /*
1.309 ad 2189: * Remove clean vnodes from a mountpoint's vnode list.
2190: */
2191: void
2192: vfs_scrubvnlist(struct mount *mp)
2193: {
2194: vnode_t *vp, *nvp;
2195:
1.327 ad 2196: retry:
1.309 ad 2197: mutex_enter(&mntvnode_lock);
2198: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
2199: nvp = TAILQ_NEXT(vp, v_mntvnodes);
2200: mutex_enter(&vp->v_interlock);
1.315 ad 2201: if ((vp->v_iflag & VI_CLEAN) != 0) {
1.309 ad 2202: TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.315 ad 2203: vp->v_mount = NULL;
1.327 ad 2204: mutex_exit(&mntvnode_lock);
2205: mutex_exit(&vp->v_interlock);
1.344 ad 2206: vfs_destroy(mp);
1.327 ad 2207: goto retry;
1.315 ad 2208: }
1.309 ad 2209: mutex_exit(&vp->v_interlock);
2210: }
2211: mutex_exit(&mntvnode_lock);
2212: }
2213:
2214: /*
1.30 mycroft 2215: * Check to see if a filesystem is mounted on a block device.
2216: */
2217: int
1.309 ad 2218: vfs_mountedon(vnode_t *vp)
1.30 mycroft 2219: {
1.309 ad 2220: vnode_t *vq;
1.80 fvdl 2221: int error = 0;
1.30 mycroft 2222:
1.261 reinoud 2223: if (vp->v_type != VBLK)
2224: return ENOTBLK;
1.113 fvdl 2225: if (vp->v_specmountpoint != NULL)
1.30 mycroft 2226: return (EBUSY);
1.363 pooka 2227: mutex_enter(&device_lock);
1.318 ad 2228: for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
2229: vq = vq->v_specnext) {
2230: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
2231: continue;
2232: if (vq->v_specmountpoint != NULL) {
2233: error = EBUSY;
2234: break;
1.30 mycroft 2235: }
2236: }
1.363 pooka 2237: mutex_exit(&device_lock);
1.80 fvdl 2238: return (error);
1.30 mycroft 2239: }
2240:
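/*
 * Example (sketch): callers that want exclusive use of a block device,
 * such as mount or swap configuration, use this as a guard.  "devvp" is
 * the candidate block device vnode.
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return error;	// EBUSY if mounted, ENOTBLK if not a VBLK
 */
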
1.35 ws 2241: /*
1.39 mycroft 2242: * Unmount all file systems.
2243: * We traverse the list in reverse order under the assumption that doing so
2244: * will avoid needing to worry about dependencies.
2245: */
1.371 dyoung 2246: bool
1.256 christos 2247: vfs_unmountall(struct lwp *l)
1.39 mycroft 2248: {
1.377 dyoung 2249: printf("unmounting file systems...");
2250: return vfs_unmountall1(l, true, true);
2251: }
2252:
2253: bool
2254: vfs_unmountall1(struct lwp *l, bool force, bool verbose)
2255: {
1.123 augustss 2256: struct mount *mp, *nmp;
1.371 dyoung 2257: bool any_error, progress;
2258: int error;
1.39 mycroft 2259:
1.371 dyoung 2260: for (any_error = false, mp = CIRCLEQ_LAST(&mountlist);
1.325 dyoung 2261: !CIRCLEQ_EMPTY(&mountlist);
2262: mp = nmp) {
2263: nmp = CIRCLEQ_PREV(mp, mnt_list);
1.54 jtk 2264: #ifdef DEBUG
1.235 lukem 2265: printf("\nunmounting %s (%s)...",
1.56 christos 2266: mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 2267: #endif
1.344 ad 2268: atomic_inc_uint(&mp->mnt_refcnt);
1.377 dyoung 2269: if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0)
1.371 dyoung 2270: progress = true;
2271: else {
1.377 dyoung 2272: if (verbose) {
2273: printf("unmount of %s failed with error %d\n",
2274: mp->mnt_stat.f_mntonname, error);
2275: }
1.371 dyoung 2276: any_error = true;
1.40 mycroft 2277: }
1.39 mycroft 2278: }
1.377 dyoung 2279: if (verbose)
2280: printf(" done\n");
2281: if (any_error && verbose)
1.57 christos 2282: printf("WARNING: some file systems would not unmount\n");
1.371 dyoung 2283: return progress;
1.40 mycroft 2284: }
2285:
2286: /*
2287: * Sync and unmount file systems before shutting down.
2288: */
2289: void
1.247 thorpej 2290: vfs_shutdown(void)
1.40 mycroft 2291: {
1.265 skrll 2292: struct lwp *l;
1.40 mycroft 2293:
1.265 skrll 2294: /* XXX we're certainly not running in lwp0's context! */
1.377 dyoung 2295: l = (curlwp == NULL) ? &lwp0 : curlwp;
1.185 christos 2296:
1.70 cgd 2297: printf("syncing disks... ");
2298:
1.305 pooka 2299: /* remove user processes from run queue */
1.138 bouyer 2300: suspendsched();
1.40 mycroft 2301: (void) spl0();
2302:
1.128 sommerfe 2303: /* avoid coming back this way again if we panic. */
2304: doing_shutdown = 1;
2305:
1.184 thorpej 2306: sys_sync(l, NULL, NULL);
1.40 mycroft 2307:
2308: /* Wait for sync to finish. */
1.213 pk 2309: if (buf_syncwait() != 0) {
1.124 augustss 2310: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2311: Debugger();
2312: #endif
1.57 christos 2313: printf("giving up\n");
1.84 thorpej 2314: return;
1.73 thorpej 2315: } else
1.57 christos 2316: printf("done\n");
1.73 thorpej 2317:
1.84 thorpej 2318: /*
2319: * If we've panic'd, don't make the situation potentially
2320: * worse by unmounting the file systems.
2321: */
2322: if (panicstr != NULL)
2323: return;
2324:
2325: /* Release inodes held by texts before update. */
1.73 thorpej 2326: #ifdef notdef
1.84 thorpej 2327: vnshutdown();
1.73 thorpej 2328: #endif
1.84 thorpej 2329: /* Unmount file systems. */
1.256 christos 2330: vfs_unmountall(l);
1.58 thorpej 2331: }
2332:
2333: /*
2334: * Mount the root file system. If the operator didn't specify a
2335: * file system to use, try all possible file systems until one
2336: * succeeds.
2337: */
2338: int
1.247 thorpej 2339: vfs_mountroot(void)
1.58 thorpej 2340: {
1.79 thorpej 2341: struct vfsops *v;
1.239 mycroft 2342: int error = ENODEV;
1.58 thorpej 2343:
2344: if (root_device == NULL)
2345: panic("vfs_mountroot: root device unknown");
2346:
1.264 thorpej 2347: switch (device_class(root_device)) {
1.58 thorpej 2348: case DV_IFNET:
2349: if (rootdev != NODEV)
1.173 thorpej 2350: panic("vfs_mountroot: rootdev set for DV_IFNET "
1.365 christos 2351: "(0x%llx -> %llu,%llu)",
2352: (unsigned long long)rootdev,
2353: (unsigned long long)major(rootdev),
2354: (unsigned long long)minor(rootdev));
1.58 thorpej 2355: break;
2356:
2357: case DV_DISK:
2358: if (rootdev == NODEV)
2359: panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239 mycroft 2360: if (bdevvp(rootdev, &rootvp))
2361: panic("vfs_mountroot: can't get vnode for rootdev");
1.306 pooka 2362: error = VOP_OPEN(rootvp, FREAD, FSCRED);
1.239 mycroft 2363: if (error) {
2364: printf("vfs_mountroot: can't open root device\n");
2365: return (error);
2366: }
1.58 thorpej 2367: break;
2368:
2369: default:
2370: printf("%s: inappropriate for root file system\n",
1.336 cegger 2371: device_xname(root_device));
1.58 thorpej 2372: return (ENODEV);
2373: }
2374:
2375: /*
1.362 pgoyette 2376: * If user specified a root fs type, use it. Make sure the
2377: * specified type exists and has a mount_root()
1.58 thorpej 2378: */
1.362 pgoyette 2379: if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
2380: v = vfs_getopsbyname(rootfstype);
2381: error = EFTYPE;
2382: if (v != NULL) {
2383: if (v->vfs_mountroot != NULL) {
2384: error = (v->vfs_mountroot)();
2385: }
2386: v->vfs_refcount--;
2387: }
1.239 mycroft 2388: goto done;
2389: }
1.58 thorpej 2390:
2391: /*
2392: * Try each file system currently configured into the kernel.
2393: */
1.302 ad 2394: mutex_enter(&vfs_list_lock);
1.220 lukem 2395: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2396: if (v->vfs_mountroot == NULL)
1.58 thorpej 2397: continue;
2398: #ifdef DEBUG
1.197 thorpej 2399: aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58 thorpej 2400: #endif
1.302 ad 2401: v->vfs_refcount++;
2402: mutex_exit(&vfs_list_lock);
1.239 mycroft 2403: error = (*v->vfs_mountroot)();
1.302 ad 2404: mutex_enter(&vfs_list_lock);
2405: v->vfs_refcount--;
1.239 mycroft 2406: if (!error) {
1.197 thorpej 2407: aprint_normal("root file system type: %s\n",
2408: v->vfs_name);
1.79 thorpej 2409: break;
1.58 thorpej 2410: }
2411: }
1.302 ad 2412: mutex_exit(&vfs_list_lock);
1.58 thorpej 2413:
1.79 thorpej 2414: if (v == NULL) {
1.336 cegger 2415: printf("no file system for %s", device_xname(root_device));
1.264 thorpej 2416: if (device_class(root_device) == DV_DISK)
1.365 christos 2417: printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1.79 thorpej 2418: printf("\n");
1.239 mycroft 2419: error = EFTYPE;
1.79 thorpej 2420: }
1.239 mycroft 2421:
2422: done:
1.264 thorpej 2423: if (error && device_class(root_device) == DV_DISK) {
1.306 pooka 2424: VOP_CLOSE(rootvp, FREAD, FSCRED);
1.239 mycroft 2425: vrele(rootvp);
2426: }
2427: return (error);
1.58 thorpej 2428: }
1.326 ad 2429:
2430: /*
1.353 pooka 2431: * Get a new unique fsid
1.326 ad 2432: */
1.353 pooka 2433: void
2434: vfs_getnewfsid(struct mount *mp)
1.326 ad 2435: {
1.353 pooka 2436: static u_short xxxfs_mntid;
2437: fsid_t tfsid;
2438: int mtype;
1.326 ad 2439:
1.353 pooka 2440: mutex_enter(&mntid_lock);
2441: mtype = makefstype(mp->mnt_op->vfs_name);
2442: mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
2443: mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
2444: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2445: if (xxxfs_mntid == 0)
2446: ++xxxfs_mntid;
2447: tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
2448: tfsid.__fsid_val[1] = mtype;
2449: if (!CIRCLEQ_EMPTY(&mountlist)) {
2450: while (vfs_getvfs(&tfsid)) {
2451: tfsid.__fsid_val[0]++;
2452: xxxfs_mntid++;
2453: }
2454: }
2455: mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2456: mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2457: mutex_exit(&mntid_lock);
2458: }
1.326 ad 2459:
1.353 pooka 2460: /*
2461: * Make a 'unique' number from a mount type name.
2462: */
2463: long
2464: makefstype(const char *type)
2465: {
2466: long rv;
1.326 ad 2467:
1.353 pooka 2468: for (rv = 0; *type; type++) {
2469: rv <<= 2;
2470: rv ^= *type;
2471: }
2472: return rv;
2473: }
2474:
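/*
 * Example: makefstype() shifts the accumulator left two bits per
 * character and XORs the character in, so makefstype("ffs") works out as
 *
 *	rv = (0x000 << 2) ^ 'f' = 0x066
 *	rv = (0x066 << 2) ^ 'f' = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' = 0x78b
 *
 * The result is only statistically unique; different names can collide,
 * which is why vfs_getnewfsid() above still probes with vfs_getvfs()
 * before committing to an fsid.
 */
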
2475: /*
2476: * Set vnode attributes to VNOVAL
2477: */
2478: void
2479: vattr_null(struct vattr *vap)
2480: {
2481:
2482: vap->va_type = VNON;
2483:
2484: /*
2485: * Assign individually so that it is safe even if the size and
2486: * sign of each member vary.
2487: */
2488: vap->va_mode = VNOVAL;
2489: vap->va_nlink = VNOVAL;
2490: vap->va_uid = VNOVAL;
2491: vap->va_gid = VNOVAL;
2492: vap->va_fsid = VNOVAL;
2493: vap->va_fileid = VNOVAL;
2494: vap->va_size = VNOVAL;
2495: vap->va_blocksize = VNOVAL;
2496: vap->va_atime.tv_sec =
2497: vap->va_mtime.tv_sec =
2498: vap->va_ctime.tv_sec =
2499: vap->va_birthtime.tv_sec = VNOVAL;
2500: vap->va_atime.tv_nsec =
2501: vap->va_mtime.tv_nsec =
2502: vap->va_ctime.tv_nsec =
2503: vap->va_birthtime.tv_nsec = VNOVAL;
2504: vap->va_gen = VNOVAL;
2505: vap->va_flags = VNOVAL;
2506: vap->va_rdev = VNOVAL;
2507: vap->va_bytes = VNOVAL;
2508: vap->va_vaflags = 0;
2509: }
2510:
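/*
 * Example (sketch): the usual pattern is to vattr_null() a struct vattr,
 * set only the fields of interest and hand the result to VOP_SETATTR();
 * every field left at VNOVAL is ignored by the file system.  The vnode,
 * credential and error variable come from the surrounding code.
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;			// truncate the file to zero length
 *	error = VOP_SETATTR(vp, &va, cred);
 */
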
2511: #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
2512: #define ARRAY_PRINT(idx, arr) \
1.370 yamt 2513: ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1.353 pooka 2514:
2515: const char * const vnode_tags[] = { VNODE_TAGS };
2516: const char * const vnode_types[] = { VNODE_TYPES };
2517: const char vnode_flagbits[] = VNODE_FLAGBITS;
2518:
2519: /*
2520: * Print out a description of a vnode.
2521: */
2522: void
2523: vprint(const char *label, struct vnode *vp)
2524: {
2525: struct vnlock *vl;
2526: char bf[96];
2527: int flag;
2528:
2529: vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
2530: flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
1.361 christos 2531: snprintb(bf, sizeof(bf), vnode_flagbits, flag);
1.353 pooka 2532:
2533: if (label != NULL)
2534: printf("%s: ", label);
2535: printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
2536: "usecount %d, writecount %d, holdcount %d\n"
2537: "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
2538: vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
2539: ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
2540: vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
2541: vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
2542: if (vp->v_data != NULL) {
2543: printf("\t");
2544: VOP_PRINT(vp);
2545: }
2546: }
2547:
2548: #ifdef DEBUG
2549: /*
2550: * List all of the locked vnodes in the system.
2551: * Called when debugging the kernel.
2552: */
2553: void
2554: printlockedvnodes(void)
2555: {
2556: struct mount *mp, *nmp;
2557: struct vnode *vp;
2558:
2559: printf("Locked vnodes\n");
2560: mutex_enter(&mountlist_lock);
2561: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2562: mp = nmp) {
2563: if (vfs_busy(mp, &nmp)) {
2564: continue;
2565: }
2566: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2567: if (VOP_ISLOCKED(vp))
2568: vprint(NULL, vp);
2569: }
2570: mutex_enter(&mountlist_lock);
2571: vfs_unbusy(mp, false, &nmp);
2572: }
2573: mutex_exit(&mountlist_lock);
2574: }
2575: #endif
2576:
2577: /*
2578: * Do the usual access checking.
2579: * file_mode, uid and gid are from the vnode in question,
2580: * while acc_mode and cred are from the VOP_ACCESS parameter list.
2581: */
2582: int
2583: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2584: mode_t acc_mode, kauth_cred_t cred)
2585: {
2586: mode_t mask;
2587: int error, ismember;
2588:
2589: /*
2590: * Super-user always gets read/write access, but execute access depends
2591: * on at least one execute bit being set.
2592: */
2593: if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
2594: if ((acc_mode & VEXEC) && type != VDIR &&
2595: (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2596: return (EACCES);
2597: return (0);
2598: }
2599:
2600: mask = 0;
2601:
2602: /* Otherwise, check the owner. */
2603: if (kauth_cred_geteuid(cred) == uid) {
2604: if (acc_mode & VEXEC)
2605: mask |= S_IXUSR;
2606: if (acc_mode & VREAD)
2607: mask |= S_IRUSR;
2608: if (acc_mode & VWRITE)
2609: mask |= S_IWUSR;
2610: return ((file_mode & mask) == mask ? 0 : EACCES);
2611: }
2612:
2613: /* Otherwise, check the groups. */
2614: error = kauth_cred_ismember_gid(cred, gid, &ismember);
2615: if (error)
2616: return (error);
2617: if (kauth_cred_getegid(cred) == gid || ismember) {
2618: if (acc_mode & VEXEC)
2619: mask |= S_IXGRP;
2620: if (acc_mode & VREAD)
2621: mask |= S_IRGRP;
2622: if (acc_mode & VWRITE)
2623: mask |= S_IWGRP;
2624: return ((file_mode & mask) == mask ? 0 : EACCES);
2625: }
2626:
2627: /* Otherwise, check everyone else. */
2628: if (acc_mode & VEXEC)
2629: mask |= S_IXOTH;
2630: if (acc_mode & VREAD)
2631: mask |= S_IROTH;
2632: if (acc_mode & VWRITE)
2633: mask |= S_IWOTH;
2634: return ((file_mode & mask) == mask ? 0 : EACCES);
2635: }
2636:
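/*
 * Example (sketch): a file system's access check normally just feeds its
 * own attributes to vaccess().  "foofs_check_access", "struct foonode"
 * and its fields are hypothetical stand-ins for the per-fs inode.
 *
 *	static int
 *	foofs_check_access(struct vnode *vp, mode_t mode, kauth_cred_t cred)
 *	{
 *		struct foonode *fn = vp->v_data;
 *
 *		return vaccess(vp->v_type, fn->fn_mode, fn->fn_uid,
 *		    fn->fn_gid, mode, cred);
 *	}
 */
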
2637: /*
2638: * Given a file system name, look up the vfsops for that
2639: * file system, or return NULL if the file system isn't present
2640: * in the kernel.
2641: */
2642: struct vfsops *
2643: vfs_getopsbyname(const char *name)
2644: {
2645: struct vfsops *v;
2646:
2647: mutex_enter(&vfs_list_lock);
2648: LIST_FOREACH(v, &vfs_list, vfs_list) {
2649: if (strcmp(v->vfs_name, name) == 0)
2650: break;
2651: }
2652: if (v != NULL)
2653: v->vfs_refcount++;
2654: mutex_exit(&vfs_list_lock);
2655:
2656: return (v);
2657: }
2658:
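/*
 * Example (sketch): vfs_getopsbyname() returns the vfsops with
 * vfs_refcount bumped, so the caller must drop that reference when done
 * (compare the refcount handling in vfs_mountroot() and
 * sysctl_vfs_generic_fstypes() above).
 *
 *	struct vfsops *v;
 *
 *	if ((v = vfs_getopsbyname("ffs")) == NULL)
 *		return ENODEV;		// not configured into the kernel
 *	printf("found %s\n", v->vfs_name);
 *	mutex_enter(&vfs_list_lock);
 *	v->vfs_refcount--;		// drop the reference taken above
 *	mutex_exit(&vfs_list_lock);
 */
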
2659: void
2660: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2661: {
2662: const struct statvfs *mbp;
2663:
2664: if (sbp == (mbp = &mp->mnt_stat))
2665: return;
2666:
2667: (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2668: sbp->f_fsid = mbp->f_fsid;
2669: sbp->f_owner = mbp->f_owner;
2670: sbp->f_flag = mbp->f_flag;
2671: sbp->f_syncwrites = mbp->f_syncwrites;
2672: sbp->f_asyncwrites = mbp->f_asyncwrites;
2673: sbp->f_syncreads = mbp->f_syncreads;
2674: sbp->f_asyncreads = mbp->f_asyncreads;
2675: (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2676: (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2677: sizeof(sbp->f_fstypename));
2678: (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2679: sizeof(sbp->f_mntonname));
2680: (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2681: sizeof(sbp->f_mntfromname));
2682: sbp->f_namemax = mbp->f_namemax;
2683: }
2684:
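/*
 * Example (sketch): a file system's statvfs implementation fills in its
 * own counters and then finishes with copy_statvfs_info(), which copies
 * the generic fields (fsid, flags and the fstype/mnton/mntfrom names)
 * out of mp->mnt_stat.  "foofs_statvfs" is hypothetical.
 *
 *	static int
 *	foofs_statvfs(struct mount *mp, struct statvfs *sbp)
 *	{
 *		sbp->f_bsize = sbp->f_frsize = sbp->f_iosize = 512;
 *		sbp->f_blocks = sbp->f_bfree = sbp->f_bavail = 0;
 *		sbp->f_files = sbp->f_ffree = sbp->f_favail = 0;
 *		copy_statvfs_info(sbp, mp);
 *		return 0;
 *	}
 */
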
2685: int
2686: set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
2687: const char *vfsname, struct mount *mp, struct lwp *l)
2688: {
2689: int error;
2690: size_t size;
2691: struct statvfs *sfs = &mp->mnt_stat;
2692: int (*fun)(const void *, void *, size_t, size_t *);
2693:
2694: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
2695: sizeof(mp->mnt_stat.f_fstypename));
2696:
2697: if (onp) {
2698: struct cwdinfo *cwdi = l->l_proc->p_cwdi;
2699: fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
2700: if (cwdi->cwdi_rdir != NULL) {
2701: size_t len;
2702: char *bp;
2703: char *path = PNBUF_GET();
2704:
2705: bp = path + MAXPATHLEN;
2706: *--bp = '\0';
2707: rw_enter(&cwdi->cwdi_lock, RW_READER);
2708: error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
2709: path, MAXPATHLEN / 2, 0, l);
2710: rw_exit(&cwdi->cwdi_lock);
2711: if (error) {
2712: PNBUF_PUT(path);
2713: return error;
2714: }
2715:
2716: len = strlen(bp);
2717: if (len > sizeof(sfs->f_mntonname) - 1)
2718: len = sizeof(sfs->f_mntonname) - 1;
2719: (void)strncpy(sfs->f_mntonname, bp, len);
2720: PNBUF_PUT(path);
2721:
2722: if (len < sizeof(sfs->f_mntonname) - 1) {
2723: error = (*fun)(onp, &sfs->f_mntonname[len],
2724: sizeof(sfs->f_mntonname) - len - 1, &size);
2725: if (error)
2726: return error;
2727: size += len;
2728: } else {
2729: size = len;
2730: }
2731: } else {
2732: error = (*fun)(onp, &sfs->f_mntonname,
2733: sizeof(sfs->f_mntonname) - 1, &size);
2734: if (error)
2735: return error;
2736: }
2737: (void)memset(sfs->f_mntonname + size, 0,
2738: sizeof(sfs->f_mntonname) - size);
2739: }
2740:
2741: if (fromp) {
2742: fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
2743: error = (*fun)(fromp, sfs->f_mntfromname,
2744: sizeof(sfs->f_mntfromname) - 1, &size);
2745: if (error)
2746: return error;
2747: (void)memset(sfs->f_mntfromname + size, 0,
2748: sizeof(sfs->f_mntfromname) - size);
2749: }
2750: return 0;
2751: }
2752:
2753: void
2754: vfs_timestamp(struct timespec *ts)
2755: {
2756:
2757: nanotime(ts);
2758: }
2759:
2760: time_t rootfstime; /* recorded root fs time, if known */
2761: void
2762: setrootfstime(time_t t)
2763: {
2764: rootfstime = t;
2765: }
2766:
2767: /*
2768: * Sham lock manager for vnodes. This is a temporary measure.
2769: */
2770: int
2771: vlockmgr(struct vnlock *vl, int flags)
2772: {
2773:
2774: KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
2775:
2776: switch (flags & LK_TYPE_MASK) {
2777: case LK_SHARED:
2778: if (rw_tryenter(&vl->vl_lock, RW_READER)) {
2779: return 0;
2780: }
2781: if ((flags & LK_NOWAIT) != 0) {
2782: return EBUSY;
2783: }
2784: rw_enter(&vl->vl_lock, RW_READER);
2785: return 0;
2786:
2787: case LK_EXCLUSIVE:
2788: if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
2789: return 0;
2790: }
2791: if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
2792: rw_write_held(&vl->vl_lock)) {
2793: vl->vl_recursecnt++;
2794: return 0;
2795: }
2796: if ((flags & LK_NOWAIT) != 0) {
2797: return EBUSY;
2798: }
2799: rw_enter(&vl->vl_lock, RW_WRITER);
2800: return 0;
1.326 ad 2801:
2802: case LK_RELEASE:
2803: if (vl->vl_recursecnt != 0) {
2804: KASSERT(rw_write_held(&vl->vl_lock));
2805: vl->vl_recursecnt--;
2806: return 0;
2807: }
2808: rw_exit(&vl->vl_lock);
2809: return 0;
2810:
2811: default:
2812: panic("vlockmgr: flags %x", flags);
2813: }
2814: }
2815:
2816: int
2817: vlockstatus(struct vnlock *vl)
2818: {
2819:
2820: if (rw_write_held(&vl->vl_lock)) {
2821: return LK_EXCLUSIVE;
2822: }
2823: if (rw_read_held(&vl->vl_lock)) {
2824: return LK_SHARED;
2825: }
2826: return 0;
2827: }
1.353 pooka 2828:
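/*
 * Example (sketch): the sham lock manager maps directly onto a krwlock_t,
 * so a shared acquire/release pair on a vnode's lock (vl being, say,
 * &vp->v_lock) looks like this:
 *
 *	if (vlockmgr(vl, LK_SHARED) == 0) {
 *		KASSERT(vlockstatus(vl) == LK_SHARED);
 *		// read-only access to the vnode goes here
 *		(void)vlockmgr(vl, LK_RELEASE);
 *	}
 */
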
2829: /*
2830: * mount_specific_key_create --
2831: * Create a key for subsystem mount-specific data.
2832: */
2833: int
2834: mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
2835: {
2836:
2837: return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
2838: }
2839:
2840: /*
2841: * mount_specific_key_delete --
2842: * Delete a key for subsystem mount-specific data.
2843: */
2844: void
2845: mount_specific_key_delete(specificdata_key_t key)
2846: {
2847:
2848: specificdata_key_delete(mount_specificdata_domain, key);
2849: }
2850:
2851: /*
2852: * mount_initspecific --
2853: * Initialize a mount's specificdata container.
2854: */
2855: void
2856: mount_initspecific(struct mount *mp)
2857: {
2858: int error;
2859:
2860: error = specificdata_init(mount_specificdata_domain,
2861: &mp->mnt_specdataref);
2862: KASSERT(error == 0);
2863: }
2864:
2865: /*
2866: * mount_finispecific --
2867: * Finalize a mount's specificdata container.
2868: */
2869: void
2870: mount_finispecific(struct mount *mp)
2871: {
2872:
2873: specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
2874: }
2875:
2876: /*
2877: * mount_getspecific --
2878: * Return mount-specific data corresponding to the specified key.
2879: */
2880: void *
2881: mount_getspecific(struct mount *mp, specificdata_key_t key)
2882: {
2883:
2884: return (specificdata_getspecific(mount_specificdata_domain,
2885: &mp->mnt_specdataref, key));
2886: }
2887:
2888: /*
2889: * mount_setspecific --
2890: * Set mount-specific data corresponding to the specified key.
2891: */
2892: void
2893: mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
2894: {
2895:
2896: specificdata_setspecific(mount_specificdata_domain,
2897: &mp->mnt_specdataref, key, data);
2898: }
2899:
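/*
 * Example (sketch): a subsystem attaches private per-mount state by
 * creating a key once and then using the get/set accessors.  "foo_key",
 * "foo_dtor" and "struct foo_mountinfo" are hypothetical.
 *
 *	static specificdata_key_t foo_key;
 *
 *	// once, at subsystem initialization time
 *	error = mount_specific_key_create(&foo_key, foo_dtor);
 *
 *	// per mount: allocate and attach the private data
 *	struct foo_mountinfo *fmi = kmem_zalloc(sizeof(*fmi), KM_SLEEP);
 *	mount_setspecific(mp, foo_key, fmi);
 *
 *	// later lookups
 *	fmi = mount_getspecific(mp, foo_key);
 */
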
2900: int
2901: VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
2902: {
2903: int error;
2904:
2905: KERNEL_LOCK(1, NULL);
2906: error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
2907: KERNEL_UNLOCK_ONE(NULL);
2908:
2909: return error;
2910: }
2911:
2912: int
2913: VFS_START(struct mount *mp, int a)
2914: {
2915: int error;
2916:
2917: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2918: KERNEL_LOCK(1, NULL);
2919: }
2920: error = (*(mp->mnt_op->vfs_start))(mp, a);
2921: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2922: KERNEL_UNLOCK_ONE(NULL);
2923: }
2924:
2925: return error;
2926: }
2927:
2928: int
2929: VFS_UNMOUNT(struct mount *mp, int a)
2930: {
2931: int error;
2932:
2933: KERNEL_LOCK(1, NULL);
2934: error = (*(mp->mnt_op->vfs_unmount))(mp, a);
2935: KERNEL_UNLOCK_ONE(NULL);
2936:
2937: return error;
2938: }
2939:
2940: int
2941: VFS_ROOT(struct mount *mp, struct vnode **a)
2942: {
2943: int error;
2944:
2945: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2946: KERNEL_LOCK(1, NULL);
2947: }
2948: error = (*(mp->mnt_op->vfs_root))(mp, a);
2949: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2950: KERNEL_UNLOCK_ONE(NULL);
2951: }
2952:
2953: return error;
2954: }
2955:
2956: int
2957: VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
2958: {
2959: int error;
2960:
2961: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2962: KERNEL_LOCK(1, NULL);
2963: }
2964: error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
2965: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2966: KERNEL_UNLOCK_ONE(NULL);
2967: }
2968:
2969: return error;
2970: }
2971:
2972: int
2973: VFS_STATVFS(struct mount *mp, struct statvfs *a)
2974: {
2975: int error;
2976:
2977: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2978: KERNEL_LOCK(1, NULL);
2979: }
2980: error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
2981: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2982: KERNEL_UNLOCK_ONE(NULL);
2983: }
2984:
2985: return error;
2986: }
2987:
2988: int
2989: VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
2990: {
2991: int error;
2992:
2993: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2994: KERNEL_LOCK(1, NULL);
2995: }
2996: error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
2997: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2998: KERNEL_UNLOCK_ONE(NULL);
2999: }
3000:
3001: return error;
3002: }
3003:
3004: int
3005: VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
3006: {
3007: int error;
3008:
3009: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3010: KERNEL_LOCK(1, NULL);
3011: }
3012: error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
3013: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3014: KERNEL_UNLOCK_ONE(NULL);
3015: }
3016:
3017: return error;
3018: }
3019:
3020: int
3021: VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
3022: {
3023: int error;
3024:
3025: if ((vp->v_vflag & VV_MPSAFE) == 0) {
3026: KERNEL_LOCK(1, NULL);
3027: }
3028: error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
3029: if ((vp->v_vflag & VV_MPSAFE) == 0) {
3030: KERNEL_UNLOCK_ONE(NULL);
3031: }
3032:
3033: return error;
3034: }
3035:
3036: int
3037: VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
3038: {
3039: int error;
3040:
3041: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3042: KERNEL_LOCK(1, NULL);
3043: }
3044: error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
3045: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3046: KERNEL_UNLOCK_ONE(NULL);
3047: }
3048:
3049: return error;
3050: }
3051:
3052: int
3053: VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
3054: {
3055: int error;
3056:
3057: KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
3058: error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
3059: KERNEL_UNLOCK_ONE(NULL); /* XXX */
3060:
3061: return error;
3062: }
3063:
3064: int
3065: VFS_SUSPENDCTL(struct mount *mp, int a)
3066: {
3067: int error;
3068:
3069: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3070: KERNEL_LOCK(1, NULL);
3071: }
3072: error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
3073: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3074: KERNEL_UNLOCK_ONE(NULL);
3075: }
3076:
3077: return error;
3078: }
3079:
1.378 ! pooka 3080: #if defined(DDB) || defined(DEBUGPRINT)
1.353 pooka 3081: static const char buf_flagbits[] = BUF_FLAGBITS;
3082:
3083: void
3084: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
3085: {
3086: char bf[1024];
3087:
3088: (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
3089: PRIx64 " dev 0x%x\n",
3090: bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
3091:
1.361 christos 3092: snprintb(bf, sizeof(bf),
3093: buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1.353 pooka 3094: (*pr)(" error %d flags 0x%s\n", bp->b_error, bf);
3095:
3096: (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3097: bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.369 ad 3098: (*pr)(" data %p saveaddr %p\n",
3099: bp->b_data, bp->b_saveaddr);
1.353 pooka 3100: (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
3101: }
3102:
3103:
3104: void
3105: vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
3106: {
3107: char bf[256];
3108:
3109: uvm_object_printit(&vp->v_uobj, full, pr);
1.361 christos 3110: snprintb(bf, sizeof(bf),
3111: vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
1.353 pooka 3112: (*pr)("\nVNODE flags %s\n", bf);
3113: (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
3114: vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
3115:
3116: (*pr)("data %p writecount %ld holdcnt %ld\n",
3117: vp->v_data, vp->v_writecount, vp->v_holdcnt);
3118:
3119: (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
3120: ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
3121: ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
3122: vp->v_mount, vp->v_mountedhere);
3123:
3124: (*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);
3125:
3126: if (full) {
3127: struct buf *bp;
3128:
3129: (*pr)("clean bufs:\n");
3130: LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3131: (*pr)(" bp %p\n", bp);
3132: vfs_buf_print(bp, full, pr);
3133: }
3134:
3135: (*pr)("dirty bufs:\n");
3136: LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3137: (*pr)(" bp %p\n", bp);
3138: vfs_buf_print(bp, full, pr);
3139: }
3140: }
3141: }
3142:
3143: void
3144: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
3145: {
3146: char sbuf[256];
3147:
3148: (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3149: mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3150:
3151: (*pr)("fs_bshift %d dev_bshift = %d\n",
3152: mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3153:
1.361 christos 3154: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1.353 pooka 3155: (*pr)("flag = %s\n", sbuf);
3156:
1.361 christos 3157: snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1.353 pooka 3158: (*pr)("iflag = %s\n", sbuf);
3159:
3160: (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
3161: &mp->mnt_unmounting, &mp->mnt_updating);
3162:
3163: (*pr)("statvfs cache:\n");
3164: (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3165: (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3166: (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3167:
3168: (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3169: (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3170: (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3171: (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3172:
3173: (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3174: (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3175: (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3176: (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3177:
3178: (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3179: mp->mnt_stat.f_fsidx.__fsid_val[0],
3180: mp->mnt_stat.f_fsidx.__fsid_val[1]);
3181:
3182: (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3183: (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3184:
1.361 christos 3185: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
3186:
1.353 pooka 3187: (*pr)("\tflag = %s\n",sbuf);
3188: (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3189: (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3190: (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3191: (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3192: (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3193: (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3194: (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3195:
3196: {
3197: int cnt = 0;
3198: struct vnode *vp;
3199: (*pr)("locked vnodes =");
3200: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3201: if (VOP_ISLOCKED(vp)) {
3202: if ((++cnt % 6) == 0) {
3203: (*pr)(" %p,\n\t", vp);
3204: } else {
3205: (*pr)(" %p,", vp);
3206: }
3207: }
3208: }
3209: (*pr)("\n");
3210: }
3211:
3212: if (full) {
3213: int cnt = 0;
3214: struct vnode *vp;
3215: (*pr)("all vnodes =");
3216: TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3217: if (!TAILQ_NEXT(vp, v_mntvnodes)) {
3218: (*pr)(" %p", vp);
3219: } else if ((++cnt % 6) == 0) {
3220: (*pr)(" %p,\n\t", vp);
3221: } else {
3222: (*pr)(" %p,", vp);
3223: }
3224: }
3225: (*pr)("\n");
3226: }
3227: }
1.378 ! pooka 3228: #endif /* DDB || DEBUGPRINT */
1.372 elad 3229: