Annotation of src/sys/kern/vfs_subr.c, Revision 1.336
1.336 ! cegger 1: /* $NetBSD: vfs_subr.c,v 1.335 2008/02/24 23:16:24 dholland Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302 ad 9: * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74 thorpej 10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
1.32 cgd 39:
1.29 cgd 40: /*
1.30 mycroft 41: * Copyright (c) 1989, 1993
42: * The Regents of the University of California. All rights reserved.
1.29 cgd 43: * (c) UNIX System Laboratories, Inc.
44: * All or some portions of this file are derived from material licensed
45: * to the University of California by American Telephone and Telegraph
46: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47: * the permission of UNIX System Laboratories, Inc.
48: *
49: * Redistribution and use in source and binary forms, with or without
50: * modification, are permitted provided that the following conditions
51: * are met:
52: * 1. Redistributions of source code must retain the above copyright
53: * notice, this list of conditions and the following disclaimer.
54: * 2. Redistributions in binary form must reproduce the above copyright
55: * notice, this list of conditions and the following disclaimer in the
56: * documentation and/or other materials provided with the distribution.
1.204 agc 57: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 58: * may be used to endorse or promote products derived from this software
59: * without specific prior written permission.
60: *
61: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71: * SUCH DAMAGE.
72: *
1.32 cgd 73: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 74: */
75:
76: /*
1.296 pooka 77: * External virtual filesystem routines.
78: *
 * This file contains vfs subroutines which are heavily dependent on
 * the kernel and are not suitable for standalone use.  Examples include
 * routines involved in vnode and mountpoint management.
1.29 cgd 82: */
1.162 lukem 83:
84: #include <sys/cdefs.h>
1.336 ! cegger 85: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.335 2008/02/24 23:16:24 dholland Exp $");
1.78 mrg 86:
1.125 chs 87: #include "opt_ddb.h"
1.95 thorpej 88: #include "opt_compat_netbsd.h"
1.97 christos 89: #include "opt_compat_43.h"
1.29 cgd 90:
91: #include <sys/param.h>
1.30 mycroft 92: #include <sys/systm.h>
1.29 cgd 93: #include <sys/proc.h>
1.138 bouyer 94: #include <sys/kernel.h>
1.29 cgd 95: #include <sys/mount.h>
1.46 mycroft 96: #include <sys/fcntl.h>
1.29 cgd 97: #include <sys/vnode.h>
1.30 mycroft 98: #include <sys/stat.h>
1.29 cgd 99: #include <sys/namei.h>
100: #include <sys/ucred.h>
101: #include <sys/buf.h>
102: #include <sys/errno.h>
103: #include <sys/malloc.h>
1.51 christos 104: #include <sys/syscallargs.h>
1.58 thorpej 105: #include <sys/device.h>
1.192 christos 106: #include <sys/filedesc.h>
1.266 elad 107: #include <sys/kauth.h>
1.307 ad 108: #include <sys/atomic.h>
1.309 ad 109: #include <sys/kthread.h>
1.50 christos 110:
1.30 mycroft 111: #include <miscfs/specfs/specdev.h>
1.113 fvdl 112: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 113:
1.125 chs 114: #include <uvm/uvm.h>
1.255 yamt 115: #include <uvm/uvm_readahead.h>
1.125 chs 116: #include <uvm/uvm_ddb.h>
1.129 mrg 117:
118: #include <sys/sysctl.h>
1.77 mrg 119:
1.117 fvdl 120: extern int dovfsusermount; /* 1 => permit any user to mount filesystems */
1.263 chs 121: extern int vfs_magiclinks; /* 1 => expand "magic" symlinks */
1.117 fvdl 122:
1.309 ad 123: static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
124: static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
125: static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
126:
127: static int vrele_pending;
128: static kmutex_t vrele_lock;
129: static kcondvar_t vrele_cv;
130: static lwp_t *vrele_lwp;
1.113 fvdl 131:
1.309 ad 132: static pool_cache_t vnode_cache;
1.186 thorpej 133:
134: MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93 thorpej 135:
1.89 kleink 136: /*
137: * Local declarations.
138: */
1.276 hannken 139:
1.309 ad 140: static void vrele_thread(void *);
141: static void insmntque(vnode_t *, struct mount *);
142: static int getdevvp(dev_t, vnode_t **, enum vtype);
143: static vnode_t *getcleanvnode(void);;
144: void vpanic(vnode_t *, const char *);
145:
#ifdef DIAGNOSTIC
/*
 * vpanic:
 *
 *	Print a description of the vnode and then panic, using the given
 *	message.  Note: the message is passed through panic("%s\n", msg)
 *	verbatim; it is not a format string.  In non-DIAGNOSTIC kernels
 *	this compiles away to nothing.
 */
void
vpanic(vnode_t *vp, const char *msg)
{

	vprint(NULL, vp);
	panic("%s\n", msg);
}
#else
#define	vpanic(vp, msg)	/* nothing */
#endif
157:
158: void
159: vn_init1(void)
160: {
161:
162: vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
163: NULL, IPL_NONE, NULL, NULL, NULL);
164: KASSERT(vnode_cache != NULL);
165:
166: /* Create deferred release thread. */
167: mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
168: cv_init(&vrele_cv, "vrele");
169: if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
170: NULL, &vrele_lwp, "vrele"))
171: panic("fork vrele");
172: }
1.51 christos 173:
1.202 yamt 174: int
1.256 christos 175: vfs_drainvnodes(long target, struct lwp *l)
1.202 yamt 176: {
177:
178: while (numvnodes > target) {
1.309 ad 179: vnode_t *vp;
1.202 yamt 180:
1.309 ad 181: mutex_enter(&vnode_free_list_lock);
182: vp = getcleanvnode();
1.202 yamt 183: if (vp == NULL)
184: return EBUSY; /* give up */
1.309 ad 185: ungetnewvnode(vp);
1.202 yamt 186: }
187:
188: return 0;
189: }
190:
/*
 * getcleanvnode:
 *
 *	Grab a vnode from a free list and clean it out for reuse.
 *	Called with vnode_free_list_lock held; always returns with
 *	that lock released.  Returns NULL if neither free list yields
 *	a reclaimable vnode.
 */
vnode_t *
getcleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));

retry:
	/* Scan the "no buffers" list first, then the "held" list. */
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		if (vp->v_usecount != 0) {
			vpanic(vp, "free vnode isn't");
		}
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vpanic(vp, "clean vnode on freelist");
		}
		if (vp->v_freelisthd != listhd) {
			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
			vpanic(vp, "list head mismatch");
		}
		/* Skip vnodes whose interlock is contended. */
		if (!mutex_tryenter(&vp->v_interlock))
			continue;
		/*
		 * Our lwp might hold the underlying vnode
		 * locked, so don't try to reclaim a VI_LAYER
		 * node if it's locked.
		 */
		if ((vp->v_iflag & VI_XLOCK) == 0 &&
		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			break;
		}
		mutex_exit(&vp->v_interlock);
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return NULL;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before reusing it. We need to add a reference
	 * before doing this. If the vnode gains another reference while
	 * being cleaned out then we lose - retry.
	 */
	vp->v_usecount++;
	vclean(vp, DOCLOSE);
	if (vp->v_usecount == 1) {
		/* We're about to dirty it. */
		vp->v_iflag &= ~VI_CLEAN;
		mutex_exit(&vp->v_interlock);
		/* Dissociate any special-device state before reuse. */
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vp->v_type = VNON;
	} else {
		/*
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		KASSERT(vp->v_usecount > 1);
		vp->v_usecount--;
		mutex_exit(&vp->v_interlock);
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/* Sanity checks: a cleaned vnode must be completely idle. */
	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
		vpanic(vp, "cleaned vnode isn't");
	}
	if (vp->v_numoutput != 0) {
		vpanic(vp, "clean vnode has pending I/O's");
	}
	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
		vpanic(vp, "clean vnode on syncer list");
	}

	return vp;
}
291:
/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * synchronize access and to delay unmounting.
 *
 * => Interlock is not released on failure.
 * => If no interlock, the caller is expected to already hold a reference
 *    on the mount.
 * => If interlocked, the interlock must prevent the last reference to
 *    the mount from disappearing.
 */
int
vfs_busy(struct mount *mp, const krw_t op, kmutex_t *interlock)
{

	KASSERT(mp->mnt_refcnt > 0);

	atomic_inc_uint(&mp->mnt_refcnt);
	if (interlock != NULL) {
		/* Our new reference keeps the mount alive from here on. */
		mutex_exit(interlock);
	}
	if (mp->mnt_writer == curlwp) {
		/* Already write-locked by this LWP: just recurse. */
		mp->mnt_recursecnt++;
	} else {
		rw_enter(&mp->mnt_lock, op);
		if (op == RW_WRITER) {
			KASSERT(mp->mnt_writer == NULL);
			mp->mnt_writer = curlwp;
		}
	}
	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		/* The file system was unmounted meanwhile; undo and fail. */
		vfs_unbusy(mp, false);
		if (interlock != NULL) {
			mutex_enter(interlock);
		}
		return ENOENT;
	}

	return 0;
}
331:
/*
 * As vfs_busy(), but return immediately (EBUSY) if the mount cannot be
 * locked without waiting.  On success the interlock, if given, is
 * released.
 */
int
vfs_trybusy(struct mount *mp, krw_t op, kmutex_t *interlock)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (mp->mnt_writer == curlwp) {
		/* Already write-locked by this LWP: just recurse. */
		mp->mnt_recursecnt++;
	} else {
		if (!rw_tryenter(&mp->mnt_lock, op)) {
			return EBUSY;
		}
		if (op == RW_WRITER) {
			KASSERT(mp->mnt_writer == NULL);
			mp->mnt_writer = curlwp;
		}
	}
	atomic_inc_uint(&mp->mnt_refcnt);
	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		/* Unmounted while we were acquiring the lock. */
		vfs_unbusy(mp, false);
		return ENOENT;
	}
	if (interlock != NULL) {
		mutex_exit(interlock);
	}
	return 0;
}
363:
364: /*
1.327 ad 365: * Unlock a busy filesystem and drop reference to it. If 'keepref' is
366: * true, unlock but preserve the reference.
1.29 cgd 367: */
368: void
1.327 ad 369: vfs_unbusy(struct mount *mp, bool keepref)
1.29 cgd 370: {
371:
1.327 ad 372: KASSERT(mp->mnt_refcnt > 0);
373:
374: if (mp->mnt_writer == curlwp) {
375: KASSERT(rw_write_held(&mp->mnt_lock));
376: if (mp->mnt_recursecnt != 0) {
377: mp->mnt_recursecnt--;
378: } else {
379: mp->mnt_writer = NULL;
380: rw_exit(&mp->mnt_lock);
381: }
382: } else {
383: rw_exit(&mp->mnt_lock);
384: }
385: if (!keepref) {
386: vfs_destroy(mp);
387: }
1.29 cgd 388: }
389:
/*
 * vfs_rootmountalloc:
 *
 *	Lookup a filesystem type, and if found allocate and initialize
 *	a mount structure for it.  The new mount is returned busy
 *	(write-locked) with one reference held.
 *
 *	Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
	struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	/* Find the named file system type and take a reference on it. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	/* Allocate and minimally initialize the mount structure. */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return ENOMEM;
	mp->mnt_refcnt = 1;
	rw_init(&mp->mnt_lock);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	/* Return the mount busy (write-locked) to the caller. */
	(void)vfs_busy(mp, RW_WRITER, NULL);
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULL;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	mount_initspecific(mp);
	*mpp = mp;
	return (0);
}
438:
/*
 * Routines having to do with the management of the vnode table.
 */

/* Operations vector used for vnodes whose file system has gone away. */
extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 443:
1.29 cgd 444: /*
445: * Return the next vnode from the free list.
446: */
1.50 christos 447: int
1.247 thorpej 448: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
1.309 ad 449: vnode_t **vpp)
1.29 cgd 450: {
1.142 chs 451: struct uvm_object *uobj;
1.113 fvdl 452: static int toggle;
1.309 ad 453: vnode_t *vp;
1.153 thorpej 454: int error = 0, tryalloc;
1.158 chs 455:
1.159 enami 456: try_again:
1.327 ad 457: if (mp != NULL) {
1.103 sommerfe 458: /*
1.327 ad 459: * Mark filesystem busy while we're creating a
460: * vnode. If unmount is in progress, this will
461: * wait; if the unmount succeeds (only if umount
462: * -f), this will return an error. If the
463: * unmount fails, we'll keep going afterwards.
1.103 sommerfe 464: */
1.327 ad 465: error = vfs_busy(mp, RW_READER, NULL);
466: if (error)
1.103 sommerfe 467: return error;
468: }
1.29 cgd 469:
1.113 fvdl 470: /*
471: * We must choose whether to allocate a new vnode or recycle an
472: * existing one. The criterion for allocating a new one is that
473: * the total number of vnodes is less than the number desired or
474: * there are no vnodes on either free list. Generally we only
475: * want to recycle vnodes that have no buffers associated with
476: * them, so we look first on the vnode_free_list. If it is empty,
477: * we next consider vnodes with referencing buffers on the
478: * vnode_hold_list. The toggle ensures that half the time we
479: * will use a buffer from the vnode_hold_list, and half the time
480: * we will allocate a new one unless the list has grown to twice
481: * the desired size. We are reticent to recycle vnodes from the
482: * vnode_hold_list because we will lose the identity of all its
483: * referencing buffers.
484: */
1.142 chs 485:
1.153 thorpej 486: vp = NULL;
487:
1.309 ad 488: mutex_enter(&vnode_free_list_lock);
1.153 thorpej 489:
1.113 fvdl 490: toggle ^= 1;
491: if (numvnodes > 2 * desiredvnodes)
492: toggle = 0;
493:
1.153 thorpej 494: tryalloc = numvnodes < desiredvnodes ||
1.159 enami 495: (TAILQ_FIRST(&vnode_free_list) == NULL &&
496: (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153 thorpej 497:
1.309 ad 498: if (tryalloc) {
1.206 yamt 499: numvnodes++;
1.309 ad 500: mutex_exit(&vnode_free_list_lock);
1.310 pooka 501: if ((vp = vnalloc(NULL)) == NULL) {
1.309 ad 502: mutex_enter(&vnode_free_list_lock);
503: numvnodes--;
504: } else
505: vp->v_usecount = 1;
506: }
507:
508: if (vp == NULL) {
509: vp = getcleanvnode();
510: if (vp == NULL) {
1.327 ad 511: if (mp != NULL) {
512: vfs_unbusy(mp, false);
513: }
1.153 thorpej 514: if (tryalloc) {
515: printf("WARNING: unable to allocate new "
516: "vnode, retrying...\n");
517: (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
518: goto try_again;
519: }
1.132 jdolecek 520: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 521: *vpp = 0;
522: return (ENFILE);
523: }
1.302 ad 524: vp->v_iflag = 0;
525: vp->v_vflag = 0;
526: vp->v_uflag = 0;
1.158 chs 527: vp->v_socket = NULL;
1.29 cgd 528: }
1.309 ad 529:
530: KASSERT(vp->v_usecount == 1);
531: KASSERT(vp->v_freelisthd == NULL);
532: KASSERT(LIST_EMPTY(&vp->v_nclist));
533: KASSERT(LIST_EMPTY(&vp->v_dnclist));
534:
1.29 cgd 535: vp->v_type = VNON;
1.104 wrstuden 536: vp->v_vnlock = &vp->v_lock;
1.29 cgd 537: vp->v_tag = tag;
538: vp->v_op = vops;
539: insmntque(vp, mp);
1.30 mycroft 540: *vpp = vp;
541: vp->v_data = 0;
1.142 chs 542:
543: /*
544: * initialize uvm_object within vnode.
545: */
546:
1.158 chs 547: uobj = &vp->v_uobj;
548: KASSERT(uobj->pgops == &uvm_vnodeops);
549: KASSERT(uobj->uo_npages == 0);
550: KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288 yamt 551: vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142 chs 552:
1.309 ad 553: if (mp != NULL) {
554: if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
555: vp->v_vflag |= VV_MPSAFE;
1.327 ad 556: vfs_unbusy(mp, true);
1.309 ad 557: }
558:
1.29 cgd 559: return (0);
1.130 fvdl 560: }
561:
/*
 * ungetnewvnode:
 *
 *	This is really just the reverse of getnewvnode().  Needed for
 *	VFS_VGET functions who may need to push back a vnode in case
 *	of a locking race.  The vnode must be unused (usecount 1, no
 *	file system data attached).
 */
void
ungetnewvnode(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(&vp->v_interlock);
	/* Mark it clean so vrelel() will destroy rather than deactivate. */
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);
}
579:
580: /*
581: * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
582: * marker vnode and we are prepared to wait for the allocation.
583: */
584: vnode_t *
1.310 pooka 585: vnalloc(struct mount *mp)
1.130 fvdl 586: {
1.309 ad 587: vnode_t *vp;
588:
589: vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
590: if (vp == NULL) {
591: return NULL;
592: }
593:
594: memset(vp, 0, sizeof(*vp));
595: UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
596: cv_init(&vp->v_cv, "vnode");
597: /*
598: * done by memset() above.
599: * LIST_INIT(&vp->v_nclist);
600: * LIST_INIT(&vp->v_dnclist);
601: */
602:
603: if (mp != NULL) {
604: vp->v_mount = mp;
605: vp->v_type = VBAD;
606: vp->v_iflag = VI_MARKER;
607: } else {
1.326 ad 608: rw_init(&vp->v_lock.vl_lock);
1.309 ad 609: }
610:
611: return vp;
612: }
613:
614: /*
615: * Free an unused, unreferenced vnode.
616: */
617: void
1.310 pooka 618: vnfree(vnode_t *vp)
1.309 ad 619: {
620:
621: KASSERT(vp->v_usecount == 0);
622:
623: if ((vp->v_iflag & VI_MARKER) == 0) {
1.326 ad 624: rw_destroy(&vp->v_lock.vl_lock);
1.309 ad 625: mutex_enter(&vnode_free_list_lock);
626: numvnodes--;
627: mutex_exit(&vnode_free_list_lock);
628: }
629:
630: UVM_OBJ_DESTROY(&vp->v_uobj);
631: cv_destroy(&vp->v_cv);
632: pool_cache_put(vnode_cache, vp);
633: }
634:
/*
 * vremfree:
 *
 *	Remove a vnode from whichever free list it is on.  Called with
 *	the vnode's interlock held and a use count of zero.
 */
static inline void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		/* Held vnodes live on the hold list. */
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}
659:
660: /*
661: * Move a vnode from one mount queue to another.
662: */
1.260 yamt 663: static void
1.309 ad 664: insmntque(vnode_t *vp, struct mount *mp)
1.29 cgd 665: {
1.327 ad 666: struct mount *omp;
1.29 cgd 667:
1.103 sommerfe 668: #ifdef DIAGNOSTIC
669: if ((mp != NULL) &&
1.207 dbj 670: (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113 fvdl 671: !(mp->mnt_flag & MNT_SOFTDEP) &&
672: vp->v_tag != VT_VFS) {
1.103 sommerfe 673: panic("insmntque into dying filesystem");
674: }
675: #endif
1.217 junyoung 676:
1.309 ad 677: mutex_enter(&mntvnode_lock);
1.29 cgd 678: /*
679: * Delete from old mount point vnode list, if on one.
680: */
1.327 ad 681: if ((omp = vp->v_mount) != NULL)
1.272 reinoud 682: TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29 cgd 683: /*
1.327 ad 684: * Insert into list of vnodes for the new mount point, if
685: * available. The caller must take a reference on the mount
686: * structure and donate to the vnode.
1.29 cgd 687: */
1.279 pooka 688: if ((vp->v_mount = mp) != NULL)
689: TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.309 ad 690: mutex_exit(&mntvnode_lock);
1.327 ad 691:
692: if (omp != NULL) {
693: /* Release reference to old mount. */
694: vfs_destroy(omp);
695: }
1.29 cgd 696: }
697:
698: /*
699: * Create a vnode for a block device.
1.59 thorpej 700: * Used for root filesystem and swap areas.
1.29 cgd 701: * Also used for memory file system special devices.
702: */
1.50 christos 703: int
1.309 ad 704: bdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 705: {
1.30 mycroft 706:
707: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 708: }
709:
710: /*
711: * Create a vnode for a character device.
712: * Used for kernfs and some console handling.
713: */
1.50 christos 714: int
1.309 ad 715: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 716: {
1.30 mycroft 717:
718: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 719: }
720:
721: /*
722: * Create a vnode for a device.
723: * Used by bdevvp (block device) for root file system etc.,
724: * and by cdevvp (character device) for console and kernfs.
725: */
1.260 yamt 726: static int
1.309 ad 727: getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
1.29 cgd 728: {
1.309 ad 729: vnode_t *vp;
730: vnode_t *nvp;
1.29 cgd 731: int error;
732:
1.80 fvdl 733: if (dev == NODEV) {
1.302 ad 734: *vpp = NULL;
1.29 cgd 735: return (0);
1.80 fvdl 736: }
1.50 christos 737: error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 738: if (error) {
1.302 ad 739: *vpp = NULL;
1.29 cgd 740: return (error);
741: }
742: vp = nvp;
743: vp->v_type = type;
1.309 ad 744: vp->v_vflag |= VV_MPSAFE;
1.297 pooka 745: uvm_vnp_setsize(vp, 0);
1.318 ad 746: spec_node_init(vp, dev);
1.29 cgd 747: *vpp = vp;
748: return (0);
749: }
750:
/*
 * vget:
 *
 *	Grab a particular vnode, increment its reference count and
 *	optionally lock it.  If the vnode is in the middle of being
 *	eliminated in vgone (VI_XLOCK/VI_FREEING set), we cannot grab
 *	it: wait for the transition to complete and return ENOENT to
 *	indicate the vnode is no longer usable (possibly having been
 *	changed to a new file system type).
 *
 *	'flags' may contain LK_INTERLOCK (caller already holds the
 *	interlock), LK_NOWAIT, and LK_TYPE_MASK bits to also take the
 *	vnode lock.
 */
int
vget(vnode_t *vp, int flags)
{
	int error;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((flags & LK_INTERLOCK) == 0)
		mutex_enter(&vp->v_interlock);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
	}
	if (++vp->v_usecount == 0) {
		vpanic(vp, "vget: usecount overflow");
	}

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking if
	 * the VI_XLOCK or VI_FREEING flags are set.
	 */
	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			/* Drop the reference we just took. */
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_XLOCK | VI_FREEING);
		vrelel(vp, 0);
		return ENOENT;
	}
	if (flags & LK_TYPE_MASK) {
		/* Also take the vnode lock; this drops the interlock. */
		error = vn_lock(vp, flags | LK_INTERLOCK);
		if (error != 0) {
			vrele(vp);
		}
		return error;
	}
	mutex_exit(&vp->v_interlock);
	return 0;
}
805:
/*
 * vput:
 *
 *	Convenience wrapper: unlock the vnode and release the
 *	reference, i.e. VOP_UNLOCK() followed by vrele().
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp, 0);
	vrele(vp);
}
818:
819: /*
1.309 ad 820: * Vnode release. If reference count drops to zero, call inactive
821: * routine and either return to freelist or free to the pool.
1.29 cgd 822: */
1.309 ad 823: void
1.324 pooka 824: vrelel(vnode_t *vp, int flags)
1.29 cgd 825: {
1.309 ad 826: bool recycle, defer;
827: int error;
828:
829: KASSERT(mutex_owned(&vp->v_interlock));
830: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.315 ad 831: KASSERT(vp->v_freelisthd == NULL);
1.29 cgd 832:
1.309 ad 833: if (vp->v_op == dead_vnodeop_p && (vp->v_iflag & VI_CLEAN) == 0) {
834: vpanic(vp, "dead but not clean");
835: }
836:
837: /*
838: * If not the last reference, just drop the reference count
839: * and unlock.
840: */
841: if (vp->v_usecount > 1) {
842: vp->v_usecount--;
843: vp->v_iflag |= VI_INACTREDO;
844: mutex_exit(&vp->v_interlock);
1.29 cgd 845: return;
1.80 fvdl 846: }
1.309 ad 847: if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
848: vpanic(vp, "vput: bad ref count");
1.29 cgd 849: }
1.309 ad 850:
1.30 mycroft 851: /*
1.309 ad 852: * If not clean, deactivate the vnode, but preserve
853: * our reference across the call to VOP_INACTIVE().
1.30 mycroft 854: */
1.309 ad 855: retry:
856: if ((vp->v_iflag & VI_CLEAN) == 0) {
857: recycle = false;
858: /*
859: * XXX This ugly block can be largely eliminated if
860: * locking is pushed down into the file systems.
861: */
862: if (curlwp == uvm.pagedaemon_lwp) {
863: /* The pagedaemon can't wait around; defer. */
864: defer = true;
865: } else if (curlwp == vrele_lwp) {
866: /* We have to try harder. */
867: vp->v_iflag &= ~VI_INACTREDO;
868: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
869: LK_RETRY);
870: if (error != 0) {
871: /* XXX */
872: vpanic(vp, "vrele: unable to lock %p");
873: }
874: defer = false;
875: } else if ((vp->v_iflag & VI_LAYER) != 0) {
876: /*
877: * Acquiring the stack's lock in vclean() even
878: * for an honest vput/vrele is dangerous because
879: * our caller may hold other vnode locks; defer.
880: */
881: defer = true;
882: } else {
883: /* If we can't acquire the lock, then defer. */
884: vp->v_iflag &= ~VI_INACTREDO;
885: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
886: LK_NOWAIT);
887: if (error != 0) {
888: defer = true;
889: mutex_enter(&vp->v_interlock);
890: } else {
891: defer = false;
892: }
893: }
894:
895: if (defer) {
896: /*
897: * Defer reclaim to the kthread; it's not safe to
898: * clean it here. We donate it our last reference.
899: */
900: KASSERT(mutex_owned(&vp->v_interlock));
901: KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
902: vp->v_iflag |= VI_INACTPEND;
903: mutex_enter(&vrele_lock);
904: TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
905: if (++vrele_pending > (desiredvnodes >> 8))
906: cv_signal(&vrele_cv);
907: mutex_exit(&vrele_lock);
908: mutex_exit(&vp->v_interlock);
909: return;
910: }
911:
1.318 ad 912: #ifdef DIAGNOSTIC
1.321 ad 913: if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
914: vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
1.318 ad 915: vprint("vrelel: missing VOP_CLOSE()", vp);
916: }
917: #endif
918:
1.309 ad 919: /*
1.312 ad 920: * The vnode can gain another reference while being
921: * deactivated. If VOP_INACTIVE() indicates that
922: * the described file has been deleted, then recycle
923: * the vnode irrespective of additional references.
924: * Another thread may be waiting to re-use the on-disk
925: * inode.
926: *
927: * Note that VOP_INACTIVE() will drop the vnode lock.
1.309 ad 928: */
929: VOP_INACTIVE(vp, &recycle);
930: mutex_enter(&vp->v_interlock);
1.312 ad 931: if (!recycle) {
932: if (vp->v_usecount > 1) {
933: vp->v_usecount--;
934: mutex_exit(&vp->v_interlock);
935: return;
936: }
1.309 ad 937:
1.312 ad 938: /*
939: * If we grew another reference while
940: * VOP_INACTIVE() was underway, retry.
941: */
942: if ((vp->v_iflag & VI_INACTREDO) != 0) {
943: goto retry;
944: }
1.309 ad 945: }
946:
947: /* Take care of space accounting. */
948: if (vp->v_iflag & VI_EXECMAP) {
949: atomic_add_int(&uvmexp.execpages,
950: -vp->v_uobj.uo_npages);
951: atomic_add_int(&uvmexp.filepages,
952: vp->v_uobj.uo_npages);
953: }
954: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED);
955: vp->v_vflag &= ~VV_MAPPED;
956:
957: /*
958: * Recycle the vnode if the file is now unused (unlinked),
959: * otherwise just free it.
960: */
961: if (recycle) {
962: vclean(vp, DOCLOSE);
963: }
964: KASSERT(vp->v_usecount > 0);
1.298 pooka 965: }
1.309 ad 966:
967: if (--vp->v_usecount != 0) {
968: /* Gained another reference while being reclaimed. */
969: mutex_exit(&vp->v_interlock);
970: return;
1.147 chs 971: }
1.298 pooka 972:
1.309 ad 973: if ((vp->v_iflag & VI_CLEAN) != 0) {
974: /*
975: * It's clean so destroy it. It isn't referenced
976: * anywhere since it has been reclaimed.
977: */
978: KASSERT(vp->v_holdcnt == 0);
979: KASSERT(vp->v_writecount == 0);
980: mutex_exit(&vp->v_interlock);
981: insmntque(vp, NULL);
1.318 ad 982: if (vp->v_type == VBLK || vp->v_type == VCHR) {
983: spec_node_destroy(vp);
984: }
1.310 pooka 985: vnfree(vp);
1.298 pooka 986: } else {
1.309 ad 987: /*
988: * Otherwise, put it back onto the freelist. It
989: * can't be destroyed while still associated with
990: * a file system.
991: */
992: mutex_enter(&vnode_free_list_lock);
993: if (vp->v_holdcnt > 0) {
994: vp->v_freelisthd = &vnode_hold_list;
995: } else {
996: vp->v_freelisthd = &vnode_free_list;
997: }
998: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
999: mutex_exit(&vnode_free_list_lock);
1000: mutex_exit(&vp->v_interlock);
1.298 pooka 1001: }
1002: }
1003:
/*
 * Release a reference to a vnode.  Takes the interlock and hands the
 * vnode to vrelel(), which deactivates/frees it if this was the last
 * reference; vrelel() drops the interlock.
 */
void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	mutex_enter(&vp->v_interlock);
	vrelel(vp, 0);
}
1013:
1.309 ad 1014: static void
1015: vrele_thread(void *cookie)
1.298 pooka 1016: {
1.309 ad 1017: vnode_t *vp;
1.298 pooka 1018:
1.309 ad 1019: for (;;) {
1020: mutex_enter(&vrele_lock);
1021: while (TAILQ_EMPTY(&vrele_list)) {
1022: cv_timedwait(&vrele_cv, &vrele_lock, hz);
1023: }
1024: vp = TAILQ_FIRST(&vrele_list);
1025: TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1026: vrele_pending--;
1027: mutex_exit(&vrele_lock);
1028:
1029: /*
1030: * If not the last reference, then ignore the vnode
1031: * and look for more work.
1032: */
1033: mutex_enter(&vp->v_interlock);
1034: KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
1035: vp->v_iflag &= ~VI_INACTPEND;
1036: if (vp->v_usecount > 1) {
1037: vp->v_usecount--;
1038: mutex_exit(&vp->v_interlock);
1039: continue;
1040: }
1.324 pooka 1041: vrelel(vp, 0);
1.309 ad 1042: }
1.29 cgd 1043: }
1044:
1045: /*
1046: * Page or buffer structure gets a reference.
1.258 chs 1047: * Called with v_interlock held.
1.29 cgd 1048: */
1.30 mycroft 1049: void
1.309 ad 1050: vholdl(vnode_t *vp)
1.29 cgd 1051: {
1052:
1.309 ad 1053: KASSERT(mutex_owned(&vp->v_interlock));
1054: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1055:
1056: if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1057: mutex_enter(&vnode_free_list_lock);
1058: KASSERT(vp->v_freelisthd == &vnode_free_list);
1059: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1060: vp->v_freelisthd = &vnode_hold_list;
1061: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1062: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1063: }
1.29 cgd 1064: }
1065:
1066: /*
1067: * Page or buffer structure frees a reference.
1.258 chs 1068: * Called with v_interlock held.
1.29 cgd 1069: */
1.30 mycroft 1070: void
1.309 ad 1071: holdrelel(vnode_t *vp)
1.29 cgd 1072: {
1073:
1.309 ad 1074: KASSERT(mutex_owned(&vp->v_interlock));
1075: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1.142 chs 1076:
1.309 ad 1077: if (vp->v_holdcnt <= 0) {
1078: vpanic(vp, "holdrelel: holdcnt vp %p");
1079: }
1.142 chs 1080:
1.309 ad 1081: vp->v_holdcnt--;
1082: if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1083: mutex_enter(&vnode_free_list_lock);
1084: KASSERT(vp->v_freelisthd == &vnode_hold_list);
1085: TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1086: vp->v_freelisthd = &vnode_free_list;
1087: TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1088: mutex_exit(&vnode_free_list_lock);
1.113 fvdl 1089: }
1.81 ross 1090: }
1091:
1092: /*
1.309 ad 1093: * Vnode reference, where a reference is already held by some other
1094: * object (for example, a file structure).
1.81 ross 1095: */
1096: void
1.309 ad 1097: vref(vnode_t *vp)
1.81 ross 1098: {
1099:
1.309 ad 1100: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1101:
1102: mutex_enter(&vp->v_interlock);
1103: if (vp->v_usecount <= 0) {
1104: vpanic(vp, "vref used where vget required");
1105: }
1106: if (++vp->v_usecount == 0) {
1107: vpanic(vp, "vref: usecount overflow");
1.112 mycroft 1108: }
1.309 ad 1109: mutex_exit(&vp->v_interlock);
1.29 cgd 1110: }
1111:
1112: /*
1113: * Remove any vnodes in the vnode table belonging to mount point mp.
1114: *
1.183 yamt 1115: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1116: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1117: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1118: * that are found.
1.183 yamt 1119: *
1120: * If WRITECLOSE is set, only flush out regular file vnodes open for
1121: * writing.
1122: *
1123: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1124: */
1.30 mycroft 1125: #ifdef DEBUG
1126: int busyprt = 0; /* print out busy vnodes */
1127: struct ctldebug debug1 = { "busyprt", &busyprt };
1128: #endif
1.29 cgd 1129:
1.334 ad 1130: static vnode_t *
1131: vflushnext(vnode_t *mvp, int *when)
1132: {
1133:
1134: if (hardclock_ticks > *when) {
1135: mutex_exit(&mntvnode_lock);
1136: yield();
1137: mutex_enter(&mntvnode_lock);
1138: *when = hardclock_ticks + hz / 10;
1139: }
1140:
1141: return vunmark(mvp);
1142: }
1143:
1.50 christos 1144: int
1.309 ad 1145: vflush(struct mount *mp, vnode_t *skipvp, int flags)
1.29 cgd 1146: {
1.309 ad 1147: vnode_t *vp, *mvp;
1.334 ad 1148: int busy = 0, when = 0;
1.29 cgd 1149:
1.309 ad 1150: /* Allocate a marker vnode. */
1.310 pooka 1151: if ((mvp = vnalloc(mp)) == NULL)
1.309 ad 1152: return (ENOMEM);
1153:
1154: mutex_enter(&mntvnode_lock);
1.273 reinoud 1155: /*
1156: * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1157: * and vclean() are called
1158: */
1.334 ad 1159: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1160: vp = vflushnext(mvp, &when)) {
1.309 ad 1161: vmark(mvp, vp);
1162: if (vp->v_mount != mp || vismarker(vp))
1163: continue;
1.29 cgd 1164: /*
1165: * Skip over a selected vnode.
1166: */
1167: if (vp == skipvp)
1168: continue;
1.309 ad 1169: mutex_enter(&vp->v_interlock);
1.29 cgd 1170: /*
1.315 ad 1171: * Ignore clean but still referenced vnodes.
1172: */
1173: if ((vp->v_iflag & VI_CLEAN) != 0) {
1174: mutex_exit(&vp->v_interlock);
1175: continue;
1176: }
1177: /*
1.309 ad 1178: * Skip over a vnodes marked VSYSTEM.
1.29 cgd 1179: */
1.302 ad 1180: if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1.309 ad 1181: mutex_exit(&vp->v_interlock);
1.29 cgd 1182: continue;
1.80 fvdl 1183: }
1.29 cgd 1184: /*
1.30 mycroft 1185: * If WRITECLOSE is set, only flush out regular file
1186: * vnodes open for writing.
1187: */
1188: if ((flags & WRITECLOSE) &&
1.92 thorpej 1189: (vp->v_writecount == 0 || vp->v_type != VREG)) {
1.309 ad 1190: mutex_exit(&vp->v_interlock);
1.30 mycroft 1191: continue;
1.92 thorpej 1192: }
1.30 mycroft 1193: /*
1.29 cgd 1194: * With v_usecount == 0, all we need to do is clear
1195: * out the vnode data structures and we are done.
1196: */
1197: if (vp->v_usecount == 0) {
1.309 ad 1198: mutex_exit(&mntvnode_lock);
1199: vremfree(vp);
1200: vp->v_usecount++;
1201: vclean(vp, DOCLOSE);
1.324 pooka 1202: vrelel(vp, 0);
1.309 ad 1203: mutex_enter(&mntvnode_lock);
1.29 cgd 1204: continue;
1205: }
1206: /*
1.30 mycroft 1207: * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 1208: * For block or character devices, revert to an
1.318 ad 1209: * anonymous device. For all other files, just
1210: * kill them.
1.29 cgd 1211: */
1212: if (flags & FORCECLOSE) {
1.309 ad 1213: mutex_exit(&mntvnode_lock);
1214: vp->v_usecount++;
1.29 cgd 1215: if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.309 ad 1216: vclean(vp, DOCLOSE);
1.324 pooka 1217: vrelel(vp, 0);
1.29 cgd 1218: } else {
1.309 ad 1219: vclean(vp, 0);
1.318 ad 1220: vp->v_op = spec_vnodeop_p; /* XXXSMP */
1.320 ad 1221: mutex_exit(&vp->v_interlock);
1222: /*
1223: * The vnode isn't clean, but still resides
1224: * on the mount list. Remove it. XXX This
1225: * is a bit dodgy.
1226: */
1227: insmntque(vp, NULL);
1228: vrele(vp);
1.29 cgd 1229: }
1.309 ad 1230: mutex_enter(&mntvnode_lock);
1.29 cgd 1231: continue;
1232: }
1.30 mycroft 1233: #ifdef DEBUG
1.29 cgd 1234: if (busyprt)
1235: vprint("vflush: busy vnode", vp);
1.30 mycroft 1236: #endif
1.309 ad 1237: mutex_exit(&vp->v_interlock);
1.29 cgd 1238: busy++;
1239: }
1.309 ad 1240: mutex_exit(&mntvnode_lock);
1.310 pooka 1241: vnfree(mvp);
1.29 cgd 1242: if (busy)
1243: return (EBUSY);
1244: return (0);
1245: }
1246:
1247: /*
1248: * Disassociate the underlying file system from a vnode.
1.309 ad 1249: *
1250: * Must be called with the interlock held, and will return with it held.
1.29 cgd 1251: */
1.309 ad 1252: void
1253: vclean(vnode_t *vp, int flags)
1.29 cgd 1254: {
1.309 ad 1255: lwp_t *l = curlwp;
1256: bool recycle, active;
1.318 ad 1257: int error;
1.29 cgd 1258:
1.309 ad 1259: KASSERT(mutex_owned(&vp->v_interlock));
1260: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1261: KASSERT(vp->v_usecount != 0);
1.166 chs 1262:
1.309 ad 1263: /* If cleaning is already in progress wait until done and return. */
1264: if (vp->v_iflag & VI_XLOCK) {
1265: vwait(vp, VI_XLOCK);
1266: return;
1267: }
1.166 chs 1268:
1.309 ad 1269: /* If already clean, nothing to do. */
1270: if ((vp->v_iflag & VI_CLEAN) != 0) {
1271: return;
1.112 mycroft 1272: }
1.87 pk 1273:
1.29 cgd 1274: /*
1.309 ad 1275: * Prevent the vnode from being recycled or brought into use
1276: * while we clean it out.
1.29 cgd 1277: */
1.302 ad 1278: vp->v_iflag |= VI_XLOCK;
1279: if (vp->v_iflag & VI_EXECMAP) {
1.307 ad 1280: atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1281: atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1.147 chs 1282: }
1.302 ad 1283: vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1.309 ad 1284: active = (vp->v_usecount > 1);
1.142 chs 1285:
1.309 ad 1286: /* XXXAD should not lock vnode under layer */
1287: VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1.80 fvdl 1288:
1.98 wrstuden 1289: /*
1.142 chs 1290: * Clean out any cached data associated with the vnode.
1.318 ad 1291: * If purging an active vnode, it must be closed and
1292: * deactivated before being reclaimed. Note that the
1293: * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1294: */
1.166 chs 1295: if (flags & DOCLOSE) {
1.256 christos 1296: error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.318 ad 1297: if (error != 0)
1.256 christos 1298: error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.211 dbj 1299: KASSERT(error == 0);
1.302 ad 1300: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.318 ad 1301: if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1302: spec_node_revoke(vp);
1.231 mycroft 1303: }
1.166 chs 1304: }
1.29 cgd 1305: if (active) {
1.309 ad 1306: VOP_INACTIVE(vp, &recycle);
1.80 fvdl 1307: } else {
1308: /*
1309: * Any other processes trying to obtain this lock must first
1.302 ad 1310: * wait for VI_XLOCK to clear, then call the new lock operation.
1.80 fvdl 1311: */
1312: VOP_UNLOCK(vp, 0);
1.29 cgd 1313: }
1.142 chs 1314:
1.309 ad 1315: /* Disassociate the underlying file system from the vnode. */
1316: if (VOP_RECLAIM(vp)) {
1317: vpanic(vp, "vclean: cannot reclaim");
1.87 pk 1318: }
1.30 mycroft 1319:
1.169 chs 1320: KASSERT(vp->v_uobj.uo_npages == 0);
1.255 yamt 1321: if (vp->v_type == VREG && vp->v_ractx != NULL) {
1322: uvm_ra_freectx(vp->v_ractx);
1323: vp->v_ractx = NULL;
1324: }
1.80 fvdl 1325: cache_purge(vp);
1326:
1.309 ad 1327: /* Done with purge, notify sleepers of the grim news. */
1.30 mycroft 1328: vp->v_op = dead_vnodeop_p;
1329: vp->v_tag = VT_NON;
1.309 ad 1330: mutex_enter(&vp->v_interlock);
1331: vp->v_vnlock = &vp->v_lock;
1.332 ad 1332: KNOTE(&vp->v_klist, NOTE_REVOKE);
1.312 ad 1333: vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1.304 ad 1334: vp->v_vflag &= ~VV_LOCKSWORK;
1.319 ad 1335: if ((flags & DOCLOSE) != 0) {
1.318 ad 1336: vp->v_iflag |= VI_CLEAN;
1337: }
1.309 ad 1338: cv_broadcast(&vp->v_cv);
1339:
1340: KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.29 cgd 1341: }
1342:
1343: /*
1.80 fvdl 1344: * Recycle an unused vnode to the front of the free list.
1345: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1346: */
1.80 fvdl 1347: int
1.309 ad 1348: vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1.217 junyoung 1349: {
1350:
1.309 ad 1351: KASSERT((vp->v_iflag & VI_MARKER) == 0);
1352:
1353: mutex_enter(&vp->v_interlock);
1354: if (vp->v_usecount != 0) {
1355: mutex_exit(&vp->v_interlock);
1356: return (0);
1.29 cgd 1357: }
1.309 ad 1358: if (inter_lkp)
1359: mutex_exit(inter_lkp);
1360: vremfree(vp);
1361: vp->v_usecount++;
1362: vclean(vp, DOCLOSE);
1.324 pooka 1363: vrelel(vp, 0);
1.309 ad 1364: return (1);
1.29 cgd 1365: }
1366:
1367: /*
1.309 ad 1368: * Eliminate all activity associated with a vnode in preparation for
1369: * reuse. Drops a reference from the vnode.
1.29 cgd 1370: */
1371: void
1.309 ad 1372: vgone(vnode_t *vp)
1.80 fvdl 1373: {
1.166 chs 1374:
1.309 ad 1375: mutex_enter(&vp->v_interlock);
1376: vclean(vp, DOCLOSE);
1.324 pooka 1377: vrelel(vp, 0);
1.29 cgd 1378: }
1379:
1380: /*
1381: * Lookup a vnode by device number.
1382: */
1.50 christos 1383: int
1.309 ad 1384: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 1385: {
1.309 ad 1386: vnode_t *vp;
1.80 fvdl 1387: int rc = 0;
1.29 cgd 1388:
1.318 ad 1389: mutex_enter(&specfs_lock);
1390: for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29 cgd 1391: if (dev != vp->v_rdev || type != vp->v_type)
1392: continue;
1393: *vpp = vp;
1.80 fvdl 1394: rc = 1;
1395: break;
1.29 cgd 1396: }
1.318 ad 1397: mutex_exit(&specfs_lock);
1.80 fvdl 1398: return (rc);
1.96 thorpej 1399: }
1400:
1401: /*
1402: * Revoke all the vnodes corresponding to the specified minor number
1403: * range (endpoints inclusive) of the specified major.
1404: */
1405: void
1.247 thorpej 1406: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1407: {
1.316 ad 1408: vnode_t *vp, **vpp;
1409: dev_t dev;
1.96 thorpej 1410: int mn;
1411:
1.274 mrg 1412: vp = NULL; /* XXX gcc */
1413:
1.318 ad 1414: mutex_enter(&specfs_lock);
1.316 ad 1415: for (mn = minl; mn <= minh; mn++) {
1416: dev = makedev(maj, mn);
1.318 ad 1417: vpp = &specfs_hash[SPECHASH(dev)];
1.316 ad 1418: for (vp = *vpp; vp != NULL;) {
1419: mutex_enter(&vp->v_interlock);
1420: if ((vp->v_iflag & VI_CLEAN) != 0 ||
1421: dev != vp->v_rdev || type != vp->v_type) {
1422: mutex_exit(&vp->v_interlock);
1423: vp = vp->v_specnext;
1424: continue;
1425: }
1.318 ad 1426: mutex_exit(&specfs_lock);
1.316 ad 1427: if (vget(vp, LK_INTERLOCK) == 0) {
1428: VOP_REVOKE(vp, REVOKEALL);
1429: vrele(vp);
1430: }
1.318 ad 1431: mutex_enter(&specfs_lock);
1.316 ad 1432: vp = *vpp;
1433: }
1434: }
1.318 ad 1435: mutex_exit(&specfs_lock);
1.29 cgd 1436: }
1437:
1438: /*
1439: * Calculate the total number of references to a special device.
1440: */
1.30 mycroft 1441: int
1.309 ad 1442: vcount(vnode_t *vp)
1.29 cgd 1443: {
1444: int count;
1445:
1.318 ad 1446: mutex_enter(&specfs_lock);
1.309 ad 1447: mutex_enter(&vp->v_interlock);
1.318 ad 1448: if (vp->v_specnode == NULL) {
1.309 ad 1449: count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1450: mutex_exit(&vp->v_interlock);
1.318 ad 1451: mutex_exit(&specfs_lock);
1.309 ad 1452: return (count);
1453: }
1454: mutex_exit(&vp->v_interlock);
1.318 ad 1455: count = vp->v_specnode->sn_dev->sd_opencnt;
1456: mutex_exit(&specfs_lock);
1.29 cgd 1457: return (count);
1458: }
1459:
1.101 mrg 1460: /*
1.316 ad 1461: * Eliminate all activity associated with the requested vnode
1462: * and with all vnodes aliased to the requested vnode.
1463: */
1464: void
1465: vrevoke(vnode_t *vp)
1466: {
1467: vnode_t *vq, **vpp;
1468: enum vtype type;
1469: dev_t dev;
1470:
1471: KASSERT(vp->v_usecount > 0);
1472:
1473: mutex_enter(&vp->v_interlock);
1474: if ((vp->v_iflag & VI_CLEAN) != 0) {
1475: mutex_exit(&vp->v_interlock);
1476: return;
1477: } else {
1478: dev = vp->v_rdev;
1479: type = vp->v_type;
1480: mutex_exit(&vp->v_interlock);
1481: }
1482:
1.318 ad 1483: vpp = &specfs_hash[SPECHASH(dev)];
1484: mutex_enter(&specfs_lock);
1.316 ad 1485: for (vq = *vpp; vq != NULL;) {
1.333 ad 1486: /* If clean or being cleaned, then ignore it. */
1487: mutex_enter(&vq->v_interlock);
1488: if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1.317 ad 1489: vq->v_rdev != dev || vq->v_type != type) {
1.333 ad 1490: mutex_exit(&vq->v_interlock);
1.316 ad 1491: vq = vq->v_specnext;
1492: continue;
1493: }
1.318 ad 1494: mutex_exit(&specfs_lock);
1495: if (vq->v_usecount == 0) {
1.317 ad 1496: vremfree(vq);
1.316 ad 1497: }
1.318 ad 1498: vq->v_usecount++;
1.316 ad 1499: vclean(vq, DOCLOSE);
1.324 pooka 1500: vrelel(vq, 0);
1.318 ad 1501: mutex_enter(&specfs_lock);
1.316 ad 1502: vq = *vpp;
1503: }
1.318 ad 1504: mutex_exit(&specfs_lock);
1.316 ad 1505: }
1506:
1507: /*
1.220 lukem 1508: * sysctl helper routine to return list of supported fstypes
1509: */
1510: static int
1511: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
1512: {
1.291 christos 1513: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 1514: char *where = oldp;
1515: struct vfsops *v;
1516: size_t needed, left, slen;
1517: int error, first;
1518:
1519: if (newp != NULL)
1520: return (EPERM);
1521: if (namelen != 0)
1522: return (EINVAL);
1523:
1524: first = 1;
1525: error = 0;
1526: needed = 0;
1527: left = *oldlenp;
1528:
1.311 ad 1529: sysctl_unlock();
1.302 ad 1530: mutex_enter(&vfs_list_lock);
1.220 lukem 1531: LIST_FOREACH(v, &vfs_list, vfs_list) {
1532: if (where == NULL)
1533: needed += strlen(v->vfs_name) + 1;
1534: else {
1.245 christos 1535: memset(bf, 0, sizeof(bf));
1.220 lukem 1536: if (first) {
1.245 christos 1537: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 1538: first = 0;
1539: } else {
1.245 christos 1540: bf[0] = ' ';
1541: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 1542: }
1.245 christos 1543: bf[sizeof(bf)-1] = '\0';
1544: slen = strlen(bf);
1.220 lukem 1545: if (left < slen + 1)
1546: break;
1547: /* +1 to copy out the trailing NUL byte */
1.302 ad 1548: v->vfs_refcount++;
1549: mutex_exit(&vfs_list_lock);
1.245 christos 1550: error = copyout(bf, where, slen + 1);
1.302 ad 1551: mutex_enter(&vfs_list_lock);
1552: v->vfs_refcount--;
1.220 lukem 1553: if (error)
1554: break;
1555: where += slen;
1556: needed += slen;
1557: left -= slen;
1558: }
1559: }
1.302 ad 1560: mutex_exit(&vfs_list_lock);
1.311 ad 1561: sysctl_relock();
1.220 lukem 1562: *oldlenp = needed;
1563: return (error);
1564: }
1565:
1566: /*
1.80 fvdl 1567: * Top level filesystem related information gathering.
1568: */
1.212 atatat 1569: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80 fvdl 1570: {
1.218 atatat 1571: sysctl_createv(clog, 0, NULL, NULL,
1572: CTLFLAG_PERMANENT,
1.212 atatat 1573: CTLTYPE_NODE, "vfs", NULL,
1574: NULL, 0, NULL, 0,
1575: CTL_VFS, CTL_EOL);
1.218 atatat 1576: sysctl_createv(clog, 0, NULL, NULL,
1577: CTLFLAG_PERMANENT,
1.226 atatat 1578: CTLTYPE_NODE, "generic",
1579: SYSCTL_DESCR("Non-specific vfs related information"),
1.212 atatat 1580: NULL, 0, NULL, 0,
1581: CTL_VFS, VFS_GENERIC, CTL_EOL);
1.218 atatat 1582: sysctl_createv(clog, 0, NULL, NULL,
1583: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226 atatat 1584: CTLTYPE_INT, "usermount",
1585: SYSCTL_DESCR("Whether unprivileged users may mount "
1586: "filesystems"),
1.212 atatat 1587: NULL, 0, &dovfsusermount, 0,
1588: CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220 lukem 1589: sysctl_createv(clog, 0, NULL, NULL,
1590: CTLFLAG_PERMANENT,
1591: CTLTYPE_STRING, "fstypes",
1592: SYSCTL_DESCR("List of file systems present"),
1593: sysctl_vfs_generic_fstypes, 0, NULL, 0,
1594: CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.263 chs 1595: sysctl_createv(clog, 0, NULL, NULL,
1596: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1597: CTLTYPE_INT, "magiclinks",
1598: SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
1599: NULL, 0, &vfs_magiclinks, 0,
1600: CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
1.80 fvdl 1601: }
1602:
1.212 atatat 1603:
1.29 cgd 1604: int kinfo_vdebug = 1;
1605: int kinfo_vgetfailed;
1606: #define KINFO_VNODESLOP 10
1607: /*
1608: * Dump vnode list (via sysctl).
1609: * Copyout address of vnode followed by vnode.
1610: */
1611: /* ARGSUSED */
1.50 christos 1612: int
1.212 atatat 1613: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 1614: {
1.212 atatat 1615: char *where = oldp;
1616: size_t *sizep = oldlenp;
1.80 fvdl 1617: struct mount *mp, *nmp;
1.311 ad 1618: vnode_t *vp, *mvp, vbuf;
1.80 fvdl 1619: char *bp = where, *savebp;
1.29 cgd 1620: char *ewhere;
1621: int error;
1.212 atatat 1622:
1623: if (namelen != 0)
1624: return (EOPNOTSUPP);
1625: if (newp != NULL)
1626: return (EPERM);
1.29 cgd 1627:
1.309 ad 1628: #define VPTRSZ sizeof(vnode_t *)
1629: #define VNODESZ sizeof(vnode_t)
1.29 cgd 1630: if (where == NULL) {
1631: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1632: return (0);
1633: }
1634: ewhere = where + *sizep;
1.80 fvdl 1635:
1.311 ad 1636: sysctl_unlock();
1.302 ad 1637: mutex_enter(&mountlist_lock);
1.177 matt 1638: for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1639: mp = nmp) {
1.327 ad 1640: if (vfs_trybusy(mp, RW_READER, &mountlist_lock)) {
1.177 matt 1641: nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29 cgd 1642: continue;
1.80 fvdl 1643: }
1.29 cgd 1644: savebp = bp;
1.309 ad 1645: /* Allocate a marker vnode. */
1.311 ad 1646: if ((mvp = vnalloc(mp)) == NULL) {
1647: sysctl_relock();
1.309 ad 1648: return (ENOMEM);
1.311 ad 1649: }
1.309 ad 1650: mutex_enter(&mntvnode_lock);
1651: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
1652: vmark(mvp, vp);
1.29 cgd 1653: /*
1654: * Check that the vp is still associated with
1655: * this filesystem. RACE: could have been
1656: * recycled onto the same filesystem.
1657: */
1.309 ad 1658: if (vp->v_mount != mp || vismarker(vp))
1659: continue;
1.29 cgd 1660: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309 ad 1661: (void)vunmark(mvp);
1662: mutex_exit(&mntvnode_lock);
1.310 pooka 1663: vnfree(mvp);
1.311 ad 1664: sysctl_relock();
1.29 cgd 1665: *sizep = bp - where;
1666: return (ENOMEM);
1667: }
1.311 ad 1668: memcpy(&vbuf, vp, VNODESZ);
1.309 ad 1669: mutex_exit(&mntvnode_lock);
1.311 ad 1670: if ((error = copyout(vp, bp, VPTRSZ)) ||
1671: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309 ad 1672: mutex_enter(&mntvnode_lock);
1673: (void)vunmark(mvp);
1674: mutex_exit(&mntvnode_lock);
1.310 pooka 1675: vnfree(mvp);
1.311 ad 1676: sysctl_relock();
1.29 cgd 1677: return (error);
1.309 ad 1678: }
1.29 cgd 1679: bp += VPTRSZ + VNODESZ;
1.309 ad 1680: mutex_enter(&mntvnode_lock);
1.29 cgd 1681: }
1.309 ad 1682: mutex_exit(&mntvnode_lock);
1.302 ad 1683: mutex_enter(&mountlist_lock);
1.177 matt 1684: nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.327 ad 1685: vfs_unbusy(mp, false);
1.310 pooka 1686: vnfree(mvp);
1.29 cgd 1687: }
1.302 ad 1688: mutex_exit(&mountlist_lock);
1.311 ad 1689: sysctl_relock();
1.29 cgd 1690:
1691: *sizep = bp - where;
1692: return (0);
1.30 mycroft 1693: }
1694:
1695: /*
1.309 ad 1696: * Remove clean vnodes from a mountpoint's vnode list.
1697: */
1698: void
1699: vfs_scrubvnlist(struct mount *mp)
1700: {
1701: vnode_t *vp, *nvp;
1702:
1.327 ad 1703: retry:
1.309 ad 1704: mutex_enter(&mntvnode_lock);
1705: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1706: nvp = TAILQ_NEXT(vp, v_mntvnodes);
1707: mutex_enter(&vp->v_interlock);
1.315 ad 1708: if ((vp->v_iflag & VI_CLEAN) != 0) {
1.309 ad 1709: TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.315 ad 1710: vp->v_mount = NULL;
1.327 ad 1711: mutex_exit(&mntvnode_lock);
1712: mutex_exit(&vp->v_interlock);
1713: vfs_destroy(mp);
1714: goto retry;
1.315 ad 1715: }
1.309 ad 1716: mutex_exit(&vp->v_interlock);
1717: }
1718: mutex_exit(&mntvnode_lock);
1719: }
1720:
1721: /*
1.30 mycroft 1722: * Check to see if a filesystem is mounted on a block device.
1723: */
1724: int
1.309 ad 1725: vfs_mountedon(vnode_t *vp)
1.30 mycroft 1726: {
1.309 ad 1727: vnode_t *vq;
1.80 fvdl 1728: int error = 0;
1.30 mycroft 1729:
1.261 reinoud 1730: if (vp->v_type != VBLK)
1731: return ENOTBLK;
1.113 fvdl 1732: if (vp->v_specmountpoint != NULL)
1.30 mycroft 1733: return (EBUSY);
1.318 ad 1734: mutex_enter(&specfs_lock);
1735: for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
1736: vq = vq->v_specnext) {
1737: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1738: continue;
1739: if (vq->v_specmountpoint != NULL) {
1740: error = EBUSY;
1741: break;
1.30 mycroft 1742: }
1743: }
1.318 ad 1744: mutex_exit(&specfs_lock);
1.80 fvdl 1745: return (error);
1.30 mycroft 1746: }
1747:
1.35 ws 1748: /*
1.39 mycroft 1749: * Unmount all file systems.
1750: * We traverse the list in reverse order under the assumption that doing so
1751: * will avoid needing to worry about dependencies.
1752: */
1753: void
1.256 christos 1754: vfs_unmountall(struct lwp *l)
1.39 mycroft 1755: {
1.123 augustss 1756: struct mount *mp, *nmp;
1.40 mycroft 1757: int allerror, error;
1.39 mycroft 1758:
1.235 lukem 1759: printf("unmounting file systems...");
1.325 dyoung 1760: for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist);
1761: !CIRCLEQ_EMPTY(&mountlist);
1762: mp = nmp) {
1763: nmp = CIRCLEQ_PREV(mp, mnt_list);
1.54 jtk 1764: #ifdef DEBUG
1.235 lukem 1765: printf("\nunmounting %s (%s)...",
1.56 christos 1766: mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 1767: #endif
1.149 thorpej 1768: /*
1769: * XXX Freeze syncer. Must do this before locking the
1770: * mount point. See dounmount() for details.
1771: */
1.281 ad 1772: mutex_enter(&syncer_mutex);
1.327 ad 1773: if (vfs_busy(mp, RW_WRITER, NULL)) {
1.281 ad 1774: mutex_exit(&syncer_mutex);
1.60 fvdl 1775: continue;
1.149 thorpej 1776: }
1.256 christos 1777: if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
1.57 christos 1778: printf("unmount of %s failed with error %d\n",
1.40 mycroft 1779: mp->mnt_stat.f_mntonname, error);
1780: allerror = 1;
1781: }
1.39 mycroft 1782: }
1.235 lukem 1783: printf(" done\n");
1.39 mycroft 1784: if (allerror)
1.57 christos 1785: printf("WARNING: some file systems would not unmount\n");
1.40 mycroft 1786: }
1787:
1788: /*
1789: * Sync and unmount file systems before shutting down.
1790: */
1791: void
1.247 thorpej 1792: vfs_shutdown(void)
1.40 mycroft 1793: {
1.265 skrll 1794: struct lwp *l;
1.40 mycroft 1795:
1.265 skrll 1796: /* XXX we're certainly not running in lwp0's context! */
1797: l = curlwp;
1798: if (l == NULL)
1799: l = &lwp0;
1.185 christos 1800:
1.70 cgd 1801: printf("syncing disks... ");
1802:
1.305 pooka 1803: /* remove user processes from run queue */
1.138 bouyer 1804: suspendsched();
1.40 mycroft 1805: (void) spl0();
1806:
1.128 sommerfe 1807: /* avoid coming back this way again if we panic. */
1808: doing_shutdown = 1;
1809:
1.184 thorpej 1810: sys_sync(l, NULL, NULL);
1.40 mycroft 1811:
1812: /* Wait for sync to finish. */
1.213 pk 1813: if (buf_syncwait() != 0) {
1.124 augustss 1814: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1815: Debugger();
1816: #endif
1.57 christos 1817: printf("giving up\n");
1.84 thorpej 1818: return;
1.73 thorpej 1819: } else
1.57 christos 1820: printf("done\n");
1.73 thorpej 1821:
1.84 thorpej 1822: /*
1823: * If we've panic'd, don't make the situation potentially
1824: * worse by unmounting the file systems.
1825: */
1826: if (panicstr != NULL)
1827: return;
1828:
1829: /* Release inodes held by texts before update. */
1.73 thorpej 1830: #ifdef notdef
1.84 thorpej 1831: vnshutdown();
1.73 thorpej 1832: #endif
1.84 thorpej 1833: /* Unmount file systems. */
1.256 christos 1834: vfs_unmountall(l);
1.58 thorpej 1835: }
1836:
1837: /*
1838: * Mount the root file system. If the operator didn't specify a
1839: * file system to use, try all possible file systems until one
1840: * succeeds.
1841: */
1842: int
1.247 thorpej 1843: vfs_mountroot(void)
1.58 thorpej 1844: {
1.79 thorpej 1845: struct vfsops *v;
1.239 mycroft 1846: int error = ENODEV;
1.58 thorpej 1847:
1848: if (root_device == NULL)
1849: panic("vfs_mountroot: root device unknown");
1850:
1.264 thorpej 1851: switch (device_class(root_device)) {
1.58 thorpej 1852: case DV_IFNET:
1853: if (rootdev != NODEV)
1.173 thorpej 1854: panic("vfs_mountroot: rootdev set for DV_IFNET "
1855: "(0x%08x -> %d,%d)", rootdev,
1856: major(rootdev), minor(rootdev));
1.58 thorpej 1857: break;
1858:
1859: case DV_DISK:
1860: if (rootdev == NODEV)
1861: panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239 mycroft 1862: if (bdevvp(rootdev, &rootvp))
1863: panic("vfs_mountroot: can't get vnode for rootdev");
1.306 pooka 1864: error = VOP_OPEN(rootvp, FREAD, FSCRED);
1.239 mycroft 1865: if (error) {
1866: printf("vfs_mountroot: can't open root device\n");
1867: return (error);
1868: }
1.58 thorpej 1869: break;
1870:
1871: default:
1872: printf("%s: inappropriate for root file system\n",
1.336 ! cegger 1873: device_xname(root_device));
1.58 thorpej 1874: return (ENODEV);
1875: }
1876:
1877: /*
1878: * If user specified a file system, use it.
1879: */
1.239 mycroft 1880: if (mountroot != NULL) {
1881: error = (*mountroot)();
1882: goto done;
1883: }
1.58 thorpej 1884:
1885: /*
1886: * Try each file system currently configured into the kernel.
1887: */
1.302 ad 1888: mutex_enter(&vfs_list_lock);
1.220 lukem 1889: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 1890: if (v->vfs_mountroot == NULL)
1.58 thorpej 1891: continue;
1892: #ifdef DEBUG
1.197 thorpej 1893: aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58 thorpej 1894: #endif
1.302 ad 1895: v->vfs_refcount++;
1896: mutex_exit(&vfs_list_lock);
1.239 mycroft 1897: error = (*v->vfs_mountroot)();
1.302 ad 1898: mutex_enter(&vfs_list_lock);
1899: v->vfs_refcount--;
1.239 mycroft 1900: if (!error) {
1.197 thorpej 1901: aprint_normal("root file system type: %s\n",
1902: v->vfs_name);
1.79 thorpej 1903: break;
1.58 thorpej 1904: }
1905: }
1.302 ad 1906: mutex_exit(&vfs_list_lock);
1.58 thorpej 1907:
1.79 thorpej 1908: if (v == NULL) {
1.336 ! cegger 1909: printf("no file system for %s", device_xname(root_device));
1.264 thorpej 1910: if (device_class(root_device) == DV_DISK)
1.79 thorpej 1911: printf(" (dev 0x%x)", rootdev);
1912: printf("\n");
1.239 mycroft 1913: error = EFTYPE;
1.79 thorpej 1914: }
1.239 mycroft 1915:
1916: done:
1.264 thorpej 1917: if (error && device_class(root_device) == DV_DISK) {
1.306 pooka 1918: VOP_CLOSE(rootvp, FREAD, FSCRED);
1.239 mycroft 1919: vrele(rootvp);
1920: }
1921: return (error);
1.58 thorpej 1922: }
1.326 ad 1923:
1924: /*
1925: * Sham lock manager for vnodes. This is a temporary measure.
1926: */
1927: int
1928: vlockmgr(struct vnlock *vl, int flags)
1929: {
1930:
1931: KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
1932:
1933: switch (flags & LK_TYPE_MASK) {
1934: case LK_SHARED:
1935: if (rw_tryenter(&vl->vl_lock, RW_READER)) {
1936: return 0;
1937: }
1938: if ((flags & LK_NOWAIT) != 0) {
1.328 ad 1939: return EBUSY;
1.326 ad 1940: }
1941: rw_enter(&vl->vl_lock, RW_READER);
1942: return 0;
1943:
1944: case LK_EXCLUSIVE:
1945: if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
1946: return 0;
1947: }
1948: if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
1949: rw_write_held(&vl->vl_lock)) {
1950: vl->vl_recursecnt++;
1951: return 0;
1952: }
1953: if ((flags & LK_NOWAIT) != 0) {
1.328 ad 1954: return EBUSY;
1.326 ad 1955: }
1956: rw_enter(&vl->vl_lock, RW_WRITER);
1957: return 0;
1958:
1959: case LK_RELEASE:
1960: if (vl->vl_recursecnt != 0) {
1961: KASSERT(rw_write_held(&vl->vl_lock));
1962: vl->vl_recursecnt--;
1963: return 0;
1964: }
1965: rw_exit(&vl->vl_lock);
1966: return 0;
1967:
1968: default:
1969: panic("vlockmgr: flags %x", flags);
1970: }
1971: }
1972:
1973: int
1974: vlockstatus(struct vnlock *vl)
1975: {
1976:
1977: if (rw_write_held(&vl->vl_lock)) {
1978: return LK_EXCLUSIVE;
1979: }
1980: if (rw_read_held(&vl->vl_lock)) {
1981: return LK_SHARED;
1982: }
1983: return 0;
1984: }
CVSweb <webmaster@jp.NetBSD.org>