Annotation of src/sys/kern/vfs_subr.c, Revision 1.301
1.301 ! hannken 1: /* $NetBSD: vfs_subr.c,v 1.300 2007/08/14 13:51:31 pooka Exp $ */
1.74 thorpej 2:
3: /*-
1.243 mycroft 4: * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center.
1.243 mycroft 10: * This code is derived from software contributed to The NetBSD Foundation
11: * by Charles M. Hannum.
1.74 thorpej 12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. All advertising materials mentioning features or use of this software
22: * must display the following acknowledgement:
23: * This product includes software developed by the NetBSD
24: * Foundation, Inc. and its contributors.
25: * 4. Neither the name of The NetBSD Foundation nor the names of its
26: * contributors may be used to endorse or promote products derived
27: * from this software without specific prior written permission.
28: *
29: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
30: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
31: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
33: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39: * POSSIBILITY OF SUCH DAMAGE.
40: */
1.32 cgd 41:
1.29 cgd 42: /*
1.30 mycroft 43: * Copyright (c) 1989, 1993
44: * The Regents of the University of California. All rights reserved.
1.29 cgd 45: * (c) UNIX System Laboratories, Inc.
46: * All or some portions of this file are derived from material licensed
47: * to the University of California by American Telephone and Telegraph
48: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
49: * the permission of UNIX System Laboratories, Inc.
50: *
51: * Redistribution and use in source and binary forms, with or without
52: * modification, are permitted provided that the following conditions
53: * are met:
54: * 1. Redistributions of source code must retain the above copyright
55: * notice, this list of conditions and the following disclaimer.
56: * 2. Redistributions in binary form must reproduce the above copyright
57: * notice, this list of conditions and the following disclaimer in the
58: * documentation and/or other materials provided with the distribution.
1.204 agc 59: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 60: * may be used to endorse or promote products derived from this software
61: * without specific prior written permission.
62: *
63: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73: * SUCH DAMAGE.
74: *
1.32 cgd 75: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 76: */
77:
78: /*
1.296 pooka 79: * External virtual filesystem routines.
80: *
81: * This file contains vfs subroutines which are heavily dependant on
82: * the kernel and are not suitable for standalone use. Examples include
83: * routines involved vnode and mountpoint management.
1.29 cgd 84: */
1.162 lukem 85:
86: #include <sys/cdefs.h>
1.301 ! hannken 87: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.300 2007/08/14 13:51:31 pooka Exp $");
1.78 mrg 88:
1.195 christos 89: #include "opt_inet.h"
1.125 chs 90: #include "opt_ddb.h"
1.95 thorpej 91: #include "opt_compat_netbsd.h"
1.97 christos 92: #include "opt_compat_43.h"
1.29 cgd 93:
94: #include <sys/param.h>
1.30 mycroft 95: #include <sys/systm.h>
1.29 cgd 96: #include <sys/proc.h>
1.138 bouyer 97: #include <sys/kernel.h>
1.29 cgd 98: #include <sys/mount.h>
1.46 mycroft 99: #include <sys/fcntl.h>
1.29 cgd 100: #include <sys/vnode.h>
1.30 mycroft 101: #include <sys/stat.h>
1.29 cgd 102: #include <sys/namei.h>
103: #include <sys/ucred.h>
104: #include <sys/buf.h>
105: #include <sys/errno.h>
106: #include <sys/malloc.h>
1.51 christos 107: #include <sys/syscallargs.h>
1.58 thorpej 108: #include <sys/device.h>
1.192 christos 109: #include <sys/filedesc.h>
1.266 elad 110: #include <sys/kauth.h>
1.50 christos 111:
1.30 mycroft 112: #include <miscfs/specfs/specdev.h>
1.113 fvdl 113: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 114:
1.125 chs 115: #include <uvm/uvm.h>
1.255 yamt 116: #include <uvm/uvm_readahead.h>
1.125 chs 117: #include <uvm/uvm_ddb.h>
1.129 mrg 118:
119: #include <sys/sysctl.h>
1.77 mrg 120:
1.117 fvdl 121: extern int dovfsusermount; /* 1 => permit any user to mount filesystems */
1.263 chs 122: extern int vfs_magiclinks; /* 1 => expand "magic" symlinks */
1.117 fvdl 123:
1.113 fvdl 124: /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
125: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114 enami 126: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113 fvdl 127:
1.135 sommerfe 128: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
1.79 thorpej 129:
1.223 simonb 130: POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
1.284 ad 131: &pool_allocator_nointr, IPL_NONE);
1.186 thorpej 132:
133: MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93 thorpej 134:
1.89 kleink 135: /*
136: * Local declarations.
137: */
1.276 hannken 138:
1.260 yamt 139: static void insmntque(struct vnode *, struct mount *);
140: static int getdevvp(dev_t, struct vnode **, enum vtype);
141: static void vclean(struct vnode *, int, struct lwp *);
1.256 christos 142: static struct vnode *getcleanvnode(struct lwp *);
1.51 christos 143:
1.202 yamt 144: int
1.256 christos 145: vfs_drainvnodes(long target, struct lwp *l)
1.202 yamt 146: {
147:
148: simple_lock(&vnode_free_list_slock);
149: while (numvnodes > target) {
150: struct vnode *vp;
151:
1.256 christos 152: vp = getcleanvnode(l);
1.202 yamt 153: if (vp == NULL)
154: return EBUSY; /* give up */
155: pool_put(&vnode_pool, vp);
156: simple_lock(&vnode_free_list_slock);
157: numvnodes--;
158: }
159: simple_unlock(&vnode_free_list_slock);
160:
161: return 0;
162: }
163:
164: /*
165: * grab a vnode from freelist and clean it.
166: */
167: struct vnode *
1.256 christos 168: getcleanvnode(struct lwp *l)
1.202 yamt 169: {
170: struct vnode *vp;
171: struct freelst *listhd;
172:
173: LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
1.229 yamt 174:
175: listhd = &vnode_free_list;
176: try_nextlist:
177: TAILQ_FOREACH(vp, listhd, v_freelist) {
1.208 hannken 178: if (!simple_lock_try(&vp->v_interlock))
179: continue;
1.227 yamt 180: /*
181: * as our lwp might hold the underlying vnode locked,
182: * don't try to reclaim the VLAYER vnode if it's locked.
183: */
1.228 yamt 184: if ((vp->v_flag & VXLOCK) == 0 &&
185: ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
1.285 hannken 186: break;
1.202 yamt 187: }
1.208 hannken 188: simple_unlock(&vp->v_interlock);
1.202 yamt 189: }
190:
191: if (vp == NULLVP) {
1.229 yamt 192: if (listhd == &vnode_free_list) {
193: listhd = &vnode_hold_list;
194: goto try_nextlist;
195: }
1.202 yamt 196: simple_unlock(&vnode_free_list_slock);
197: return NULLVP;
198: }
199:
200: if (vp->v_usecount)
201: panic("free vnode isn't, vp %p", vp);
202: TAILQ_REMOVE(listhd, vp, v_freelist);
203: /* see comment on why 0xdeadb is set at end of vgone (below) */
204: vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
205: simple_unlock(&vnode_free_list_slock);
206:
207: if (vp->v_type != VBAD)
1.256 christos 208: vgonel(vp, l);
1.202 yamt 209: else
210: simple_unlock(&vp->v_interlock);
211: #ifdef DIAGNOSTIC
212: if (vp->v_data || vp->v_uobj.uo_npages ||
213: TAILQ_FIRST(&vp->v_uobj.memq))
214: panic("cleaned vnode isn't, vp %p", vp);
215: if (vp->v_numoutput)
216: panic("clean vnode has pending I/O's, vp %p", vp);
217: #endif
218: KASSERT((vp->v_flag & VONWORKLST) == 0);
219:
220: return vp;
221: }
222:
1.29 cgd 223: /*
1.80 fvdl 224: * Mark a mount point as busy. Used to synchronize access and to delay
225: * unmounting. Interlock is not released on failure.
1.29 cgd 226: */
1.50 christos 227: int
1.247 thorpej 228: vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
1.29 cgd 229: {
1.80 fvdl 230: int lkflags;
1.29 cgd 231:
1.207 dbj 232: while (mp->mnt_iflag & IMNT_UNMOUNT) {
1.224 pk 233: int gone, n;
1.217 junyoung 234:
1.80 fvdl 235: if (flags & LK_NOWAIT)
236: return (ENOENT);
1.113 fvdl 237: if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
1.256 christos 238: && mp->mnt_unmounter == curlwp)
1.113 fvdl 239: return (EDEADLK);
1.80 fvdl 240: if (interlkp)
241: simple_unlock(interlkp);
242: /*
243: * Since all busy locks are shared except the exclusive
244: * lock granted when unmounting, the only place that a
245: * wakeup needs to be done is at the release of the
246: * exclusive lock at the end of dounmount.
247: */
1.224 pk 248: simple_lock(&mp->mnt_slock);
1.103 sommerfe 249: mp->mnt_wcnt++;
1.283 christos 250: ltsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
1.224 pk 251: n = --mp->mnt_wcnt;
252: simple_unlock(&mp->mnt_slock);
1.207 dbj 253: gone = mp->mnt_iflag & IMNT_GONE;
1.217 junyoung 254:
1.224 pk 255: if (n == 0)
1.103 sommerfe 256: wakeup(&mp->mnt_wcnt);
1.80 fvdl 257: if (interlkp)
258: simple_lock(interlkp);
1.103 sommerfe 259: if (gone)
260: return (ENOENT);
1.80 fvdl 261: }
262: lkflags = LK_SHARED;
263: if (interlkp)
264: lkflags |= LK_INTERLOCK;
265: if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
266: panic("vfs_busy: unexpected lock failure");
1.29 cgd 267: return (0);
268: }
269:
270: /*
1.80 fvdl 271: * Free a busy filesystem.
1.29 cgd 272: */
273: void
1.247 thorpej 274: vfs_unbusy(struct mount *mp)
1.29 cgd 275: {
276:
1.80 fvdl 277: lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29 cgd 278: }
279:
280: /*
1.80 fvdl 281: * Lookup a filesystem type, and if found allocate and initialize
282: * a mount structure for it.
283: *
284: * Devname is usually updated by mount(8) after booting.
1.29 cgd 285: */
1.50 christos 286: int
1.247 thorpej 287: vfs_rootmountalloc(const char *fstypename, const char *devname,
288: struct mount **mpp)
1.29 cgd 289: {
1.80 fvdl 290: struct vfsops *vfsp = NULL;
291: struct mount *mp;
1.29 cgd 292:
1.152 jdolecek 293: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291 christos 294: if (!strncmp(vfsp->vfs_name, fstypename,
295: sizeof(mp->mnt_stat.f_fstypename)))
1.80 fvdl 296: break;
297:
298: if (vfsp == NULL)
299: return (ENODEV);
300: mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91 perry 301: memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80 fvdl 302: lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
1.224 pk 303: simple_lock_init(&mp->mnt_slock);
1.80 fvdl 304: (void)vfs_busy(mp, LK_NOWAIT, 0);
1.272 reinoud 305: TAILQ_INIT(&mp->mnt_vnodelist);
1.80 fvdl 306: mp->mnt_op = vfsp;
307: mp->mnt_flag = MNT_RDONLY;
308: mp->mnt_vnodecovered = NULLVP;
309: vfsp->vfs_refcount++;
1.291 christos 310: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
311: sizeof(mp->mnt_stat.f_fstypename));
1.80 fvdl 312: mp->mnt_stat.f_mntonname[0] = '/';
1.291 christos 313: mp->mnt_stat.f_mntonname[1] = '\0';
314: mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
315: '\0';
316: (void)copystr(devname, mp->mnt_stat.f_mntfromname,
317: sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.276 hannken 318: mount_initspecific(mp);
1.80 fvdl 319: *mpp = mp;
1.29 cgd 320: return (0);
321: }
322:
1.30 mycroft 323:
324: /*
325: * Routines having to do with the management of the vnode table.
326: */
1.217 junyoung 327: extern int (**dead_vnodeop_p)(void *);
1.30 mycroft 328:
1.29 cgd 329: /*
330: * Return the next vnode from the free list.
331: */
1.50 christos 332: int
1.247 thorpej 333: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
334: struct vnode **vpp)
1.29 cgd 335: {
1.142 chs 336: extern struct uvm_pagerops uvm_vnodeops;
337: struct uvm_object *uobj;
1.256 christos 338: struct lwp *l = curlwp; /* XXX */
1.113 fvdl 339: static int toggle;
1.80 fvdl 340: struct vnode *vp;
1.153 thorpej 341: int error = 0, tryalloc;
1.158 chs 342:
1.159 enami 343: try_again:
1.103 sommerfe 344: if (mp) {
345: /*
1.106 sommerfe 346: * Mark filesystem busy while we're creating a vnode.
347: * If unmount is in progress, this will wait; if the
348: * unmount succeeds (only if umount -f), this will
349: * return an error. If the unmount fails, we'll keep
350: * going afterwards.
351: * (This puts the per-mount vnode list logically under
352: * the protection of the vfs_busy lock).
1.103 sommerfe 353: */
1.113 fvdl 354: error = vfs_busy(mp, LK_RECURSEFAIL, 0);
355: if (error && error != EDEADLK)
1.103 sommerfe 356: return error;
357: }
1.29 cgd 358:
1.113 fvdl 359: /*
360: * We must choose whether to allocate a new vnode or recycle an
361: * existing one. The criterion for allocating a new one is that
362: * the total number of vnodes is less than the number desired or
363: * there are no vnodes on either free list. Generally we only
364: * want to recycle vnodes that have no buffers associated with
365: * them, so we look first on the vnode_free_list. If it is empty,
366: * we next consider vnodes with referencing buffers on the
367: * vnode_hold_list. The toggle ensures that half the time we
368: * will use a buffer from the vnode_hold_list, and half the time
369: * we will allocate a new one unless the list has grown to twice
370: * the desired size. We are reticent to recycle vnodes from the
371: * vnode_hold_list because we will lose the identity of all its
372: * referencing buffers.
373: */
1.142 chs 374:
1.153 thorpej 375: vp = NULL;
376:
377: simple_lock(&vnode_free_list_slock);
378:
1.113 fvdl 379: toggle ^= 1;
380: if (numvnodes > 2 * desiredvnodes)
381: toggle = 0;
382:
1.153 thorpej 383: tryalloc = numvnodes < desiredvnodes ||
1.159 enami 384: (TAILQ_FIRST(&vnode_free_list) == NULL &&
385: (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153 thorpej 386:
387: if (tryalloc &&
388: (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.206 yamt 389: numvnodes++;
1.80 fvdl 390: simple_unlock(&vnode_free_list_slock);
1.142 chs 391: memset(vp, 0, sizeof(*vp));
1.248 yamt 392: UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
1.225 yamt 393: /*
394: * done by memset() above.
395: * LIST_INIT(&vp->v_nclist);
396: * LIST_INIT(&vp->v_dnclist);
397: */
1.29 cgd 398: } else {
1.256 christos 399: vp = getcleanvnode(l);
1.80 fvdl 400: /*
401: * Unless this is a bad time of the month, at most
402: * the first NCPUS items on the free list are
403: * locked, so this is close enough to being empty.
404: */
405: if (vp == NULLVP) {
1.113 fvdl 406: if (mp && error != EDEADLK)
407: vfs_unbusy(mp);
1.153 thorpej 408: if (tryalloc) {
409: printf("WARNING: unable to allocate new "
410: "vnode, retrying...\n");
411: (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
412: goto try_again;
413: }
1.132 jdolecek 414: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 415: *vpp = 0;
416: return (ENFILE);
417: }
1.248 yamt 418: vp->v_usecount = 1;
1.29 cgd 419: vp->v_flag = 0;
1.158 chs 420: vp->v_socket = NULL;
1.29 cgd 421: }
422: vp->v_type = VNON;
1.104 wrstuden 423: vp->v_vnlock = &vp->v_lock;
424: lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.225 yamt 425: KASSERT(LIST_EMPTY(&vp->v_nclist));
426: KASSERT(LIST_EMPTY(&vp->v_dnclist));
1.29 cgd 427: vp->v_tag = tag;
428: vp->v_op = vops;
429: insmntque(vp, mp);
1.30 mycroft 430: *vpp = vp;
431: vp->v_data = 0;
1.240 christos 432: simple_lock_init(&vp->v_interlock);
1.142 chs 433:
434: /*
435: * initialize uvm_object within vnode.
436: */
437:
1.158 chs 438: uobj = &vp->v_uobj;
439: KASSERT(uobj->pgops == &uvm_vnodeops);
440: KASSERT(uobj->uo_npages == 0);
441: KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288 yamt 442: vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142 chs 443:
1.113 fvdl 444: if (mp && error != EDEADLK)
445: vfs_unbusy(mp);
1.29 cgd 446: return (0);
1.130 fvdl 447: }
448:
449: /*
450: * This is really just the reverse of getnewvnode(). Needed for
451: * VFS_VGET functions who may need to push back a vnode in case
452: * of a locking race.
453: */
454: void
1.247 thorpej 455: ungetnewvnode(struct vnode *vp)
1.130 fvdl 456: {
457: #ifdef DIAGNOSTIC
458: if (vp->v_usecount != 1)
1.131 fvdl 459: panic("ungetnewvnode: busy vnode");
1.130 fvdl 460: #endif
461: vp->v_usecount--;
462: insmntque(vp, NULL);
463: vp->v_type = VBAD;
464:
465: simple_lock(&vp->v_interlock);
1.217 junyoung 466: /*
1.130 fvdl 467: * Insert at head of LRU list
468: */
469: simple_lock(&vnode_free_list_slock);
470: if (vp->v_holdcnt > 0)
471: TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
472: else
473: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1.217 junyoung 474: simple_unlock(&vnode_free_list_slock);
1.130 fvdl 475: simple_unlock(&vp->v_interlock);
1.29 cgd 476: }
477:
478: /*
479: * Move a vnode from one mount queue to another.
480: */
1.260 yamt 481: static void
1.247 thorpej 482: insmntque(struct vnode *vp, struct mount *mp)
1.29 cgd 483: {
484:
1.103 sommerfe 485: #ifdef DIAGNOSTIC
486: if ((mp != NULL) &&
1.207 dbj 487: (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113 fvdl 488: !(mp->mnt_flag & MNT_SOFTDEP) &&
489: vp->v_tag != VT_VFS) {
1.103 sommerfe 490: panic("insmntque into dying filesystem");
491: }
492: #endif
1.217 junyoung 493:
1.80 fvdl 494: simple_lock(&mntvnode_slock);
1.29 cgd 495: /*
496: * Delete from old mount point vnode list, if on one.
497: */
498: if (vp->v_mount != NULL)
1.272 reinoud 499: TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29 cgd 500: /*
501: * Insert into list of vnodes for the new mount point, if available.
502: */
1.279 pooka 503: if ((vp->v_mount = mp) != NULL)
504: TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.80 fvdl 505: simple_unlock(&mntvnode_slock);
1.29 cgd 506: }
507:
508: /*
509: * Create a vnode for a block device.
1.59 thorpej 510: * Used for root filesystem and swap areas.
1.29 cgd 511: * Also used for memory file system special devices.
512: */
1.50 christos 513: int
1.247 thorpej 514: bdevvp(dev_t dev, struct vnode **vpp)
1.29 cgd 515: {
1.30 mycroft 516:
517: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 518: }
519:
520: /*
521: * Create a vnode for a character device.
522: * Used for kernfs and some console handling.
523: */
1.50 christos 524: int
1.247 thorpej 525: cdevvp(dev_t dev, struct vnode **vpp)
1.29 cgd 526: {
1.30 mycroft 527:
528: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 529: }
530:
531: /*
532: * Create a vnode for a device.
533: * Used by bdevvp (block device) for root file system etc.,
534: * and by cdevvp (character device) for console and kernfs.
535: */
1.260 yamt 536: static int
1.247 thorpej 537: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
1.29 cgd 538: {
1.123 augustss 539: struct vnode *vp;
1.29 cgd 540: struct vnode *nvp;
541: int error;
542:
1.80 fvdl 543: if (dev == NODEV) {
544: *vpp = NULLVP;
1.29 cgd 545: return (0);
1.80 fvdl 546: }
1.50 christos 547: error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 548: if (error) {
549: *vpp = NULLVP;
550: return (error);
551: }
552: vp = nvp;
553: vp->v_type = type;
1.297 pooka 554: uvm_vnp_setsize(vp, 0);
1.50 christos 555: if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29 cgd 556: vput(vp);
557: vp = nvp;
558: }
559: *vpp = vp;
560: return (0);
561: }
562:
563: /*
564: * Check to see if the new vnode represents a special device
565: * for which we already have a vnode (either because of
566: * bdevvp() or because of a different vnode representing
567: * the same block device). If such an alias exists, deallocate
568: * the existing contents and return the aliased vnode. The
569: * caller is responsible for filling it with its new contents.
570: */
571: struct vnode *
1.247 thorpej 572: checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
1.29 cgd 573: {
1.256 christos 574: struct lwp *l = curlwp; /* XXX */
1.123 augustss 575: struct vnode *vp;
1.29 cgd 576: struct vnode **vpp;
577:
578: if (nvp->v_type != VBLK && nvp->v_type != VCHR)
579: return (NULLVP);
580:
581: vpp = &speclisth[SPECHASH(nvp_rdev)];
582: loop:
1.80 fvdl 583: simple_lock(&spechash_slock);
1.29 cgd 584: for (vp = *vpp; vp; vp = vp->v_specnext) {
585: if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
586: continue;
587: /*
588: * Alias, but not in use, so flush it out.
589: */
1.80 fvdl 590: simple_lock(&vp->v_interlock);
1.231 mycroft 591: simple_unlock(&spechash_slock);
1.29 cgd 592: if (vp->v_usecount == 0) {
1.256 christos 593: vgonel(vp, l);
1.29 cgd 594: goto loop;
595: }
1.231 mycroft 596: /*
597: * What we're interested to know here is if someone else has
598: * removed this vnode from the device hash list while we were
599: * waiting. This can only happen if vclean() did it, and
1.259 yamt 600: * this requires the vnode to be locked.
1.231 mycroft 601: */
1.259 yamt 602: if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
1.29 cgd 603: goto loop;
1.259 yamt 604: if (vp->v_specinfo == NULL) {
605: vput(vp);
606: goto loop;
607: }
1.231 mycroft 608: simple_lock(&spechash_slock);
1.29 cgd 609: break;
610: }
1.34 cgd 611: if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29 cgd 612: MALLOC(nvp->v_specinfo, struct specinfo *,
1.150 thorpej 613: sizeof(struct specinfo), M_VNODE, M_NOWAIT);
614: /* XXX Erg. */
615: if (nvp->v_specinfo == NULL) {
616: simple_unlock(&spechash_slock);
617: uvm_wait("checkalias");
618: goto loop;
619: }
620:
1.29 cgd 621: nvp->v_rdev = nvp_rdev;
622: nvp->v_hashchain = vpp;
623: nvp->v_specnext = *vpp;
1.113 fvdl 624: nvp->v_specmountpoint = NULL;
1.80 fvdl 625: simple_unlock(&spechash_slock);
1.62 kleink 626: nvp->v_speclockf = NULL;
1.216 hannken 627:
1.29 cgd 628: *vpp = nvp;
1.80 fvdl 629: if (vp != NULLVP) {
1.29 cgd 630: nvp->v_flag |= VALIASED;
631: vp->v_flag |= VALIASED;
632: vput(vp);
633: }
634: return (NULLVP);
635: }
1.80 fvdl 636: simple_unlock(&spechash_slock);
637: VOP_UNLOCK(vp, 0);
638: simple_lock(&vp->v_interlock);
1.256 christos 639: vclean(vp, 0, l);
1.29 cgd 640: vp->v_op = nvp->v_op;
641: vp->v_tag = nvp->v_tag;
1.104 wrstuden 642: vp->v_vnlock = &vp->v_lock;
643: lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29 cgd 644: nvp->v_type = VNON;
645: insmntque(vp, mp);
646: return (vp);
647: }
648:
649: /*
650: * Grab a particular vnode from the free list, increment its
1.83 fvdl 651: * reference count and lock it. If the vnode lock bit is set the
652: * vnode is being eliminated in vgone. In that case, we can not
653: * grab the vnode, so the process is awakened when the transition is
654: * completed, and an error returned to indicate that the vnode is no
655: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 656: */
1.30 mycroft 657: int
1.247 thorpej 658: vget(struct vnode *vp, int flags)
1.29 cgd 659: {
1.175 perseant 660: int error;
1.29 cgd 661:
1.30 mycroft 662: /*
663: * If the vnode is in the process of being cleaned out for
664: * another use, we wait for the cleaning to finish and then
1.80 fvdl 665: * return failure. Cleaning is determined by checking that
666: * the VXLOCK flag is set.
667: */
1.142 chs 668:
1.80 fvdl 669: if ((flags & LK_INTERLOCK) == 0)
670: simple_lock(&vp->v_interlock);
1.257 yamt 671: if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
1.142 chs 672: if (flags & LK_NOWAIT) {
1.143 sommerfe 673: simple_unlock(&vp->v_interlock);
1.142 chs 674: return EBUSY;
675: }
1.29 cgd 676: vp->v_flag |= VXWANT;
1.158 chs 677: ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80 fvdl 678: return (ENOENT);
1.29 cgd 679: }
1.80 fvdl 680: if (vp->v_usecount == 0) {
681: simple_lock(&vnode_free_list_slock);
1.113 fvdl 682: if (vp->v_holdcnt > 0)
683: TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
684: else
685: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80 fvdl 686: simple_unlock(&vnode_free_list_slock);
687: }
1.29 cgd 688: vp->v_usecount++;
1.112 mycroft 689: #ifdef DIAGNOSTIC
690: if (vp->v_usecount == 0) {
691: vprint("vget", vp);
1.125 chs 692: panic("vget: usecount overflow, vp %p", vp);
1.112 mycroft 693: }
694: #endif
1.80 fvdl 695: if (flags & LK_TYPE_MASK) {
1.113 fvdl 696: if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1.257 yamt 697: vrele(vp);
1.113 fvdl 698: }
1.80 fvdl 699: return (error);
700: }
701: simple_unlock(&vp->v_interlock);
1.29 cgd 702: return (0);
703: }
704:
705: /*
706: * vput(), just unlock and vrele()
707: */
708: void
1.247 thorpej 709: vput(struct vnode *vp)
1.29 cgd 710: {
1.256 christos 711: struct lwp *l = curlwp; /* XXX */
1.30 mycroft 712:
1.111 mycroft 713: #ifdef DIAGNOSTIC
1.80 fvdl 714: if (vp == NULL)
715: panic("vput: null vp");
716: #endif
717: simple_lock(&vp->v_interlock);
718: vp->v_usecount--;
719: if (vp->v_usecount > 0) {
720: simple_unlock(&vp->v_interlock);
721: VOP_UNLOCK(vp, 0);
722: return;
723: }
724: #ifdef DIAGNOSTIC
725: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
726: vprint("vput: bad ref count", vp);
727: panic("vput: ref cnt");
728: }
729: #endif
730: /*
1.87 pk 731: * Insert at tail of LRU list.
1.80 fvdl 732: */
733: simple_lock(&vnode_free_list_slock);
1.113 fvdl 734: if (vp->v_holdcnt > 0)
735: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
736: else
737: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80 fvdl 738: simple_unlock(&vnode_free_list_slock);
1.161 thorpej 739: if (vp->v_flag & VEXECMAP) {
1.167 chs 740: uvmexp.execpages -= vp->v_uobj.uo_npages;
741: uvmexp.filepages += vp->v_uobj.uo_npages;
1.147 chs 742: }
1.270 chs 743: vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
1.80 fvdl 744: simple_unlock(&vp->v_interlock);
1.256 christos 745: VOP_INACTIVE(vp, l);
1.29 cgd 746: }
747:
748: /*
749: * Vnode release.
750: * If count drops to zero, call inactive routine and return to freelist.
751: */
1.298 pooka 752: static void
753: do_vrele(struct vnode *vp, int doinactive, int onhead)
1.29 cgd 754: {
1.256 christos 755: struct lwp *l = curlwp; /* XXX */
1.29 cgd 756:
757: #ifdef DIAGNOSTIC
758: if (vp == NULL)
759: panic("vrele: null vp");
760: #endif
1.80 fvdl 761: simple_lock(&vp->v_interlock);
1.29 cgd 762: vp->v_usecount--;
1.80 fvdl 763: if (vp->v_usecount > 0) {
764: simple_unlock(&vp->v_interlock);
1.29 cgd 765: return;
1.80 fvdl 766: }
1.29 cgd 767: #ifdef DIAGNOSTIC
1.80 fvdl 768: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29 cgd 769: vprint("vrele: bad ref count", vp);
1.142 chs 770: panic("vrele: ref cnt vp %p", vp);
1.29 cgd 771: }
772: #endif
1.30 mycroft 773: /*
1.87 pk 774: * Insert at tail of LRU list.
1.30 mycroft 775: */
1.80 fvdl 776: simple_lock(&vnode_free_list_slock);
1.298 pooka 777: if (vp->v_holdcnt > 0) {
1.113 fvdl 778: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1.298 pooka 779: } else {
780: if (onhead)
781: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
782: else
783: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
784: }
1.80 fvdl 785: simple_unlock(&vnode_free_list_slock);
1.161 thorpej 786: if (vp->v_flag & VEXECMAP) {
1.167 chs 787: uvmexp.execpages -= vp->v_uobj.uo_npages;
788: uvmexp.filepages += vp->v_uobj.uo_npages;
1.147 chs 789: }
1.270 chs 790: vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
1.298 pooka 791:
792: if (doinactive) {
793: if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
794: VOP_INACTIVE(vp, l);
795: } else {
796: simple_unlock(&vp->v_interlock);
797: }
798: }
799:
800: void
801: vrele(struct vnode *vp)
802: {
803:
804: do_vrele(vp, 1, 0);
805: }
806:
807: void
808: vrele2(struct vnode *vp, int onhead)
809: {
810:
811: do_vrele(vp, 0, onhead);
1.29 cgd 812: }
813:
814: /*
815: * Page or buffer structure gets a reference.
1.258 chs 816: * Called with v_interlock held.
1.29 cgd 817: */
1.30 mycroft 818: void
1.247 thorpej 819: vholdl(struct vnode *vp)
1.29 cgd 820: {
821:
1.113 fvdl 822: /*
823: * If it is on the freelist and the hold count is currently
824: * zero, move it to the hold list. The test of the back
825: * pointer and the use reference count of zero is because
826: * it will be removed from a free list by getnewvnode,
827: * but will not have its reference count incremented until
828: * after calling vgone. If the reference count were
829: * incremented first, vgone would (incorrectly) try to
830: * close the previous instance of the underlying object.
831: * So, the back pointer is explicitly set to `0xdeadb' in
832: * getnewvnode after removing it from a freelist to ensure
833: * that we do not try to move it here.
834: */
835: if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
836: vp->v_holdcnt == 0 && vp->v_usecount == 0) {
837: simple_lock(&vnode_free_list_slock);
838: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
839: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
840: simple_unlock(&vnode_free_list_slock);
841: }
1.29 cgd 842: vp->v_holdcnt++;
843: }
844:
845: /*
846: * Page or buffer structure frees a reference.
1.258 chs 847: * Called with v_interlock held.
1.29 cgd 848: */
1.30 mycroft 849: void
1.247 thorpej 850: holdrelel(struct vnode *vp)
1.29 cgd 851: {
852:
853: if (vp->v_holdcnt <= 0)
1.215 yamt 854: panic("holdrelel: holdcnt vp %p", vp);
1.29 cgd 855: vp->v_holdcnt--;
1.142 chs 856:
1.113 fvdl 857: /*
858: * If it is on the holdlist and the hold count drops to
859: * zero, move it to the free list. The test of the back
860: * pointer and the use reference count of zero is because
861: * it will be removed from a free list by getnewvnode,
862: * but will not have its reference count incremented until
863: * after calling vgone. If the reference count were
864: * incremented first, vgone would (incorrectly) try to
865: * close the previous instance of the underlying object.
866: * So, the back pointer is explicitly set to `0xdeadb' in
867: * getnewvnode after removing it from a freelist to ensure
868: * that we do not try to move it here.
869: */
1.142 chs 870:
1.113 fvdl 871: if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
872: vp->v_holdcnt == 0 && vp->v_usecount == 0) {
873: simple_lock(&vnode_free_list_slock);
874: TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
875: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
876: simple_unlock(&vnode_free_list_slock);
877: }
1.81 ross 878: }
879:
880: /*
881: * Vnode reference.
 * Take an additional use reference on an already-referenced vnode.
 * The caller must already hold a reference; acquiring the *first*
 * reference must go through vget() instead (enforced by the panic).
882: */
883: void
1.247 thorpej 884: vref(struct vnode *vp)
1.81 ross 885: {
886:
887: 	simple_lock(&vp->v_interlock);
888: 	if (vp->v_usecount <= 0)
1.125 chs 889: 		panic("vref used where vget required, vp %p", vp);
1.81 ross 890: 	vp->v_usecount++;
1.112 mycroft 891: #ifdef DIAGNOSTIC
 /* Detect wrap-around of the use count. */
892: 	if (vp->v_usecount == 0) {
893: 		vprint("vref", vp);
1.125 chs 894: 		panic("vref: usecount overflow, vp %p", vp);
1.112 mycroft 895: 	}
896: #endif
1.80 fvdl 897: 	simple_unlock(&vp->v_interlock);
1.29 cgd 898: }
899:
900: /*
901: * Remove any vnodes in the vnode table belonging to mount point mp.
902: *
1.183 yamt 903: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 904: * return error if any are found (nb: this is a user error, not a
1.183 yamt 905: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 906: * that are found.
1.183 yamt 907: *
908: * If WRITECLOSE is set, only flush out regular file vnodes open for
909: * writing.
910: *
911: * SKIPSYSTEM causes any vnodes marked VSYSTEM to be skipped.
1.29 cgd 912: */
1.30 mycroft 913: #ifdef DEBUG
914: int busyprt = 0;	/* print out busy vnodes */
915: struct ctldebug debug1 = { "busyprt", &busyprt };
916: #endif
1.29 cgd 917:
 /* Returns 0 on success, or EBUSY if busy vnodes remain on the mount. */
1.50 christos 918: int
1.247 thorpej 919: vflush(struct mount *mp, struct vnode *skipvp, int flags)
1.29 cgd 920: {
1.256 christos 921: 	struct lwp *l = curlwp;	/* XXX */
1.273 reinoud 922: 	struct vnode *vp, *nvp;
1.29 cgd 923: 	int busy = 0;
924:
1.80 fvdl 925: 	simple_lock(&mntvnode_slock);
1.29 cgd 926: loop:
1.273 reinoud 927: 	/*
928: 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
929: 	 * and vclean() are called
930: 	 */
931: 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 /* If vp was moved to another mount, the list changed: restart. */
1.29 cgd 932: 		if (vp->v_mount != mp)
933: 			goto loop;
1.273 reinoud 934: 		nvp = TAILQ_NEXT(vp, v_mntvnodes);
1.29 cgd 935: 		/*
936: 		 * Skip over a selected vnode.
937: 		 */
938: 		if (vp == skipvp)
939: 			continue;
1.80 fvdl 940: 		simple_lock(&vp->v_interlock);
1.29 cgd 941: 		/*
942: 		 * Skip over any vnodes marked VSYSTEM.
943: 		 */
1.80 fvdl 944: 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
945: 			simple_unlock(&vp->v_interlock);
1.29 cgd 946: 			continue;
1.80 fvdl 947: 		}
1.29 cgd 948: 		/*
1.30 mycroft 949: 		 * If WRITECLOSE is set, only flush out regular file
950: 		 * vnodes open for writing.
951: 		 */
952: 		if ((flags & WRITECLOSE) &&
1.92 thorpej 953: 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
954: 			simple_unlock(&vp->v_interlock);
1.30 mycroft 955: 			continue;
1.92 thorpej 956: 		}
1.30 mycroft 957: 		/*
1.29 cgd 958: 		 * With v_usecount == 0, all we need to do is clear
959: 		 * out the vnode data structures and we are done.
960: 		 */
961: 		if (vp->v_usecount == 0) {
1.80 fvdl 962: 			simple_unlock(&mntvnode_slock);
1.256 christos 963: 			vgonel(vp, l);
1.80 fvdl 964: 			simple_lock(&mntvnode_slock);
1.29 cgd 965: 			continue;
966: 		}
967: 		/*
1.30 mycroft 968: 		 * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 969: 		 * For block or character devices, revert to an
970: 		 * anonymous device. For all other files, just kill them.
971: 		 */
972: 		if (flags & FORCECLOSE) {
1.80 fvdl 973: 			simple_unlock(&mntvnode_slock);
1.29 cgd 974: 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.256 christos 975: 				vgonel(vp, l);
1.29 cgd 976: 			} else {
 /* Device nodes survive as anonymous spec vnodes off any mount. */
1.256 christos 977: 				vclean(vp, 0, l);
1.30 mycroft 978: 				vp->v_op = spec_vnodeop_p;
1.29 cgd 979: 				insmntque(vp, (struct mount *)0);
980: 			}
1.80 fvdl 981: 			simple_lock(&mntvnode_slock);
1.29 cgd 982: 			continue;
983: 		}
1.30 mycroft 984: #ifdef DEBUG
1.29 cgd 985: 		if (busyprt)
986: 			vprint("vflush: busy vnode", vp);
1.30 mycroft 987: #endif
1.80 fvdl 988: 		simple_unlock(&vp->v_interlock);
1.29 cgd 989: 		busy++;
990: 	}
1.80 fvdl 991: 	simple_unlock(&mntvnode_slock);
1.29 cgd 992: 	if (busy)
993: 		return (EBUSY);
994: 	return (0);
995: }
996:
997: /*
998: * Disassociate the underlying file system from a vnode.
 * Entered with vp->v_interlock held (asserted below); the interlock
 * is handed to VOP_LOCK via LK_INTERLOCK. With DOCLOSE in "flags",
 * cached buffers are flushed, the object is closed, and any
 * special-device alias entries are removed. On return the vnode
 * uses the dead filesystem ops.
999: */
1.260 yamt 1000: static void
1.256 christos 1001: vclean(struct vnode *vp, int flags, struct lwp *l)
1.29 cgd 1002: {
1.175 perseant 1003: 	int active;
1.29 cgd 1004:
1.166 chs 1005: 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1006:
1.29 cgd 1007: 	/*
1008: 	 * Check to see if the vnode is in use.
1009: 	 * If so we have to reference it before we clean it out
1010: 	 * so that its count cannot fall to zero and generate a
1011: 	 * race against ourselves to recycle it.
1012: 	 */
1.166 chs 1013:
1.112 mycroft 1014: 	if ((active = vp->v_usecount) != 0) {
1.87 pk 1015: 		vp->v_usecount++;
1.112 mycroft 1016: #ifdef DIAGNOSTIC
1017: 		if (vp->v_usecount == 0) {
1018: 			vprint("vclean", vp);
1019: 			panic("vclean: usecount overflow");
1020: 		}
1021: #endif
1022: 	}
1.87 pk 1023:
1.29 cgd 1024: 	/*
1025: 	 * Prevent the vnode from being recycled or
1026: 	 * brought into use while we clean it out.
1027: 	 */
1028: 	if (vp->v_flag & VXLOCK)
1.125 chs 1029: 		panic("vclean: deadlock, vp %p", vp);
1.29 cgd 1030: 	vp->v_flag |= VXLOCK;
1.161 thorpej 1031: 	if (vp->v_flag & VEXECMAP) {
 /* Keep the global exec/file page accounting consistent. */
1.167 chs 1032: 		uvmexp.execpages -= vp->v_uobj.uo_npages;
1033: 		uvmexp.filepages += vp->v_uobj.uo_npages;
1.147 chs 1034: 	}
1.161 thorpej 1035: 	vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142 chs 1036:
1.29 cgd 1037: 	/*
1.80 fvdl 1038: 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1.289 pooka 1039: 	 * have the object locked while it cleans it out. For
1040: 	 * active vnodes, it ensures that no other activity can
1.80 fvdl 1041: 	 * occur while the underlying object is being cleaned out.
1.289 pooka 1042: 	 *
1.295 pooka 1043: 	 * We drain the lock to make sure we are the last one trying to
1044: 	 * get it and immediately resurrect the lock. Future accesses
1045: 	 * for locking this _vnode_ will be protected by VXLOCK. However,
1046: 	 * upper layers might be using the _lock_ in case the file system
1047: 	 * exported it and might access it while the vnode lingers in
1048: 	 * deadfs.
1.80 fvdl 1049: 	 */
1.295 pooka 1050: 	VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK);
1.80 fvdl 1051:
1.98 wrstuden 1052: 	/*
1.142 chs 1053: 	 * Clean out any cached data associated with the vnode.
1.231 mycroft 1054: 	 * If special device, remove it from special device alias list.
1055: 	 * if it is on one.
1.29 cgd 1056: 	 */
1.166 chs 1057: 	if (flags & DOCLOSE) {
1.211 dbj 1058: 		int error;
1.231 mycroft 1059: 		struct vnode *vq, *vx;
1060:
 /* Try to save dirty buffers first, then discard unconditionally. */
1.256 christos 1061: 		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.211 dbj 1062: 		if (error)
1.256 christos 1063: 			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.211 dbj 1064: 		KASSERT(error == 0);
1.166 chs 1065: 		KASSERT((vp->v_flag & VONWORKLST) == 0);
1.231 mycroft 1066:
1067: 		if (active)
1068: 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1069:
1070: 		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1071: 		    vp->v_specinfo != 0) {
1072: 			simple_lock(&spechash_slock);
1073: 			if (vp->v_hashchain != NULL) {
 /* Unlink vp from its device hash chain. */
1074: 				if (*vp->v_hashchain == vp) {
1075: 					*vp->v_hashchain = vp->v_specnext;
1076: 				} else {
1077: 					for (vq = *vp->v_hashchain; vq;
1078: 					     vq = vq->v_specnext) {
1079: 						if (vq->v_specnext != vp)
1080: 							continue;
1081: 						vq->v_specnext = vp->v_specnext;
1082: 						break;
1083: 					}
1084: 					if (vq == NULL)
1085: 						panic("missing bdev");
1086: 				}
1087: 				if (vp->v_flag & VALIASED) {
 /* If only one alias remains, clear its VALIASED flag too. */
1088: 					vx = NULL;
1089: 					for (vq = *vp->v_hashchain; vq;
1090: 					     vq = vq->v_specnext) {
1091: 						if (vq->v_rdev != vp->v_rdev ||
1092: 						    vq->v_type != vp->v_type)
1093: 							continue;
1094: 						if (vx)
1095: 							break;
1096: 						vx = vq;
1097: 					}
1098: 					if (vx == NULL)
1099: 						panic("missing alias");
1100: 					if (vq == NULL)
1101: 						vx->v_flag &= ~VALIASED;
1102: 					vp->v_flag &= ~VALIASED;
1103: 				}
1104: 			}
1105: 			simple_unlock(&spechash_slock);
1106: 			FREE(vp->v_specinfo, M_VNODE);
1107: 			vp->v_specinfo = NULL;
1108: 		}
1.166 chs 1109: 	}
1110: 	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80 fvdl 1111:
1.29 cgd 1112: 	/*
1.30 mycroft 1113: 	 * If purging an active vnode, it must be closed and
1.80 fvdl 1114: 	 * deactivated before being reclaimed. Note that the
1115: 	 * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1116: 	 */
1117: 	if (active) {
1.256 christos 1118: 		VOP_INACTIVE(vp, l);
1.80 fvdl 1119: 	} else {
1120: 		/*
1121: 		 * Any other processes trying to obtain this lock must first
1122: 		 * wait for VXLOCK to clear, then call the new lock operation.
1123: 		 */
1124: 		VOP_UNLOCK(vp, 0);
1.29 cgd 1125: 	}
1126: 	/*
1127: 	 * Reclaim the vnode.
1128: 	 */
1.256 christos 1129: 	if (VOP_RECLAIM(vp, l))
1.125 chs 1130: 		panic("vclean: cannot reclaim, vp %p", vp);
1.87 pk 1131: 	if (active) {
1132: 		/*
1133: 		 * Inline copy of vrele() since VOP_INACTIVE
1134: 		 * has already been called.
1135: 		 */
1136: 		simple_lock(&vp->v_interlock);
1137: 		if (--vp->v_usecount <= 0) {
1138: #ifdef DIAGNOSTIC
1139: 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1140: 				vprint("vclean: bad ref count", vp);
1141: 				panic("vclean: ref cnt");
1142: 			}
1143: #endif
1144: 			/*
1145: 			 * Insert at tail of LRU list.
1146: 			 */
1.142 chs 1147:
1.113 fvdl 1148: 			simple_unlock(&vp->v_interlock);
1.87 pk 1149: 			simple_lock(&vnode_free_list_slock);
1.104 wrstuden 1150: #ifdef DIAGNOSTIC
1.113 fvdl 1151: 			if (vp->v_holdcnt > 0)
1.125 chs 1152: 				panic("vclean: not clean, vp %p", vp);
1.104 wrstuden 1153: #endif
1.87 pk 1154: 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1155: 			simple_unlock(&vnode_free_list_slock);
1.113 fvdl 1156: 		} else
1157: 			simple_unlock(&vp->v_interlock);
1.87 pk 1158: 	}
1.30 mycroft 1159:
1.169 chs 1160: 	KASSERT(vp->v_uobj.uo_npages == 0);
1.255 yamt 1161: 	if (vp->v_type == VREG && vp->v_ractx != NULL) {
1162: 		uvm_ra_freectx(vp->v_ractx);
1163: 		vp->v_ractx = NULL;
1164: 	}
1.80 fvdl 1165: 	cache_purge(vp);
1166:
1.29 cgd 1167: 	/*
1.30 mycroft 1168: 	 * Done with purge, notify sleepers of the grim news.
1.29 cgd 1169: 	 */
1.30 mycroft 1170: 	vp->v_op = dead_vnodeop_p;
1171: 	vp->v_tag = VT_NON;
1.289 pooka 1172: 	vp->v_vnlock = NULL;
1.139 enami 1173: 	simple_lock(&vp->v_interlock);
1.181 jdolecek 1174: 	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
1.234 thorpej 1175: 	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1.29 cgd 1176: 	if (vp->v_flag & VXWANT) {
1177: 		vp->v_flag &= ~VXWANT;
1.139 enami 1178: 		simple_unlock(&vp->v_interlock);
1.283 christos 1179: 		wakeup((void *)vp);
1.139 enami 1180: 	} else
1181: 		simple_unlock(&vp->v_interlock);
1.29 cgd 1182: }
1183:
1184: /*
1.80 fvdl 1185: * Recycle an unused vnode to the front of the free list.
1186: * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled (vgonel called), 0 if it is
 * still in use and was left alone.
1.29 cgd 1187: */
1.80 fvdl 1188: int
1.256 christos 1189: vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
1.217 junyoung 1190: {
1191:
1.80 fvdl 1192: 	simple_lock(&vp->v_interlock);
1193: 	if (vp->v_usecount == 0) {
1194: 		if (inter_lkp)
1195: 			simple_unlock(inter_lkp);
 /* vgonel() consumes the vnode interlock. */
1.256 christos 1196: 		vgonel(vp, l);
1.80 fvdl 1197: 		return (1);
1.29 cgd 1198: 	}
1.80 fvdl 1199: 	simple_unlock(&vp->v_interlock);
1200: 	return (0);
1.29 cgd 1201: }
1202:
1203: /*
1204: * Eliminate all activity associated with a vnode
1205: * in preparation for reuse.
1206: */
1207: void
1.247 thorpej 1208: vgone(struct vnode *vp)
1.80 fvdl 1209: {
1.256 christos 1210: 	struct lwp *l = curlwp;		/* XXX */
1.80 fvdl 1211:
 /* Take the interlock; vgonel() expects and consumes it. */
1212: 	simple_lock(&vp->v_interlock);
1.256 christos 1213: 	vgonel(vp, l);
1.80 fvdl 1214: }
1215:
1216: /*
1217: * vgone, with the vp interlock held.
 * The interlock is released before this returns (either by the
 * PNORELOCK sleep below or inside vclean()).
1218: */
1219: void
1.256 christos 1220: vgonel(struct vnode *vp, struct lwp *l)
1.29 cgd 1221: {
1222:
1.166 chs 1223: 	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1224:
1.29 cgd 1225: 	/*
1226: 	 * If a vgone (or vclean) is already in progress,
1227: 	 * wait until it is done and return.
1228: 	 */
1.166 chs 1229:
1.29 cgd 1230: 	if (vp->v_flag & VXLOCK) {
1231: 		vp->v_flag |= VXWANT;
1.166 chs 1232: 		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29 cgd 1233: 		return;
1234: 	}
1.166 chs 1235:
1.29 cgd 1236: 	/*
1237: 	 * Clean out the filesystem specific data.
1238: 	 */
1.166 chs 1239:
1.256 christos 1240: 	vclean(vp, DOCLOSE, l);
1.166 chs 1241: 	KASSERT((vp->v_flag & VONWORKLST) == 0);
1242:
1.29 cgd 1243: 	/*
1244: 	 * Delete from old mount point vnode list, if on one.
1245: 	 */
1.166 chs 1246:
1.80 fvdl 1247: 	if (vp->v_mount != NULL)
1248: 		insmntque(vp, (struct mount *)0);
1.166 chs 1249:
1.29 cgd 1250: 	/*
1.202 yamt 1251: 	 * The test of the back pointer and the reference count of
1252: 	 * zero is because it will be removed from the free list by
1253: 	 * getcleanvnode, but will not have its reference count
1254: 	 * incremented until after calling vgone. If the reference
1255: 	 * count were incremented first, vgone would (incorrectly)
1256: 	 * try to close the previous instance of the underlying object.
1.30 mycroft 1257: 	 * So, the back pointer is explicitly set to `0xdeadb' in
1258: 	 * getnewvnode after removing it from the freelist to ensure
1259: 	 * that we do not try to move it here.
1.29 cgd 1260: 	 */
1.166 chs 1261:
1.202 yamt 1262: 	vp->v_type = VBAD;
1.80 fvdl 1263: 	if (vp->v_usecount == 0) {
1.282 thorpej 1264: 		bool dofree;
1.202 yamt 1265:
1.80 fvdl 1266: 		simple_lock(&vnode_free_list_slock);
1.113 fvdl 1267: 		if (vp->v_holdcnt > 0)
1.125 chs 1268: 			panic("vgonel: not clean, vp %p", vp);
1.202 yamt 1269: 		/*
1270: 		 * if it isn't on the freelist, we're called by getcleanvnode
1271: 		 * and vnode is being re-used. otherwise, we'll free it.
1272: 		 */
1273: 		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
1274: 		if (dofree) {
1.80 fvdl 1275: 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.202 yamt 1276: 			numvnodes--;
1.80 fvdl 1277: 		}
1278: 		simple_unlock(&vnode_free_list_slock);
1.202 yamt 1279: 		if (dofree)
1280: 			pool_put(&vnode_pool, vp);
1.29 cgd 1281: 	}
1282: }
1283:
1284: /*
1285: * Lookup a vnode by device number.
 * Returns non-zero and stores the vnode in *vpp if found; note that
 * no new reference is taken on the vnode here.
1286: */
1.50 christos 1287: int
1.247 thorpej 1288: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1.29 cgd 1289: {
1.80 fvdl 1290: 	struct vnode *vp;
1291: 	int rc = 0;
1.29 cgd 1292:
1.80 fvdl 1293: 	simple_lock(&spechash_slock);
1.29 cgd 1294: 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1295: 		if (dev != vp->v_rdev || type != vp->v_type)
1296: 			continue;
1297: 		*vpp = vp;
1.80 fvdl 1298: 		rc = 1;
1299: 		break;
1.29 cgd 1300: 	}
1.80 fvdl 1301: 	simple_unlock(&spechash_slock);
1302: 	return (rc);
1.96 thorpej 1303: }
1304:
1305: /*
1306: * Revoke all the vnodes corresponding to the specified minor number
1307: * range (endpoints inclusive) of the specified major.
1308: */
1309: void
1.247 thorpej 1310: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1311: {
1312: 	struct vnode *vp;
1313: 	int mn;
1314:
1.274 mrg 1315: 	vp = NULL;	/* XXX gcc */
1316:
 /* Revoke every alias of each device in [minl, minh]. */
1.96 thorpej 1317: 	for (mn = minl; mn <= minh; mn++)
1318: 		if (vfinddev(makedev(maj, mn), type, &vp))
1319: 			VOP_REVOKE(vp, REVOKEALL);
1.29 cgd 1320: }
1321:
1322: /*
1323: * Calculate the total number of references to a special device.
 * Sums v_usecount over all aliases of the device; unused aliases
 * encountered along the way are flushed with vgone().
1324: */
1.30 mycroft 1325: int
1.247 thorpej 1326: vcount(struct vnode *vp)
1.29 cgd 1327: {
1.123 augustss 1328: 	struct vnode *vq, *vnext;
1.29 cgd 1329: 	int count;
1330:
1331: loop:
1332: 	if ((vp->v_flag & VALIASED) == 0)
1333: 		return (vp->v_usecount);
1.80 fvdl 1334: 	simple_lock(&spechash_slock);
1.30 mycroft 1335: 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1336: 		vnext = vq->v_specnext;
1.29 cgd 1337: 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1338: 			continue;
1339: 		/*
1340: 		 * Alias, but not in use, so flush it out.
1341: 		 */
1.151 wrstuden 1342: 		if (vq->v_usecount == 0 && vq != vp &&
1343: 		    (vq->v_flag & VXLOCK) == 0) {
1.80 fvdl 1344: 			simple_unlock(&spechash_slock);
1.29 cgd 1345: 			vgone(vq);
 /* The hash chain may have changed while unlocked; restart. */
1346: 			goto loop;
1347: 		}
1348: 		count += vq->v_usecount;
1349: 	}
1.80 fvdl 1350: 	simple_unlock(&spechash_slock);
1.29 cgd 1351: 	return (count);
1352: }
1353:
1354:
1.101 mrg 1355: /*
1.220 lukem 1356: * sysctl helper routine to return list of supported fstypes
 * Copies out a space-separated list of file system names and sets
 * *oldlenp to the space consumed (or needed, for a NULL buffer).
 * Read-only: rejects writes (EPERM) and extra name components (EINVAL).
1357: */
1358: static int
1359: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
1360: {
1.291 christos 1361: 	char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 1362: 	char *where = oldp;
1363: 	struct vfsops *v;
1364: 	size_t needed, left, slen;
1365: 	int error, first;
1366:
1367: 	if (newp != NULL)
1368: 		return (EPERM);
1369: 	if (namelen != 0)
1370: 		return (EINVAL);
1371:
1372: 	first = 1;
1373: 	error = 0;
1374: 	needed = 0;
1375: 	left = *oldlenp;
1376:
1377: 	LIST_FOREACH(v, &vfs_list, vfs_list) {
 /* With a NULL buffer this is just a sizing request. */
1378: 		if (where == NULL)
1379: 			needed += strlen(v->vfs_name) + 1;
1380: 		else {
1.245 christos 1381: 			memset(bf, 0, sizeof(bf));
1.220 lukem 1382: 			if (first) {
1.245 christos 1383: 				strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 1384: 				first = 0;
1385: 			} else {
 /* Subsequent names are prefixed with a separating space. */
1.245 christos 1386: 				bf[0] = ' ';
1387: 				strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 1388: 			}
1.245 christos 1389: 			bf[sizeof(bf)-1] = '\0';
1390: 			slen = strlen(bf);
1.220 lukem 1391: 			if (left < slen + 1)
1392: 				break;
1393: 			/* +1 to copy out the trailing NUL byte */
1.245 christos 1394: 			error = copyout(bf, where, slen + 1);
1.220 lukem 1395: 			if (error)
1396: 				break;
1397: 			where += slen;
1398: 			needed += slen;
1399: 			left -= slen;
1400: 		}
1401: 	}
1402: 	*oldlenp = needed;
1403: 	return (error);
1404: }
1405:
1406: /*
1.80 fvdl 1407: * Top level filesystem related information gathering.
 * Creates the vfs and vfs.generic sysctl nodes plus the usermount,
 * fstypes and magiclinks leaves.
1408: */
1.212 atatat 1409: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80 fvdl 1410: {
1.218 atatat 1411: 	sysctl_createv(clog, 0, NULL, NULL,
1412: 		       CTLFLAG_PERMANENT,
1.212 atatat 1413: 		       CTLTYPE_NODE, "vfs", NULL,
1414: 		       NULL, 0, NULL, 0,
1415: 		       CTL_VFS, CTL_EOL);
1.218 atatat 1416: 	sysctl_createv(clog, 0, NULL, NULL,
1417: 		       CTLFLAG_PERMANENT,
1.226 atatat 1418: 		       CTLTYPE_NODE, "generic",
1419: 		       SYSCTL_DESCR("Non-specific vfs related information"),
1.212 atatat 1420: 		       NULL, 0, NULL, 0,
1421: 		       CTL_VFS, VFS_GENERIC, CTL_EOL);
1.218 atatat 1422: 	sysctl_createv(clog, 0, NULL, NULL,
1423: 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226 atatat 1424: 		       CTLTYPE_INT, "usermount",
1425: 		       SYSCTL_DESCR("Whether unprivileged users may mount "
1426: 				    "filesystems"),
1.212 atatat 1427: 		       NULL, 0, &dovfsusermount, 0,
1428: 		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220 lukem 1429: 	sysctl_createv(clog, 0, NULL, NULL,
1430: 		       CTLFLAG_PERMANENT,
1431: 		       CTLTYPE_STRING, "fstypes",
1432: 		       SYSCTL_DESCR("List of file systems present"),
1433: 		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
1434: 		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.263 chs 1435: 	sysctl_createv(clog, 0, NULL, NULL,
1436: 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1437: 		       CTLTYPE_INT, "magiclinks",
1438: 		       SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
1439: 		       NULL, 0, &vfs_magiclinks, 0,
1440: 		       CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
1.80 fvdl 1441: }
1442:
1.212 atatat 1443:
1.29 cgd 1444: int	kinfo_vdebug = 1;
1445: int	kinfo_vgetfailed;
1446: #define KINFO_VNODESLOP	10
1447: /*
1448: * Dump vnode list (via sysctl).
1449: * Copyout address of vnode followed by vnode.
1450: */
1451: /* ARGSUSED */
1.50 christos 1452: int
1.212 atatat 1453: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29 cgd 1454: {
1.212 atatat 1455: 	char *where = oldp;
1456: 	size_t *sizep = oldlenp;
1.80 fvdl 1457: 	struct mount *mp, *nmp;
1.272 reinoud 1458: 	struct vnode *vp;
1.80 fvdl 1459: 	char *bp = where, *savebp;
1.29 cgd 1460: 	char *ewhere;
1461: 	int error;
1.212 atatat 1462:
1463: 	if (namelen != 0)
1464: 		return (EOPNOTSUPP);
1465: 	if (newp != NULL)
1466: 		return (EPERM);
1.29 cgd 1467:
1.90 perry 1468: #define VPTRSZ	sizeof(struct vnode *)
1469: #define VNODESZ	sizeof(struct vnode)
1.29 cgd 1470: 	if (where == NULL) {
 /* Sizing request: include slop for vnodes created meanwhile. */
1471: 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1472: 		return (0);
1473: 	}
1474: 	ewhere = where + *sizep;
1.80 fvdl 1475:
1476: 	simple_lock(&mountlist_slock);
1.177 matt 1477: 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1478: 	     mp = nmp) {
1.80 fvdl 1479: 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177 matt 1480: 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29 cgd 1481: 			continue;
1.80 fvdl 1482: 		}
1.29 cgd 1483: 		savebp = bp;
1484: again:
1.80 fvdl 1485: 		simple_lock(&mntvnode_slock);
1.272 reinoud 1486: 		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29 cgd 1487: 			/*
1488: 			 * Check that the vp is still associated with
1489: 			 * this filesystem. RACE: could have been
1490: 			 * recycled onto the same filesystem.
1491: 			 */
1492: 			if (vp->v_mount != mp) {
1.80 fvdl 1493: 				simple_unlock(&mntvnode_slock);
1.29 cgd 1494: 				if (kinfo_vdebug)
1.57 christos 1495: 					printf("kinfo: vp changed\n");
 /* Rewind output for this mount and rescan its vnode list. */
1.29 cgd 1496: 				bp = savebp;
1497: 				goto again;
1498: 			}
1499: 			if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80 fvdl 1500: 				simple_unlock(&mntvnode_slock);
1.29 cgd 1501: 				*sizep = bp - where;
1502: 				return (ENOMEM);
1503: 			}
 /* Drop the list lock across copyout(), which may sleep. */
1.80 fvdl 1504: 			simple_unlock(&mntvnode_slock);
1.283 christos 1505: 			if ((error = copyout((void *)&vp, bp, VPTRSZ)) ||
1506: 			    (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ)))
1.29 cgd 1507: 				return (error);
1508: 			bp += VPTRSZ + VNODESZ;
1.80 fvdl 1509: 			simple_lock(&mntvnode_slock);
1.29 cgd 1510: 		}
1.80 fvdl 1511: 		simple_unlock(&mntvnode_slock);
1512: 		simple_lock(&mountlist_slock);
1.177 matt 1513: 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29 cgd 1514: 		vfs_unbusy(mp);
1515: 	}
1.80 fvdl 1516: 	simple_unlock(&mountlist_slock);
1.29 cgd 1517:
1518: 	*sizep = bp - where;
1519: 	return (0);
1.30 mycroft 1520: }
1521:
1522: /*
1523: * Check to see if a filesystem is mounted on a block device.
 * Returns 0 if not mounted on, EBUSY if the device (or one of its
 * aliases) is mounted on, ENOTBLK if vp is not a block device.
1524: */
1525: int
1.247 thorpej 1526: vfs_mountedon(struct vnode *vp)
1.30 mycroft 1527: {
1.80 fvdl 1528: 	struct vnode *vq;
1529: 	int error = 0;
1.30 mycroft 1530:
1.261 reinoud 1531: 	if (vp->v_type != VBLK)
1532: 		return ENOTBLK;
1.113 fvdl 1533: 	if (vp->v_specmountpoint != NULL)
1.30 mycroft 1534: 		return (EBUSY);
1535: 	if (vp->v_flag & VALIASED) {
 /* Also check every alias of the same device. */
1.80 fvdl 1536: 		simple_lock(&spechash_slock);
1.30 mycroft 1537: 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1538: 			if (vq->v_rdev != vp->v_rdev ||
1539: 			    vq->v_type != vp->v_type)
1540: 				continue;
1.113 fvdl 1541: 			if (vq->v_specmountpoint != NULL) {
1.80 fvdl 1542: 				error = EBUSY;
1543: 				break;
1544: 			}
1.30 mycroft 1545: 		}
1.80 fvdl 1546: 		simple_unlock(&spechash_slock);
1.30 mycroft 1547: 	}
1.80 fvdl 1548: 	return (error);
1.30 mycroft 1549: }
1550:
1.35 ws 1551: /*
1.39 mycroft 1552: * Unmount all file systems.
1553: * We traverse the list in reverse order under the assumption that doing so
1554: * will avoid needing to worry about dependencies.
 * Mounts that cannot be busied are skipped; failures are reported on
 * the console but do not stop the traversal.
1555: */
1556: void
1.256 christos 1557: vfs_unmountall(struct lwp *l)
1.39 mycroft 1558: {
1.123 augustss 1559: 	struct mount *mp, *nmp;
1.40 mycroft 1560: 	int allerror, error;
1.39 mycroft 1561:
1.235 lukem 1562: 	printf("unmounting file systems...");
1.39 mycroft 1563: 	for (allerror = 0,
1564: 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1565: 		nmp = mp->mnt_list.cqe_prev;
1.54 jtk 1566: #ifdef DEBUG
1.235 lukem 1567: 		printf("\nunmounting %s (%s)...",
1.56 christos 1568: 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 1569: #endif
1.149 thorpej 1570: 		/*
1571: 		 * XXX Freeze syncer. Must do this before locking the
1572: 		 * mount point. See dounmount() for details.
1573: 		 */
1.281 ad 1574: 		mutex_enter(&syncer_mutex);
1.149 thorpej 1575: 		if (vfs_busy(mp, 0, 0)) {
1.281 ad 1576: 			mutex_exit(&syncer_mutex);
1.60 fvdl 1577: 			continue;
1.149 thorpej 1578: 		}
1.256 christos 1579: 		if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
1.57 christos 1580: 			printf("unmount of %s failed with error %d\n",
1.40 mycroft 1581: 			    mp->mnt_stat.f_mntonname, error);
1582: 			allerror = 1;
1583: 		}
1.39 mycroft 1584: 	}
1.235 lukem 1585: 	printf(" done\n");
1.39 mycroft 1586: 	if (allerror)
1.57 christos 1587: 		printf("WARNING: some file systems would not unmount\n");
1.40 mycroft 1588: }
1589:
1.205 yamt 1590: extern struct simplelock bqueue_slock; /* XXX */
1591:
1.40 mycroft 1592: /*
1593: * Sync and unmount file systems before shutting down.
 * Gives up (without unmounting) if the buffer sync does not
 * complete, and skips unmounting entirely after a panic.
1594: */
1595: void
1.247 thorpej 1596: vfs_shutdown(void)
1.40 mycroft 1597: {
1.265 skrll 1598: 	struct lwp *l;
1.40 mycroft 1599:
1.265 skrll 1600: 	/* XXX we're certainly not running in lwp0's context! */
1601: 	l = curlwp;
1602: 	if (l == NULL)
1603: 		l = &lwp0;
1.185 christos 1604:
1.70 cgd 1605: 	printf("syncing disks... ");
1606:
1.138 bouyer 1607: 	/* remove user process from run queue */
1608: 	suspendsched();
1.40 mycroft 1609: 	(void) spl0();
1610:
1.128 sommerfe 1611: 	/* avoid coming back this way again if we panic. */
1612: 	doing_shutdown = 1;
1613:
1.184 thorpej 1614: 	sys_sync(l, NULL, NULL);
1.40 mycroft 1615:
1616: 	/* Wait for sync to finish. */
1.213 pk 1617: 	if (buf_syncwait() != 0) {
1.124 augustss 1618: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1619: 		Debugger();
1620: #endif
1.57 christos 1621: 		printf("giving up\n");
1.84 thorpej 1622: 		return;
1.73 thorpej 1623: 	} else
1.57 christos 1624: 		printf("done\n");
1.73 thorpej 1625:
1.84 thorpej 1626: 	/*
1627: 	 * If we've panic'd, don't make the situation potentially
1628: 	 * worse by unmounting the file systems.
1629: 	 */
1630: 	if (panicstr != NULL)
1631: 		return;
1632:
1633: 	/* Release inodes held by texts before update. */
1.73 thorpej 1634: #ifdef notdef
1.84 thorpej 1635: 	vnshutdown();
1.73 thorpej 1636: #endif
1.84 thorpej 1637: 	/* Unmount file systems. */
1.256 christos 1638: 	vfs_unmountall(l);
1.58 thorpej 1639: }
1640:
1641: /*
1642: * Mount the root file system. If the operator didn't specify a
1643: * file system to use, try all possible file systems until one
1644: * succeeds.
 * Returns 0 on success, ENODEV/EFTYPE or a mount error otherwise.
1645: */
1646: int
1.247 thorpej 1647: vfs_mountroot(void)
1.58 thorpej 1648: {
1.79 thorpej 1649: 	struct vfsops *v;
1.239 mycroft 1650: 	int error = ENODEV;
1.58 thorpej 1651:
1652: 	if (root_device == NULL)
1653: 		panic("vfs_mountroot: root device unknown");
1654:
1.264 thorpej 1655: 	switch (device_class(root_device)) {
1.58 thorpej 1656: 	case DV_IFNET:
1657: 		if (rootdev != NODEV)
1.173 thorpej 1658: 			panic("vfs_mountroot: rootdev set for DV_IFNET "
1659: 			    "(0x%08x -> %d,%d)", rootdev,
1660: 			    major(rootdev), minor(rootdev));
1.58 thorpej 1661: 		break;
1662:
1663: 	case DV_DISK:
 /* For disk roots, open the device vnode before mounting. */
1664: 		if (rootdev == NODEV)
1665: 			panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239 mycroft 1666: 	        if (bdevvp(rootdev, &rootvp))
1667: 	                panic("vfs_mountroot: can't get vnode for rootdev");
1.256 christos 1668: 		error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
1.239 mycroft 1669: 		if (error) {
1670: 			printf("vfs_mountroot: can't open root device\n");
1671: 			return (error);
1672: 		}
1.58 thorpej 1673: 		break;
1674:
1675: 	default:
1676: 		printf("%s: inappropriate for root file system\n",
1677: 		    root_device->dv_xname);
1678: 		return (ENODEV);
1679: 	}
1680:
1681: 	/*
1682: 	 * If user specified a file system, use it.
1683: 	 */
1.239 mycroft 1684: 	if (mountroot != NULL) {
1685: 		error = (*mountroot)();
1686: 		goto done;
1687: 	}
1.58 thorpej 1688:
1689: 	/*
1690: 	 * Try each file system currently configured into the kernel.
1691: 	 */
1.220 lukem 1692: 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 1693: 		if (v->vfs_mountroot == NULL)
1.58 thorpej 1694: 			continue;
1695: #ifdef DEBUG
1.197 thorpej 1696: 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58 thorpej 1697: #endif
1.239 mycroft 1698: 		error = (*v->vfs_mountroot)();
1699: 		if (!error) {
1.197 thorpej 1700: 			aprint_normal("root file system type: %s\n",
1701: 			    v->vfs_name);
1.79 thorpej 1702: 			break;
1.58 thorpej 1703: 		}
1704: 	}
1705:
1.79 thorpej 1706: 	if (v == NULL) {
1707: 		printf("no file system for %s", root_device->dv_xname);
1.264 thorpej 1708: 		if (device_class(root_device) == DV_DISK)
1.79 thorpej 1709: 			printf(" (dev 0x%x)", rootdev);
1710: 		printf("\n");
1.239 mycroft 1711: 		error = EFTYPE;
1.79 thorpej 1712: 	}
1.239 mycroft 1713:
1714: done:
 /* On failure, undo the VOP_OPEN of the disk root vnode above. */
1.264 thorpej 1715: 	if (error && device_class(root_device) == DV_DISK) {
1.256 christos 1716: 		VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
1.239 mycroft 1717: 		vrele(rootvp);
1718: 	}
1719: 	return (error);
1.58 thorpej 1720: }
CVSweb <webmaster@jp.NetBSD.org>