Annotation of src/sys/kern/vfs_subr.c, Revision 1.157
1.157 ! chs 1: /* $NetBSD: vfs_subr.c,v 1.156 2001/08/03 06:00:13 jdolecek Exp $ */
1.74 thorpej 2:
3: /*-
1.79 thorpej 4: * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
1.32 cgd 39:
1.29 cgd 40: /*
1.30 mycroft 41: * Copyright (c) 1989, 1993
42: * The Regents of the University of California. All rights reserved.
1.29 cgd 43: * (c) UNIX System Laboratories, Inc.
44: * All or some portions of this file are derived from material licensed
45: * to the University of California by American Telephone and Telegraph
46: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47: * the permission of UNIX System Laboratories, Inc.
48: *
49: * Redistribution and use in source and binary forms, with or without
50: * modification, are permitted provided that the following conditions
51: * are met:
52: * 1. Redistributions of source code must retain the above copyright
53: * notice, this list of conditions and the following disclaimer.
54: * 2. Redistributions in binary form must reproduce the above copyright
55: * notice, this list of conditions and the following disclaimer in the
56: * documentation and/or other materials provided with the distribution.
57: * 3. All advertising materials mentioning features or use of this software
58: * must display the following acknowledgement:
59: * This product includes software developed by the University of
60: * California, Berkeley and its contributors.
61: * 4. Neither the name of the University nor the names of its contributors
62: * may be used to endorse or promote products derived from this software
63: * without specific prior written permission.
64: *
65: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75: * SUCH DAMAGE.
76: *
1.32 cgd 77: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 78: */
79:
80: /*
81: * External virtual filesystem routines
82: */
1.78 mrg 83:
1.125 chs 84: #include "opt_ddb.h"
1.95 thorpej 85: #include "opt_compat_netbsd.h"
1.97 christos 86: #include "opt_compat_43.h"
1.29 cgd 87:
88: #include <sys/param.h>
1.30 mycroft 89: #include <sys/systm.h>
1.29 cgd 90: #include <sys/proc.h>
1.138 bouyer 91: #include <sys/kernel.h>
1.29 cgd 92: #include <sys/mount.h>
93: #include <sys/time.h>
1.46 mycroft 94: #include <sys/fcntl.h>
1.29 cgd 95: #include <sys/vnode.h>
1.30 mycroft 96: #include <sys/stat.h>
1.29 cgd 97: #include <sys/namei.h>
98: #include <sys/ucred.h>
99: #include <sys/buf.h>
100: #include <sys/errno.h>
101: #include <sys/malloc.h>
1.30 mycroft 102: #include <sys/domain.h>
103: #include <sys/mbuf.h>
1.51 christos 104: #include <sys/syscallargs.h>
1.58 thorpej 105: #include <sys/device.h>
1.71 fvdl 106: #include <sys/dirent.h>
1.50 christos 107:
1.30 mycroft 108: #include <miscfs/specfs/specdev.h>
1.113 fvdl 109: #include <miscfs/genfs/genfs.h>
110: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 111:
1.125 chs 112: #include <uvm/uvm.h>
113: #include <uvm/uvm_ddb.h>
1.129 mrg 114:
115: #include <sys/sysctl.h>
1.77 mrg 116:
/*
 * iftovt_tab: 16-entry lookup table whose values are vnode types
 * (enum vtype).
 * NOTE(review): presumably indexed by the file-type bits of a mode word
 * reduced to 0..15 (the IFTOVT() macro is not visible here) -- confirm.
 */
1.30 mycroft 117: enum vtype iftovt_tab[16] = {
118: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
119: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
120: };
/*
 * vttoif_tab: 9-entry lookup table whose values are S_IF* file-type bits;
 * the first slot is 0 and the last is S_IFMT.
 * NOTE(review): presumably indexed by enum vtype -- confirm against the
 * VTTOIF() users.
 */
1.146 jdolecek 121: const int vttoif_tab[9] = {
1.30 mycroft 122: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
123: S_IFSOCK, S_IFIFO, S_IFMT,
124: };
125:
/* Behaviour switches; doforce and prtactive are defined (and defaulted) here. */
1.31 mycroft 126: int doforce = 1; /* 1 => permit forcible unmounting */
127: int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1.29 cgd 128:
/* Defined in another file; only declared here. */
1.117 fvdl 129: extern int dovfsusermount; /* 1 => permit any user to mount filesystems */
130:
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp): link buffer bp at the head of the buffer list dp
 * through its b_vnbufs entry.
 *
 * bufremvn(bp): unlink bp from the list it is on and set its le_next
 * to NOLIST, the marker brelvp() and reassignbuf() test to decide
 * whether a buffer is currently on a vnode list.
 *
 * bufremvn() is wrapped in do { } while (0) so that it expands to exactly
 * one statement and remains correct as the body of an unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/*CONSTCOND*/ 0)
/*
 * Lists of unreferenced vnodes that getnewvnode() may recycle.
 * ungetnewvnode() puts a vnode on vnode_hold_list when its v_holdcnt is
 * positive and on vnode_free_list otherwise.  Both lists are manipulated
 * with vnode_free_list_slock held.
 */
1.113 fvdl 139: /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
140: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114 enami 141: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113 fvdl 142:
1.55 cgd 143: struct mntlist mountlist = /* mounted filesystem list */
144: CIRCLEQ_HEAD_INITIALIZER(mountlist);
1.79 thorpej 145: struct vfs_list_head vfs_list = /* vfs list */
1.118 mycroft 146: LIST_HEAD_INITIALIZER(vfs_list);
1.79 thorpej 147:
1.71 fvdl 148: struct nfs_public nfs_pub; /* publicly exported FS */
1.58 thorpej 149:
/*
 * Simple locks.  Uses visible in this part of the file:
 *   mountlist_slock       - held by vfs_getvfs() while walking mountlist
 *   mntid_slock           - held by vfs_getnewfsid() while choosing an fsid
 *   mntvnode_slock        - held by insmntque() while editing per-mount
 *                           vnode lists
 *   vnode_free_list_slock - held while the free/hold lists are searched
 *                           or modified
 *   spechash_slock        - not used in the portion of the file shown here
 */
1.135 sommerfe 150: struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
151: static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
152: struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
153: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
154: struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
1.80 fvdl 155:
1.79 thorpej 156: /*
157: * These define the root filesystem and device.
158: */
159: struct mount *rootfs;
160: struct vnode *rootvnode;
1.80 fvdl 161: struct device *root_device; /* root device */
1.79 thorpej 162:
/* Initialized by vntblinit(); getnewvnode() allocates fresh vnodes from it. */
1.93 thorpej 163: struct pool vnode_pool; /* memory pool for vnodes */
164:
1.89 kleink 165: /*
166: * Local declarations.
 *
 * insmntque() and getdevvp() are defined further down in this file.
 * NOTE(review): the remaining routines are not defined in the portion of
 * the file visible here; presumably they follow later -- confirm.
167: */
1.50 christos 168: void insmntque __P((struct vnode *, struct mount *));
169: int getdevvp __P((dev_t, struct vnode **, enum vtype));
170: void vgoneall __P((struct vnode *));
171:
/* File-private helpers (static linkage). */
172: static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
173: struct export_args *));
174: static int vfs_free_netcred __P((struct radix_node *, void *));
175: static void vfs_free_addrlist __P((struct netexport *));
1.51 christos 176:
/* Only declared when the kernel is built with DEBUG. */
177: #ifdef DEBUG
178: void printlockedvnodes __P((void));
179: #endif
180:
1.29 cgd 181: /*
1.30 mycroft 182: * Initialize the vnode management data structures.
1.29 cgd 183: */
1.50 christos 184: void
1.30 mycroft 185: vntblinit()
1.29 cgd 186: {
1.93 thorpej 187:
188: pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
189: 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
1.113 fvdl 190:
191: /*
192: * Initialize the filesystem syncer.
193: */
194: vn_initialize_syncerd();
1.29 cgd 195: }
196:
197: /*
1.80 fvdl 198: * Mark a mount point as busy. Used to synchronize access and to delay
199: * unmounting. Interlock is not released on failure.
1.29 cgd 200: */
1.50 christos 201: int
1.80 fvdl 202: vfs_busy(mp, flags, interlkp)
203: struct mount *mp;
204: int flags;
205: struct simplelock *interlkp;
1.29 cgd 206: {
1.80 fvdl 207: int lkflags;
1.29 cgd 208:
1.103 sommerfe 209: while (mp->mnt_flag & MNT_UNMOUNT) {
210: int gone;
211:
1.80 fvdl 212: if (flags & LK_NOWAIT)
213: return (ENOENT);
1.113 fvdl 214: if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
215: && mp->mnt_unmounter == curproc)
216: return (EDEADLK);
1.80 fvdl 217: if (interlkp)
218: simple_unlock(interlkp);
219: /*
220: * Since all busy locks are shared except the exclusive
221: * lock granted when unmounting, the only place that a
222: * wakeup needs to be done is at the release of the
223: * exclusive lock at the end of dounmount.
1.103 sommerfe 224: *
1.106 sommerfe 225: * XXX MP: add spinlock protecting mnt_wcnt here once you
226: * can atomically unlock-and-sleep.
1.80 fvdl 227: */
1.103 sommerfe 228: mp->mnt_wcnt++;
1.113 fvdl 229: tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
1.103 sommerfe 230: mp->mnt_wcnt--;
231: gone = mp->mnt_flag & MNT_GONE;
232:
233: if (mp->mnt_wcnt == 0)
234: wakeup(&mp->mnt_wcnt);
1.80 fvdl 235: if (interlkp)
236: simple_lock(interlkp);
1.103 sommerfe 237: if (gone)
238: return (ENOENT);
1.80 fvdl 239: }
240: lkflags = LK_SHARED;
241: if (interlkp)
242: lkflags |= LK_INTERLOCK;
243: if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
244: panic("vfs_busy: unexpected lock failure");
1.29 cgd 245: return (0);
246: }
247:
248: /*
1.80 fvdl 249: * Free a busy filesystem.
1.29 cgd 250: */
251: void
1.80 fvdl 252: vfs_unbusy(mp)
253: struct mount *mp;
1.29 cgd 254: {
255:
1.80 fvdl 256: lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29 cgd 257: }
258:
259: /*
1.80 fvdl 260: * Lookup a filesystem type, and if found allocate and initialize
261: * a mount structure for it.
262: *
263: * Devname is usually updated by mount(8) after booting.
1.29 cgd 264: */
1.50 christos 265: int
1.80 fvdl 266: vfs_rootmountalloc(fstypename, devname, mpp)
267: char *fstypename;
268: char *devname;
269: struct mount **mpp;
1.29 cgd 270: {
1.80 fvdl 271: struct vfsops *vfsp = NULL;
272: struct mount *mp;
1.29 cgd 273:
1.152 jdolecek 274: LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80 fvdl 275: if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
276: break;
277:
278: if (vfsp == NULL)
279: return (ENODEV);
280: mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91 perry 281: memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80 fvdl 282: lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
283: (void)vfs_busy(mp, LK_NOWAIT, 0);
284: LIST_INIT(&mp->mnt_vnodelist);
285: mp->mnt_op = vfsp;
286: mp->mnt_flag = MNT_RDONLY;
287: mp->mnt_vnodecovered = NULLVP;
288: vfsp->vfs_refcount++;
289: strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
290: mp->mnt_stat.f_mntonname[0] = '/';
291: (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
292: *mpp = mp;
1.29 cgd 293: return (0);
294: }
295:
296: /*
297: * Lookup a mount point by filesystem identifier.
298: */
299: struct mount *
1.80 fvdl 300: vfs_getvfs(fsid)
1.29 cgd 301: fsid_t *fsid;
302: {
1.123 augustss 303: struct mount *mp;
1.29 cgd 304:
1.80 fvdl 305: simple_lock(&mountlist_slock);
1.38 mycroft 306: for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
1.80 fvdl 307: mp = mp->mnt_list.cqe_next) {
1.29 cgd 308: if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
1.80 fvdl 309: mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
310: simple_unlock(&mountlist_slock);
1.29 cgd 311: return (mp);
1.80 fvdl 312: }
313: }
314: simple_unlock(&mountlist_slock);
1.29 cgd 315: return ((struct mount *)0);
316: }
317:
318: /*
319: * Get a new unique fsid
320: */
321: void
1.127 assar 322: vfs_getnewfsid(mp)
1.29 cgd 323: struct mount *mp;
324: {
325: static u_short xxxfs_mntid;
326: fsid_t tfsid;
1.80 fvdl 327: int mtype;
1.29 cgd 328:
1.80 fvdl 329: simple_lock(&mntid_slock);
1.127 assar 330: mtype = makefstype(mp->mnt_op->vfs_name);
1.80 fvdl 331: mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
1.29 cgd 332: mp->mnt_stat.f_fsid.val[1] = mtype;
333: if (xxxfs_mntid == 0)
334: ++xxxfs_mntid;
1.33 deraadt 335: tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
1.29 cgd 336: tfsid.val[1] = mtype;
1.38 mycroft 337: if (mountlist.cqh_first != (void *)&mountlist) {
1.80 fvdl 338: while (vfs_getvfs(&tfsid)) {
1.29 cgd 339: tfsid.val[0]++;
340: xxxfs_mntid++;
341: }
342: }
343: mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
1.80 fvdl 344: simple_unlock(&mntid_slock);
1.29 cgd 345: }
346:
/*
 * Make a 'unique' number from a mount type name.
 *
 * Folds the NUL-terminated string `type' into a number: for every
 * character the running value is shifted left by two bits and XORed with
 * the character.  An empty string yields 0.
 *
 * The fold is done in unsigned arithmetic so that long names, whose
 * shifted value no longer fits in a long, wrap around instead of
 * left-shifting a signed value out of range (undefined behaviour in C).
 * Each character is first widened to long, so it contributes exactly the
 * bits it would in a signed XOR.
 */
long
makefstype(type)
	const char *type;
{
	unsigned long acc;

	for (acc = 0; *type != '\0'; type++) {
		acc <<= 2;
		acc ^= (unsigned long)(long)*type;
	}
	return ((long)acc);
}
1.30 mycroft 362:
1.80 fvdl 363:
1.30 mycroft 364: /*
365: * Set vnode attributes to VNOVAL
366: */
367: void
368: vattr_null(vap)
1.123 augustss 369: struct vattr *vap;
1.30 mycroft 370: {
371:
372: vap->va_type = VNON;
1.75 enami 373:
374: /*
375: * Assign individually so that it is safe even if size and
376: * sign of each member are varied.
377: */
378: vap->va_mode = VNOVAL;
379: vap->va_nlink = VNOVAL;
380: vap->va_uid = VNOVAL;
381: vap->va_gid = VNOVAL;
382: vap->va_fsid = VNOVAL;
383: vap->va_fileid = VNOVAL;
1.30 mycroft 384: vap->va_size = VNOVAL;
1.75 enami 385: vap->va_blocksize = VNOVAL;
1.76 christos 386: vap->va_atime.tv_sec =
387: vap->va_mtime.tv_sec =
388: vap->va_ctime.tv_sec = VNOVAL;
389: vap->va_atime.tv_nsec =
390: vap->va_mtime.tv_nsec =
391: vap->va_ctime.tv_nsec = VNOVAL;
1.75 enami 392: vap->va_gen = VNOVAL;
393: vap->va_flags = VNOVAL;
394: vap->va_rdev = VNOVAL;
1.30 mycroft 395: vap->va_bytes = VNOVAL;
396: vap->va_vaflags = 0;
397: }
398:
399: /*
400: * Routines having to do with the management of the vnode table.
401: */
1.50 christos 402: extern int (**dead_vnodeop_p) __P((void *));
1.30 mycroft 403: long numvnodes;
404:
1.29 cgd 405: /*
406: * Return the next vnode from the free list.
407: */
1.50 christos 408: int
1.29 cgd 409: getnewvnode(tag, mp, vops, vpp)
410: enum vtagtype tag;
411: struct mount *mp;
1.50 christos 412: int (**vops) __P((void *));
1.29 cgd 413: struct vnode **vpp;
414: {
1.142 chs 415: extern struct uvm_pagerops uvm_vnodeops;
416: struct uvm_object *uobj;
1.80 fvdl 417: struct proc *p = curproc; /* XXX */
1.113 fvdl 418: struct freelst *listhd;
419: static int toggle;
1.80 fvdl 420: struct vnode *vp;
1.153 thorpej 421: int error = 0, tryalloc;
1.81 ross 422: #ifdef DIAGNOSTIC
1.30 mycroft 423: int s;
1.81 ross 424: #endif
1.103 sommerfe 425: if (mp) {
426: /*
1.106 sommerfe 427: * Mark filesystem busy while we're creating a vnode.
428: * If unmount is in progress, this will wait; if the
429: * unmount succeeds (only if umount -f), this will
430: * return an error. If the unmount fails, we'll keep
431: * going afterwards.
432: * (This puts the per-mount vnode list logically under
433: * the protection of the vfs_busy lock).
1.103 sommerfe 434: */
1.113 fvdl 435: error = vfs_busy(mp, LK_RECURSEFAIL, 0);
436: if (error && error != EDEADLK)
1.103 sommerfe 437: return error;
438: }
1.29 cgd 439:
1.113 fvdl 440: /*
441: * We must choose whether to allocate a new vnode or recycle an
442: * existing one. The criterion for allocating a new one is that
443: * the total number of vnodes is less than the number desired or
444: * there are no vnodes on either free list. Generally we only
445: * want to recycle vnodes that have no buffers associated with
446: * them, so we look first on the vnode_free_list. If it is empty,
447: * we next consider vnodes with referencing buffers on the
448: * vnode_hold_list. The toggle ensures that half the time we
449: * will use a buffer from the vnode_hold_list, and half the time
450: * we will allocate a new one unless the list has grown to twice
451: * the desired size. We are reticent to recycle vnodes from the
452: * vnode_hold_list because we will lose the identity of all its
453: * referencing buffers.
454: */
1.142 chs 455:
1.153 thorpej 456: try_again:
457: vp = NULL;
458:
459: simple_lock(&vnode_free_list_slock);
460:
1.113 fvdl 461: toggle ^= 1;
462: if (numvnodes > 2 * desiredvnodes)
463: toggle = 0;
464:
1.153 thorpej 465: tryalloc = numvnodes < desiredvnodes ||
1.113 fvdl 466: (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
1.153 thorpej 467: (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle));
468:
469: if (tryalloc &&
470: (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.80 fvdl 471: simple_unlock(&vnode_free_list_slock);
1.142 chs 472: memset(vp, 0, sizeof(*vp));
1.153 thorpej 473: simple_lock_init(&vp->v_interlock);
1.29 cgd 474: numvnodes++;
475: } else {
1.113 fvdl 476: for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
477: vp = TAILQ_NEXT(vp, v_freelist)) {
1.105 wrstuden 478: if (simple_lock_try(&vp->v_interlock)) {
1.153 thorpej 479: if ((vp->v_flag & VLAYER) == 0)
1.105 wrstuden 480: break;
481: if (VOP_ISLOCKED(vp) == 0)
482: break;
483: else
484: simple_unlock(&vp->v_interlock);
485: }
1.80 fvdl 486: }
487: /*
488: * Unless this is a bad time of the month, at most
489: * the first NCPUS items on the free list are
490: * locked, so this is close enough to being empty.
491: */
492: if (vp == NULLVP) {
493: simple_unlock(&vnode_free_list_slock);
1.113 fvdl 494: if (mp && error != EDEADLK)
495: vfs_unbusy(mp);
1.153 thorpej 496: if (tryalloc) {
497: printf("WARNING: unable to allocate new "
498: "vnode, retrying...\n");
499: (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
500: goto try_again;
501: }
1.132 jdolecek 502: tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29 cgd 503: *vpp = 0;
504: return (ENFILE);
505: }
1.80 fvdl 506: if (vp->v_usecount)
1.125 chs 507: panic("free vnode isn't, vp %p", vp);
1.113 fvdl 508: TAILQ_REMOVE(listhd, vp, v_freelist);
1.30 mycroft 509: /* see comment on why 0xdeadb is set at end of vgone (below) */
1.29 cgd 510: vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
1.80 fvdl 511: simple_unlock(&vnode_free_list_slock);
1.30 mycroft 512: vp->v_lease = NULL;
1.29 cgd 513: if (vp->v_type != VBAD)
1.80 fvdl 514: vgonel(vp, p);
515: else
516: simple_unlock(&vp->v_interlock);
1.30 mycroft 517: #ifdef DIAGNOSTIC
1.80 fvdl 518: if (vp->v_data)
1.125 chs 519: panic("cleaned vnode isn't, vp %p", vp);
1.30 mycroft 520: s = splbio();
521: if (vp->v_numoutput)
1.125 chs 522: panic("clean vnode has pending I/O's, vp %p", vp);
1.30 mycroft 523: splx(s);
524: #endif
1.29 cgd 525: vp->v_flag = 0;
526: vp->v_lastr = 0;
1.30 mycroft 527: vp->v_ralen = 0;
528: vp->v_maxra = 0;
529: vp->v_lastw = 0;
530: vp->v_lasta = 0;
531: vp->v_cstart = 0;
532: vp->v_clen = 0;
1.29 cgd 533: vp->v_socket = 0;
534: }
535: vp->v_type = VNON;
1.104 wrstuden 536: vp->v_vnlock = &vp->v_lock;
537: lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.142 chs 538: lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
1.29 cgd 539: cache_purge(vp);
540: vp->v_tag = tag;
541: vp->v_op = vops;
542: insmntque(vp, mp);
1.30 mycroft 543: *vpp = vp;
1.29 cgd 544: vp->v_usecount = 1;
1.30 mycroft 545: vp->v_data = 0;
1.77 mrg 546: simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
1.142 chs 547:
548: /*
549: * initialize uvm_object within vnode.
550: */
551:
552: uobj = &vp->v_uvm.u_obj;
553: uobj->pgops = &uvm_vnodeops;
554: TAILQ_INIT(&uobj->memq);
555: vp->v_uvm.u_size = VSIZENOTSET;
556:
1.113 fvdl 557: if (mp && error != EDEADLK)
558: vfs_unbusy(mp);
1.29 cgd 559: return (0);
1.130 fvdl 560: }
561:
562: /*
563: * This is really just the reverse of getnewvnode(). Needed for
564: * VFS_VGET functions who may need to push back a vnode in case
565: * of a locking race.
566: */
567: void
1.131 fvdl 568: ungetnewvnode(vp)
1.130 fvdl 569: struct vnode *vp;
570: {
571: #ifdef DIAGNOSTIC
572: if (vp->v_usecount != 1)
1.131 fvdl 573: panic("ungetnewvnode: busy vnode");
1.130 fvdl 574: #endif
575: vp->v_usecount--;
576: insmntque(vp, NULL);
577: vp->v_type = VBAD;
578:
579: simple_lock(&vp->v_interlock);
580: /*
581: * Insert at head of LRU list
582: */
583: simple_lock(&vnode_free_list_slock);
584: if (vp->v_holdcnt > 0)
585: TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
586: else
587: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
588: simple_unlock(&vnode_free_list_slock);
589: simple_unlock(&vp->v_interlock);
1.29 cgd 590: }
591:
592: /*
593: * Move a vnode from one mount queue to another.
594: */
1.50 christos 595: void
1.29 cgd 596: insmntque(vp, mp)
1.123 augustss 597: struct vnode *vp;
598: struct mount *mp;
1.29 cgd 599: {
600:
1.103 sommerfe 601: #ifdef DIAGNOSTIC
602: if ((mp != NULL) &&
1.113 fvdl 603: (mp->mnt_flag & MNT_UNMOUNT) &&
604: !(mp->mnt_flag & MNT_SOFTDEP) &&
605: vp->v_tag != VT_VFS) {
1.103 sommerfe 606: panic("insmntque into dying filesystem");
607: }
608: #endif
609:
1.80 fvdl 610: simple_lock(&mntvnode_slock);
1.29 cgd 611: /*
612: * Delete from old mount point vnode list, if on one.
613: */
614: if (vp->v_mount != NULL)
615: LIST_REMOVE(vp, v_mntvnodes);
616: /*
617: * Insert into list of vnodes for the new mount point, if available.
618: */
1.80 fvdl 619: if ((vp->v_mount = mp) != NULL)
620: LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
621: simple_unlock(&mntvnode_slock);
1.29 cgd 622: }
623:
624: /*
625: * Update outstanding I/O count and do wakeup if requested.
626: */
1.50 christos 627: void
1.29 cgd 628: vwakeup(bp)
1.123 augustss 629: struct buf *bp;
1.29 cgd 630: {
1.123 augustss 631: struct vnode *vp;
1.29 cgd 632:
1.50 christos 633: if ((vp = bp->b_vp) != NULL) {
1.30 mycroft 634: if (--vp->v_numoutput < 0)
1.125 chs 635: panic("vwakeup: neg numoutput, vp %p", vp);
1.29 cgd 636: if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
637: vp->v_flag &= ~VBWAIT;
638: wakeup((caddr_t)&vp->v_numoutput);
639: }
640: }
641: }
642:
643: /*
644: * Flush out and invalidate all buffers associated with a vnode.
1.126 mycroft 645: * Called with the underlying vnode locked, which should prevent new dirty
646: * buffers from being queued.
 *
 * flags:    V_SAVE asks for dirty data to be written out first (pages
 *           are flushed with PGO_CLEANIT and VOP_FSYNC() is called).
 * cred, p:  passed through to VOP_FSYNC().
 * slpflag, slptimeo: added to the priority / used as the timeout of the
 *           tsleep() calls made while waiting for busy buffers.
 *
 * Returns 0 on success, EIO if the page flush of a regular file reports
 * failure, the error from VOP_FSYNC(), or the error from an interrupted
 * or timed-out tsleep().
1.29 cgd 647: */
1.30 mycroft 648: int
649: vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1.123 augustss 650: struct vnode *vp;
1.30 mycroft 651: int flags;
652: struct ucred *cred;
653: struct proc *p;
654: int slpflag, slptimeo;
1.29 cgd 655: {
1.142 chs 656: struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.126 mycroft 657: struct buf *bp, *nbp;
1.142 chs 658: int s, error, rv;
659: int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
660: (flags & V_SAVE ? PGO_CLEANIT : 0);
661:
/* Regular files only: flush the vnode's pages through its pager ops. */
662: /* XXXUBC this doesn't look at flags or slp* */
663: if (vp->v_type == VREG) {
664: simple_lock(&uobj->vmobjlock);
665: rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
666: simple_unlock(&uobj->vmobjlock);
/* A zero return from pgo_flush is treated as failure. */
667: if (!rv) {
668: return EIO;
669: }
670: }
/* V_SAVE: push dirty buffers to disk synchronously before invalidating. */
1.30 mycroft 671: if (flags & V_SAVE) {
1.140 fvdl 672: error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
1.126 mycroft 673: if (error)
1.122 fvdl 674: return (error);
1.126 mycroft 675: #ifdef DIAGNOSTIC
1.122 fvdl 676: s = splbio();
1.126 mycroft 677: if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125 chs 678: panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113 fvdl 679: splx(s);
1.126 mycroft 680: #endif
1.30 mycroft 681: }
1.113 fvdl 682:
/* The buffer lists are walked at splbio from here to the end. */
1.115 fvdl 683: s = splbio();
684:
/*
 * Both scans restart from the top of the clean list after any sleep or
 * write, since the lists may have changed in the meantime.  nbp is
 * fetched before bp is released because brelse() may unlink bp.
 */
1.126 mycroft 685: restart:
686: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
687: nbp = LIST_NEXT(bp, b_vnbufs);
/* Busy buffer: ask to be woken, sleep on it, then rescan. */
688: if (bp->b_flags & B_BUSY) {
689: bp->b_flags |= B_WANTED;
690: error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
691: "vinvalbuf", slptimeo);
692: if (error) {
693: splx(s);
694: return (error);
695: }
696: goto restart;
1.113 fvdl 697: }
/* Claim the buffer, mark it invalid and release it. */
1.126 mycroft 698: bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
699: brelse(bp);
700: }
1.30 mycroft 701:
1.126 mycroft 702: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
703: nbp = LIST_NEXT(bp, b_vnbufs);
704: if (bp->b_flags & B_BUSY) {
705: bp->b_flags |= B_WANTED;
706: error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
707: "vinvalbuf", slptimeo);
708: if (error) {
709: splx(s);
710: return (error);
1.29 cgd 711: }
1.126 mycroft 712: goto restart;
713: }
714: /*
715: * XXX Since there are no node locks for NFS, I believe
716: * there is a slight chance that a delayed write will
717: * occur while sleeping just above, so check for it.
718: */
719: if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
720: #ifdef DEBUG
721: printf("buffer still DELWRI\n");
722: #endif
/* Write the straggler instead of discarding it, then rescan. */
1.63 mycroft 723: bp->b_flags |= B_BUSY | B_VFLUSH;
1.126 mycroft 724: VOP_BWRITE(bp);
725: goto restart;
726: }
727: bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
728: brelse(bp);
729: }
730:
731: #ifdef DIAGNOSTIC
732: if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
733: panic("vinvalbuf: flush failed, vp %p", vp);
1.113 fvdl 734: #endif
1.126 mycroft 735:
736: splx(s);
737:
738: return (0);
739: }
740:
741: /*
742: * Destroy any in core blocks past the truncation length.
743: * Called with the underlying vnode locked, which should prevent new dirty
744: * buffers from being queued.
 *
 * lbn is the first logical block to discard: buffers whose b_lblkno is
 * below lbn are left alone, all others on the vnode's clean and dirty
 * lists are invalidated.  slpflag and slptimeo are applied to the
 * tsleep() calls made while waiting for busy buffers.
 *
 * Returns 0 on success, EIO if the page flush of a regular file reports
 * failure, or the error from an interrupted or timed-out tsleep().
745: */
746: int
747: vtruncbuf(vp, lbn, slpflag, slptimeo)
748: struct vnode *vp;
749: daddr_t lbn;
750: int slpflag, slptimeo;
751: {
1.142 chs 752: struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.126 mycroft 753: struct buf *bp, *nbp;
1.142 chs 754: int s, error, rv;
1.126 mycroft 755:
/* Everything below, including the page flush, runs at splbio. */
756: s = splbio();
/*
 * Regular files only: free the pages starting at the page-rounded byte
 * offset of block lbn (lbn shifted by the mount's mnt_fs_bshift).
 * NOTE(review): the end offset passed is 0; presumably pgo_flush takes
 * that to mean "to the end of the object" -- confirm.
 */
1.142 chs 757: if (vp->v_type == VREG) {
758: simple_lock(&uobj->vmobjlock);
759: rv = (uobj->pgops->pgo_flush)(uobj,
1.145 chs 760: round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
761: PGO_FREE|PGO_SYNCIO);
1.142 chs 762: simple_unlock(&uobj->vmobjlock);
763: if (!rv) {
764: splx(s);
765: return EIO;
766: }
767: }
1.126 mycroft 768:
/*
 * Both scans restart from the top of the clean list after any sleep,
 * since the lists may have changed in the meantime.
 */
769: restart:
770: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
771: nbp = LIST_NEXT(bp, b_vnbufs);
/* Blocks before the truncation point are kept. */
772: if (bp->b_lblkno < lbn)
773: continue;
/* Busy buffer: ask to be woken, sleep on it, then rescan. */
774: if (bp->b_flags & B_BUSY) {
775: bp->b_flags |= B_WANTED;
1.142 chs 776: error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126 mycroft 777: "vtruncbuf", slptimeo);
778: if (error) {
779: splx(s);
780: return (error);
1.29 cgd 781: }
1.126 mycroft 782: goto restart;
1.29 cgd 783: }
/* Claim the buffer, mark it invalid and release it. */
1.126 mycroft 784: bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
785: brelse(bp);
1.29 cgd 786: }
1.115 fvdl 787:
/* Same treatment for the dirty list; dirty data past lbn is discarded. */
1.126 mycroft 788: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
789: nbp = LIST_NEXT(bp, b_vnbufs);
790: if (bp->b_lblkno < lbn)
791: continue;
792: if (bp->b_flags & B_BUSY) {
793: bp->b_flags |= B_WANTED;
1.142 chs 794: error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126 mycroft 795: "vtruncbuf", slptimeo);
796: if (error) {
797: splx(s);
798: return (error);
799: }
800: goto restart;
801: }
802: bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
803: brelse(bp);
804: }
1.115 fvdl 805:
806: splx(s);
807:
1.30 mycroft 808: return (0);
809: }
810:
/*
 * Write out the dirty buffers of a vnode.
 *
 * For regular files the vnode's pages are first cleaned through its
 * pager ops (synchronously when `sync' is non-zero).  Then every dirty
 * buffer that is not busy is claimed and written.  With sync == 0 the
 * function returns once the dirty list has been scanned; otherwise it
 * waits until v_numoutput drops to zero and starts over if dirty buffers
 * have appeared in the meantime.
 */
811: void
812: vflushbuf(vp, sync)
1.123 augustss 813: struct vnode *vp;
1.30 mycroft 814: int sync;
815: {
1.142 chs 816: struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.123 augustss 817: struct buf *bp, *nbp;
1.30 mycroft 818: int s;
819:
1.142 chs 820: if (vp->v_type == VREG) {
821: int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);
822:
823: simple_lock(&uobj->vmobjlock);
/* The result of the page flush is not checked here. */
824: (uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
825: simple_unlock(&uobj->vmobjlock);
826: }
827:
/*
 * The dirty list is scanned at splbio; the level is dropped before each
 * write and the scan restarts from the head afterwards.
 */
1.30 mycroft 828: loop:
829: s = splbio();
1.126 mycroft 830: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
831: nbp = LIST_NEXT(bp, b_vnbufs);
/* Buffers someone else holds are skipped, not waited for. */
1.30 mycroft 832: if ((bp->b_flags & B_BUSY))
833: continue;
834: if ((bp->b_flags & B_DELWRI) == 0)
1.125 chs 835: panic("vflushbuf: not dirty, bp %p", bp);
1.63 mycroft 836: bp->b_flags |= B_BUSY | B_VFLUSH;
1.30 mycroft 837: splx(s);
838: /*
839: * Wait for I/O associated with indirect blocks to complete,
840: * since there is no way to quickly wait for them below.
841: */
/*
 * bawrite() for buffers that belong to vp itself or when not syncing;
 * bwrite() only for buffers of another vnode during a sync flush.
 */
842: if (bp->b_vp == vp || sync == 0)
843: (void) bawrite(bp);
844: else
845: (void) bwrite(bp);
846: goto loop;
847: }
848: if (sync == 0) {
849: splx(s);
850: return;
851: }
/* Sleep until vwakeup() reports that all output has completed. */
852: while (vp->v_numoutput) {
853: vp->v_flag |= VBWAIT;
854: tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
855: }
856: splx(s);
/* Dirty buffers showed up while we waited: report and go again. */
1.126 mycroft 857: if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30 mycroft 858: vprint("vflushbuf: dirty", vp);
859: goto loop;
860: }
1.29 cgd 861: }
862:
863: /*
864: * Associate a buffer with a vnode.
865: */
1.50 christos 866: void
1.29 cgd 867: bgetvp(vp, bp)
1.123 augustss 868: struct vnode *vp;
869: struct buf *bp;
1.29 cgd 870: {
1.115 fvdl 871: int s;
1.29 cgd 872:
873: if (bp->b_vp)
1.125 chs 874: panic("bgetvp: not free, bp %p", bp);
1.29 cgd 875: VHOLD(vp);
1.115 fvdl 876: s = splbio();
1.29 cgd 877: bp->b_vp = vp;
878: if (vp->v_type == VBLK || vp->v_type == VCHR)
879: bp->b_dev = vp->v_rdev;
880: else
881: bp->b_dev = NODEV;
882: /*
883: * Insert onto list for new vnode.
884: */
885: bufinsvn(bp, &vp->v_cleanblkhd);
1.115 fvdl 886: splx(s);
1.29 cgd 887: }
888:
889: /*
890: * Disassociate a buffer from a vnode.
891: */
1.50 christos 892: void
1.29 cgd 893: brelvp(bp)
1.123 augustss 894: struct buf *bp;
1.29 cgd 895: {
896: struct vnode *vp;
1.115 fvdl 897: int s;
1.29 cgd 898:
1.125 chs 899: if (bp->b_vp == NULL)
900: panic("brelvp: vp NULL, bp %p", bp);
1.115 fvdl 901:
902: s = splbio();
1.113 fvdl 903: vp = bp->b_vp;
1.29 cgd 904: /*
905: * Delete from old vnode list, if on one.
906: */
907: if (bp->b_vnbufs.le_next != NOLIST)
908: bufremvn(bp);
1.142 chs 909:
910: if (vp->v_type != VREG && (vp->v_flag & VONWORKLST) &&
911: LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.113 fvdl 912: vp->v_flag &= ~VONWORKLST;
913: LIST_REMOVE(vp, v_synclist);
914: }
1.142 chs 915:
916: bp->b_vp = NULL;
1.29 cgd 917: HOLDRELE(vp);
1.115 fvdl 918: splx(s);
1.29 cgd 919: }
920:
921: /*
922: * Reassign a buffer from one vnode to another.
923: * Used to assign file specific control information
924: * (indirect blocks) to the vnode to which they belong.
1.115 fvdl 925: *
926: * This function must be called at splbio().
1.29 cgd 927: */
1.50 christos 928: void
1.29 cgd 929: reassignbuf(bp, newvp)
1.113 fvdl 930: struct buf *bp;
931: struct vnode *newvp;
1.29 cgd 932: {
1.113 fvdl 933: struct buflists *listheadp;
934: int delay;
1.29 cgd 935:
936: /*
937: * Delete from old vnode list, if on one.
938: */
939: if (bp->b_vnbufs.le_next != NOLIST)
940: bufremvn(bp);
941: /*
942: * If dirty, put on list of dirty buffers;
943: * otherwise insert onto list of clean buffers.
944: */
1.113 fvdl 945: if ((bp->b_flags & B_DELWRI) == 0) {
946: listheadp = &newvp->v_cleanblkhd;
1.142 chs 947: if (newvp->v_type != VREG &&
948: (newvp->v_flag & VONWORKLST) &&
1.113 fvdl 949: LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
950: newvp->v_flag &= ~VONWORKLST;
951: LIST_REMOVE(newvp, v_synclist);
952: }
953: } else {
1.29 cgd 954: listheadp = &newvp->v_dirtyblkhd;
1.113 fvdl 955: if ((newvp->v_flag & VONWORKLST) == 0) {
956: switch (newvp->v_type) {
957: case VDIR:
958: delay = dirdelay;
959: break;
960: case VBLK:
961: if (newvp->v_specmountpoint != NULL) {
962: delay = metadelay;
963: break;
964: }
965: /* fall through */
966: default:
1.118 mycroft 967: delay = filedelay;
968: break;
1.113 fvdl 969: }
1.118 mycroft 970: if (!newvp->v_mount ||
971: (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
972: vn_syncer_add_to_worklist(newvp, delay);
1.113 fvdl 973: }
974: }
1.29 cgd 975: bufinsvn(bp, listheadp);
976: }
977:
978: /*
979: * Create a vnode for a block device.
1.59 thorpej 980: * Used for root filesystem and swap areas.
1.29 cgd 981: * Also used for memory file system special devices.
982: */
1.50 christos 983: int
1.29 cgd 984: bdevvp(dev, vpp)
985: dev_t dev;
986: struct vnode **vpp;
987: {
1.30 mycroft 988:
989: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 990: }
991:
992: /*
993: * Create a vnode for a character device.
994: * Used for kernfs and some console handling.
995: */
1.50 christos 996: int
1.29 cgd 997: cdevvp(dev, vpp)
998: dev_t dev;
999: struct vnode **vpp;
1000: {
1.30 mycroft 1001:
1002: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 1003: }
1004:
1005: /*
1006: * Create a vnode for a device.
1007: * Used by bdevvp (block device) for root file system etc.,
1008: * and by cdevvp (character device) for console and kernfs.
1009: */
1.50 christos 1010: int
1.29 cgd 1011: getdevvp(dev, vpp, type)
1012: dev_t dev;
1013: struct vnode **vpp;
1014: enum vtype type;
1015: {
1.123 augustss 1016: struct vnode *vp;
1.29 cgd 1017: struct vnode *nvp;
1018: int error;
1019:
1.80 fvdl 1020: if (dev == NODEV) {
1021: *vpp = NULLVP;
1.29 cgd 1022: return (0);
1.80 fvdl 1023: }
1.50 christos 1024: error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 1025: if (error) {
1026: *vpp = NULLVP;
1027: return (error);
1028: }
1029: vp = nvp;
1030: vp->v_type = type;
1.50 christos 1031: if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29 cgd 1032: vput(vp);
1033: vp = nvp;
1034: }
1035: *vpp = vp;
1036: return (0);
1037: }
1038:
1039: /*
1040: * Check to see if the new vnode represents a special device
1041: * for which we already have a vnode (either because of
1042: * bdevvp() or because of a different vnode representing
1043: * the same block device). If such an alias exists, deallocate
1044: * the existing contents and return the aliased vnode. The
1045: * caller is responsible for filling it with its new contents.
1046: */
1047: struct vnode *
1048: checkalias(nvp, nvp_rdev, mp)
1.123 augustss 1049: struct vnode *nvp;
1.29 cgd 1050: dev_t nvp_rdev;
1051: struct mount *mp;
1052: {
1.80 fvdl 1053: struct proc *p = curproc; /* XXX */
1.123 augustss 1054: struct vnode *vp;
1.29 cgd 1055: struct vnode **vpp;
1056:
1057: if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1058: return (NULLVP);
1059:
1060: vpp = &speclisth[SPECHASH(nvp_rdev)];
1061: loop:
1.80 fvdl 1062: simple_lock(&spechash_slock);
1.29 cgd 1063: for (vp = *vpp; vp; vp = vp->v_specnext) {
1064: if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
1065: continue;
1066: /*
1067: * Alias, but not in use, so flush it out.
1068: */
1.80 fvdl 1069: simple_lock(&vp->v_interlock);
1.29 cgd 1070: if (vp->v_usecount == 0) {
1.80 fvdl 1071: simple_unlock(&spechash_slock);
1072: vgonel(vp, p);
1.29 cgd 1073: goto loop;
1074: }
1.80 fvdl 1075: if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
1076: simple_unlock(&spechash_slock);
1.29 cgd 1077: goto loop;
1.80 fvdl 1078: }
1.29 cgd 1079: break;
1080: }
1.34 cgd 1081: if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29 cgd 1082: MALLOC(nvp->v_specinfo, struct specinfo *,
1.150 thorpej 1083: sizeof(struct specinfo), M_VNODE, M_NOWAIT);
1084: /* XXX Erg. */
1085: if (nvp->v_specinfo == NULL) {
1086: simple_unlock(&spechash_slock);
1087: uvm_wait("checkalias");
1088: goto loop;
1089: }
1090:
1.29 cgd 1091: nvp->v_rdev = nvp_rdev;
1092: nvp->v_hashchain = vpp;
1093: nvp->v_specnext = *vpp;
1.113 fvdl 1094: nvp->v_specmountpoint = NULL;
1.80 fvdl 1095: simple_unlock(&spechash_slock);
1.62 kleink 1096: nvp->v_speclockf = NULL;
1.29 cgd 1097: *vpp = nvp;
1.80 fvdl 1098: if (vp != NULLVP) {
1.29 cgd 1099: nvp->v_flag |= VALIASED;
1100: vp->v_flag |= VALIASED;
1101: vput(vp);
1102: }
1103: return (NULLVP);
1104: }
1.80 fvdl 1105: simple_unlock(&spechash_slock);
1106: VOP_UNLOCK(vp, 0);
1107: simple_lock(&vp->v_interlock);
1108: vclean(vp, 0, p);
1.29 cgd 1109: vp->v_op = nvp->v_op;
1110: vp->v_tag = nvp->v_tag;
1.104 wrstuden 1111: vp->v_vnlock = &vp->v_lock;
1112: lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29 cgd 1113: nvp->v_type = VNON;
1114: insmntque(vp, mp);
1115: return (vp);
1116: }
1117:
1118: /*
1119: * Grab a particular vnode from the free list, increment its
1.83 fvdl 1120: * reference count and lock it. If the vnode lock bit is set the
1121: * vnode is being eliminated in vgone. In that case, we can not
1122: * grab the vnode, so the process is awakened when the transition is
1123: * completed, and an error returned to indicate that the vnode is no
1124: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 1125: */
1.30 mycroft 1126: int
1.80 fvdl 1127: vget(vp, flags)
1128: struct vnode *vp;
1129: int flags;
1.29 cgd 1130: {
1.80 fvdl 1131: int error;
1.29 cgd 1132:
1.30 mycroft 1133: /*
1134: * If the vnode is in the process of being cleaned out for
1135: * another use, we wait for the cleaning to finish and then
1.80 fvdl 1136: * return failure. Cleaning is determined by checking that
1137: * the VXLOCK flag is set.
1138: */
1.142 chs 1139:
1.80 fvdl 1140: if ((flags & LK_INTERLOCK) == 0)
1141: simple_lock(&vp->v_interlock);
1142: if (vp->v_flag & VXLOCK) {
1.142 chs 1143: if (flags & LK_NOWAIT) {
1.143 sommerfe 1144: simple_unlock(&vp->v_interlock);
1.142 chs 1145: return EBUSY;
1146: }
1.29 cgd 1147: vp->v_flag |= VXWANT;
1.134 sommerfe 1148: ltsleep((caddr_t)vp, PINOD|PNORELOCK,
1149: "vget", 0, &vp->v_interlock);
1.80 fvdl 1150: return (ENOENT);
1.29 cgd 1151: }
1.80 fvdl 1152: if (vp->v_usecount == 0) {
1153: simple_lock(&vnode_free_list_slock);
1.113 fvdl 1154: if (vp->v_holdcnt > 0)
1155: TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1156: else
1157: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80 fvdl 1158: simple_unlock(&vnode_free_list_slock);
1159: }
1.29 cgd 1160: vp->v_usecount++;
1.112 mycroft 1161: #ifdef DIAGNOSTIC
1162: if (vp->v_usecount == 0) {
1163: vprint("vget", vp);
1.125 chs 1164: panic("vget: usecount overflow, vp %p", vp);
1.112 mycroft 1165: }
1166: #endif
1.80 fvdl 1167: if (flags & LK_TYPE_MASK) {
1.113 fvdl 1168: if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1169: /*
1170: * must expand vrele here because we do not want
1171: * to call VOP_INACTIVE if the reference count
1172: * drops back to zero since it was never really
1173: * active. We must remove it from the free list
1174: * before sleeping so that multiple processes do
1175: * not try to recycle it.
1176: */
1177: simple_lock(&vp->v_interlock);
1178: vp->v_usecount--;
1179: if (vp->v_usecount > 0) {
1180: simple_unlock(&vp->v_interlock);
1181: return (error);
1182: }
1183: /*
1184: * insert at tail of LRU list
1185: */
1186: simple_lock(&vnode_free_list_slock);
1187: if (vp->v_holdcnt > 0)
1188: TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
1189: v_freelist);
1190: else
1191: TAILQ_INSERT_TAIL(&vnode_free_list, vp,
1192: v_freelist);
1193: simple_unlock(&vnode_free_list_slock);
1194: simple_unlock(&vp->v_interlock);
1195: }
1.80 fvdl 1196: return (error);
1197: }
1198: simple_unlock(&vp->v_interlock);
1.29 cgd 1199: return (0);
1200: }
1201:
1202: /*
1203: * vput(), just unlock and vrele()
1204: */
1205: void
1206: vput(vp)
1.80 fvdl 1207: struct vnode *vp;
1.29 cgd 1208: {
1.80 fvdl 1209: struct proc *p = curproc; /* XXX */
1.30 mycroft 1210:
1.111 mycroft 1211: #ifdef DIAGNOSTIC
1.80 fvdl 1212: if (vp == NULL)
1213: panic("vput: null vp");
1214: #endif
1215: simple_lock(&vp->v_interlock);
1216: vp->v_usecount--;
1217: if (vp->v_usecount > 0) {
1218: simple_unlock(&vp->v_interlock);
1219: VOP_UNLOCK(vp, 0);
1220: return;
1221: }
1222: #ifdef DIAGNOSTIC
1223: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1224: vprint("vput: bad ref count", vp);
1225: panic("vput: ref cnt");
1226: }
1227: #endif
1228: /*
1.87 pk 1229: * Insert at tail of LRU list.
1.80 fvdl 1230: */
1231: simple_lock(&vnode_free_list_slock);
1.113 fvdl 1232: if (vp->v_holdcnt > 0)
1233: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1234: else
1235: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80 fvdl 1236: simple_unlock(&vnode_free_list_slock);
1.147 chs 1237: if (vp->v_flag & VTEXT) {
1238: uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
1239: uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
1240: }
1.142 chs 1241: vp->v_flag &= ~VTEXT;
1.80 fvdl 1242: simple_unlock(&vp->v_interlock);
1243: VOP_INACTIVE(vp, p);
1.29 cgd 1244: }
1245:
1246: /*
1247: * Vnode release.
1248: * If count drops to zero, call inactive routine and return to freelist.
1249: */
1250: void
1251: vrele(vp)
1.80 fvdl 1252: struct vnode *vp;
1.29 cgd 1253: {
1.80 fvdl 1254: struct proc *p = curproc; /* XXX */
1.29 cgd 1255:
1256: #ifdef DIAGNOSTIC
1257: if (vp == NULL)
1258: panic("vrele: null vp");
1259: #endif
1.80 fvdl 1260: simple_lock(&vp->v_interlock);
1.29 cgd 1261: vp->v_usecount--;
1.80 fvdl 1262: if (vp->v_usecount > 0) {
1263: simple_unlock(&vp->v_interlock);
1.29 cgd 1264: return;
1.80 fvdl 1265: }
1.29 cgd 1266: #ifdef DIAGNOSTIC
1.80 fvdl 1267: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29 cgd 1268: vprint("vrele: bad ref count", vp);
1.142 chs 1269: panic("vrele: ref cnt vp %p", vp);
1.29 cgd 1270: }
1271: #endif
1.30 mycroft 1272: /*
1.87 pk 1273: * Insert at tail of LRU list.
1.30 mycroft 1274: */
1.80 fvdl 1275: simple_lock(&vnode_free_list_slock);
1.113 fvdl 1276: if (vp->v_holdcnt > 0)
1277: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1278: else
1279: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80 fvdl 1280: simple_unlock(&vnode_free_list_slock);
1.147 chs 1281: if (vp->v_flag & VTEXT) {
1282: uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
1283: uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
1284: }
1.142 chs 1285: vp->v_flag &= ~VTEXT;
1.80 fvdl 1286: if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1287: VOP_INACTIVE(vp, p);
1.29 cgd 1288: }
1289:
1.80 fvdl 1290: #ifdef DIAGNOSTIC
1.29 cgd 1291: /*
1292: * Page or buffer structure gets a reference.
1293: */
1.30 mycroft 1294: void
1.29 cgd 1295: vhold(vp)
1.123 augustss 1296: struct vnode *vp;
1.29 cgd 1297: {
1298:
1.113 fvdl 1299: /*
1300: * If it is on the freelist and the hold count is currently
1301: * zero, move it to the hold list. The test of the back
1302: * pointer and the use reference count of zero is because
1303: * it will be removed from a free list by getnewvnode,
1304: * but will not have its reference count incremented until
1305: * after calling vgone. If the reference count were
1306: * incremented first, vgone would (incorrectly) try to
1307: * close the previous instance of the underlying object.
1308: * So, the back pointer is explicitly set to `0xdeadb' in
1309: * getnewvnode after removing it from a freelist to ensure
1310: * that we do not try to move it here.
1311: */
1312: simple_lock(&vp->v_interlock);
1313: if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1314: vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1315: simple_lock(&vnode_free_list_slock);
1316: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1317: TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1318: simple_unlock(&vnode_free_list_slock);
1319: }
1.29 cgd 1320: vp->v_holdcnt++;
1.80 fvdl 1321: simple_unlock(&vp->v_interlock);
1.29 cgd 1322: }
1323:
1324: /*
1325: * Page or buffer structure frees a reference.
1326: */
1.30 mycroft 1327: void
1.29 cgd 1328: holdrele(vp)
1.123 augustss 1329: struct vnode *vp;
1.29 cgd 1330: {
1331:
1.80 fvdl 1332: simple_lock(&vp->v_interlock);
1.29 cgd 1333: if (vp->v_holdcnt <= 0)
1.125 chs 1334: panic("holdrele: holdcnt vp %p", vp);
1.29 cgd 1335: vp->v_holdcnt--;
1.142 chs 1336:
1.113 fvdl 1337: /*
1338: * If it is on the holdlist and the hold count drops to
1339: * zero, move it to the free list. The test of the back
1340: * pointer and the use reference count of zero is because
1341: * it will be removed from a free list by getnewvnode,
1342: * but will not have its reference count incremented until
1343: * after calling vgone. If the reference count were
1344: * incremented first, vgone would (incorrectly) try to
1345: * close the previous instance of the underlying object.
1346: * So, the back pointer is explicitly set to `0xdeadb' in
1347: * getnewvnode after removing it from a freelist to ensure
1348: * that we do not try to move it here.
1349: */
1.142 chs 1350:
1.113 fvdl 1351: if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1352: vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1353: simple_lock(&vnode_free_list_slock);
1354: TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1355: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1356: simple_unlock(&vnode_free_list_slock);
1357: }
1.81 ross 1358: simple_unlock(&vp->v_interlock);
1359: }
1360:
1361: /*
1362: * Vnode reference.
1363: */
1364: void
1365: vref(vp)
1366: struct vnode *vp;
1367: {
1368:
1369: simple_lock(&vp->v_interlock);
1370: if (vp->v_usecount <= 0)
1.125 chs 1371: panic("vref used where vget required, vp %p", vp);
1.81 ross 1372: vp->v_usecount++;
1.112 mycroft 1373: #ifdef DIAGNOSTIC
1374: if (vp->v_usecount == 0) {
1375: vprint("vref", vp);
1.125 chs 1376: panic("vref: usecount overflow, vp %p", vp);
1.112 mycroft 1377: }
1378: #endif
1.80 fvdl 1379: simple_unlock(&vp->v_interlock);
1.29 cgd 1380: }
1.80 fvdl 1381: #endif /* DIAGNOSTIC */
1.29 cgd 1382:
1383: /*
1384: * Remove any vnodes in the vnode table belonging to mount point mp.
1385: *
1386: * If MNT_NOFORCE is specified, there should not be any active ones,
1387: * return error if any are found (nb: this is a user error, not a
1388: * system error). If MNT_FORCE is specified, detach any active vnodes
1389: * that are found.
1390: */
1.30 mycroft 1391: #ifdef DEBUG
1392: int busyprt = 0; /* print out busy vnodes */
1393: struct ctldebug debug1 = { "busyprt", &busyprt };
1394: #endif
1.29 cgd 1395:
1.50 christos 1396: int
1.29 cgd 1397: vflush(mp, skipvp, flags)
1398: struct mount *mp;
1399: struct vnode *skipvp;
1400: int flags;
1401: {
1.80 fvdl 1402: struct proc *p = curproc; /* XXX */
1.123 augustss 1403: struct vnode *vp, *nvp;
1.29 cgd 1404: int busy = 0;
1405:
1.80 fvdl 1406: simple_lock(&mntvnode_slock);
1.29 cgd 1407: loop:
1408: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1409: if (vp->v_mount != mp)
1410: goto loop;
1411: nvp = vp->v_mntvnodes.le_next;
1412: /*
1413: * Skip over a selected vnode.
1414: */
1415: if (vp == skipvp)
1416: continue;
1.80 fvdl 1417: simple_lock(&vp->v_interlock);
1.29 cgd 1418: /*
1419: * Skip over a vnodes marked VSYSTEM.
1420: */
1.80 fvdl 1421: if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1422: simple_unlock(&vp->v_interlock);
1.29 cgd 1423: continue;
1.80 fvdl 1424: }
1.29 cgd 1425: /*
1.30 mycroft 1426: * If WRITECLOSE is set, only flush out regular file
1427: * vnodes open for writing.
1428: */
1429: if ((flags & WRITECLOSE) &&
1.92 thorpej 1430: (vp->v_writecount == 0 || vp->v_type != VREG)) {
1431: simple_unlock(&vp->v_interlock);
1.30 mycroft 1432: continue;
1.92 thorpej 1433: }
1.30 mycroft 1434: /*
1.29 cgd 1435: * With v_usecount == 0, all we need to do is clear
1436: * out the vnode data structures and we are done.
1437: */
1438: if (vp->v_usecount == 0) {
1.80 fvdl 1439: simple_unlock(&mntvnode_slock);
1440: vgonel(vp, p);
1441: simple_lock(&mntvnode_slock);
1.29 cgd 1442: continue;
1443: }
1444: /*
1.30 mycroft 1445: * If FORCECLOSE is set, forcibly close the vnode.
1.29 cgd 1446: * For block or character devices, revert to an
1447: * anonymous device. For all other files, just kill them.
1448: */
1449: if (flags & FORCECLOSE) {
1.80 fvdl 1450: simple_unlock(&mntvnode_slock);
1.29 cgd 1451: if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.80 fvdl 1452: vgonel(vp, p);
1.29 cgd 1453: } else {
1.80 fvdl 1454: vclean(vp, 0, p);
1.30 mycroft 1455: vp->v_op = spec_vnodeop_p;
1.29 cgd 1456: insmntque(vp, (struct mount *)0);
1457: }
1.80 fvdl 1458: simple_lock(&mntvnode_slock);
1.29 cgd 1459: continue;
1460: }
1.30 mycroft 1461: #ifdef DEBUG
1.29 cgd 1462: if (busyprt)
1463: vprint("vflush: busy vnode", vp);
1.30 mycroft 1464: #endif
1.80 fvdl 1465: simple_unlock(&vp->v_interlock);
1.29 cgd 1466: busy++;
1467: }
1.80 fvdl 1468: simple_unlock(&mntvnode_slock);
1.29 cgd 1469: if (busy)
1470: return (EBUSY);
1471: return (0);
1472: }
1473:
1474: /*
1475: * Disassociate the underlying file system from a vnode.
1476: */
/*
 * vp:    vnode to clean.  The caller holds vp->v_interlock; it is
 *        handed to VOP_LOCK() through LK_INTERLOCK below.
 * flags: with DOCLOSE set, vinvalbuf(V_SAVE) is called on the vnode
 *        and, if the vnode was in use, VOP_CLOSE() as well.
 * p:     process passed through to vinvalbuf() and the VOP calls.
 *
 * On return the vnode uses dead_vnodeop_p, has tag VT_NON, and VXLOCK
 * is cleared again; anyone who set VXWANT is woken up.
 * Panics if VXLOCK is already set on entry or if VOP_RECLAIM() fails.
 */
1477: void
1.80 fvdl 1478: vclean(vp, flags, p)
1.123 augustss 1479: struct vnode *vp;
1.29 cgd 1480: int flags;
1.80 fvdl 1481: struct proc *p;
1.29 cgd 1482: {
1483: int active;
1484:
1485: /*
1486: * Check to see if the vnode is in use.
1487: * If so we have to reference it before we clean it out
1488: * so that its count cannot fall to zero and generate a
1489: * race against ourselves to recycle it.
1490: */
1.112 mycroft 1491: if ((active = vp->v_usecount) != 0) {
1.87 pk 1492: /* We have the vnode interlock. */
1493: vp->v_usecount++;
1.112 mycroft 1494: #ifdef DIAGNOSTIC
1495: if (vp->v_usecount == 0) {
1496: vprint("vclean", vp);
1497: panic("vclean: usecount overflow");
1498: }
1499: #endif
1500: }
1.87 pk 1501:
1.29 cgd 1502: /*
1503: * Prevent the vnode from being recycled or
1504: * brought into use while we clean it out.
1505: */
1506: if (vp->v_flag & VXLOCK)
1.125 chs 1507: panic("vclean: deadlock, vp %p", vp);
1.29 cgd 1508: vp->v_flag |= VXLOCK;
/*
 * If the vnode was marked VTEXT, move its page count from the
 * text-page statistic to the vnode-page statistic and clear the flag.
 */
1.147 chs 1509: if (vp->v_flag & VTEXT) {
1510: uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
1511: uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
1512: }
1.142 chs 1513: vp->v_flag &= ~VTEXT;
1514:
1.29 cgd 1515: /*
1.80 fvdl 1516: * Even if the count is zero, the VOP_INACTIVE routine may still
1517: * have the object locked while it cleans it out. The VOP_LOCK
1518: * ensures that the VOP_INACTIVE routine is done with its work.
1519: * For active vnodes, it ensures that no other activity can
1520: * occur while the underlying object is being cleaned out.
1521: */
1522: VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
1523:
1.98 wrstuden 1524: /*
1.142 chs 1525: * Clean out any cached data associated with the vnode.
1.29 cgd 1526: */
1527: if (flags & DOCLOSE)
1.80 fvdl 1528: vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1529:
1.29 cgd 1530: /*
1.30 mycroft 1531: * If purging an active vnode, it must be closed and
1.80 fvdl 1532: * deactivated before being reclaimed. Note that the
1533: * VOP_INACTIVE will unlock the vnode.
1.29 cgd 1534: */
1535: if (active) {
1536: if (flags & DOCLOSE)
1.86 pk 1537: VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1.80 fvdl 1538: VOP_INACTIVE(vp, p);
1539: } else {
1540: /*
1541: * Any other processes trying to obtain this lock must first
1542: * wait for VXLOCK to clear, then call the new lock operation.
1543: */
1544: VOP_UNLOCK(vp, 0);
1.29 cgd 1545: }
1546: /*
1547: * Reclaim the vnode.
1548: */
1.80 fvdl 1549: if (VOP_RECLAIM(vp, p))
1.125 chs 1550: panic("vclean: cannot reclaim, vp %p", vp);
1.87 pk 1551: if (active) {
1552: /*
1553: * Inline copy of vrele() since VOP_INACTIVE
1554: * has already been called.
1555: */
/*
 * This drops the extra reference taken at the top of the function.
 */
1556: simple_lock(&vp->v_interlock);
1557: if (--vp->v_usecount <= 0) {
1558: #ifdef DIAGNOSTIC
1559: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1560: vprint("vclean: bad ref count", vp);
1561: panic("vclean: ref cnt");
1562: }
1563: #endif
1564: /*
1565: * Insert at tail of LRU list.
1566: */
1.142 chs 1567:
1.113 fvdl 1568: simple_unlock(&vp->v_interlock);
1.87 pk 1569: simple_lock(&vnode_free_list_slock);
1.104 wrstuden 1570: #ifdef DIAGNOSTIC
1571: if (vp->v_vnlock) {
1572: if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1573: vprint("vclean: lock not drained", vp);
1574: }
1.113 fvdl 1575: if (vp->v_holdcnt > 0)
1.125 chs 1576: panic("vclean: not clean, vp %p", vp);
1.104 wrstuden 1577: #endif
1.87 pk 1578: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1579: simple_unlock(&vnode_free_list_slock);
1.113 fvdl 1580: } else
1581: simple_unlock(&vp->v_interlock);
1.87 pk 1582: }
1.30 mycroft 1583:
1.80 fvdl 1584: cache_purge(vp);
1585:
1.29 cgd 1586: /*
1.30 mycroft 1587: * Done with purge, notify sleepers of the grim news.
1.29 cgd 1588: */
/*
 * The vnode is switched to the dead vnode operations and tag VT_NON
 * first; VXLOCK/VXWANT are then cleared under the interlock, and the
 * wakeup() is issued after the interlock has been released.
 */
1.30 mycroft 1589: vp->v_op = dead_vnodeop_p;
1590: vp->v_tag = VT_NON;
1.139 enami 1591: simple_lock(&vp->v_interlock);
1.29 cgd 1592: vp->v_flag &= ~VXLOCK;
1593: if (vp->v_flag & VXWANT) {
1594: vp->v_flag &= ~VXWANT;
1.139 enami 1595: simple_unlock(&vp->v_interlock);
1.29 cgd 1596: wakeup((caddr_t)vp);
1.139 enami 1597: } else
1598: simple_unlock(&vp->v_interlock);
1.29 cgd 1599: }
1600:
1601: /*
1.80 fvdl 1602: * Recycle an unused vnode to the front of the free list.
1603: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1604: */
1.80 fvdl 1605: int
1606: vrecycle(vp, inter_lkp, p)
1607: struct vnode *vp;
1608: struct simplelock *inter_lkp;
1609: struct proc *p;
1610: {
1611:
1612: simple_lock(&vp->v_interlock);
1613: if (vp->v_usecount == 0) {
1614: if (inter_lkp)
1615: simple_unlock(inter_lkp);
1616: vgonel(vp, p);
1617: return (1);
1.29 cgd 1618: }
1.80 fvdl 1619: simple_unlock(&vp->v_interlock);
1620: return (0);
1.29 cgd 1621: }
1622:
1623: /*
1624: * Eliminate all activity associated with a vnode
1625: * in preparation for reuse.
1626: */
1627: void
1628: vgone(vp)
1.80 fvdl 1629: struct vnode *vp;
1630: {
1631: struct proc *p = curproc; /* XXX */
1632:
1633: simple_lock(&vp->v_interlock);
1634: vgonel(vp, p);
1635: }
1636:
1637: /*
1638: * vgone, with the vp interlock held.
1639: */
1640: void
1641: vgonel(vp, p)
1.123 augustss 1642: struct vnode *vp;
1.80 fvdl 1643: struct proc *p;
1.29 cgd 1644: {
1.80 fvdl 1645: struct vnode *vq;
1.29 cgd 1646: struct vnode *vx;
1647:
1648: /*
1649: * If a vgone (or vclean) is already in progress,
1650: * wait until it is done and return.
1651: */
1652: if (vp->v_flag & VXLOCK) {
1653: vp->v_flag |= VXWANT;
1.134 sommerfe 1654: ltsleep((caddr_t)vp, PINOD | PNORELOCK,
1655: "vgone", 0, &vp->v_interlock);
1.29 cgd 1656: return;
1657: }
1658: /*
1659: * Clean out the filesystem specific data.
1660: */
1.80 fvdl 1661: vclean(vp, DOCLOSE, p);
1.29 cgd 1662: /*
1663: * Delete from old mount point vnode list, if on one.
1664: */
1.80 fvdl 1665: if (vp->v_mount != NULL)
1666: insmntque(vp, (struct mount *)0);
1.29 cgd 1667: /*
1668: * If special device, remove it from special device alias list.
1.80 fvdl 1669: * if it is on one.
1.29 cgd 1670: */
1.80 fvdl 1671: if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1672: simple_lock(&spechash_slock);
1.110 wrstuden 1673: if (vp->v_hashchain != NULL) {
1674: if (*vp->v_hashchain == vp) {
1675: *vp->v_hashchain = vp->v_specnext;
1676: } else {
1677: for (vq = *vp->v_hashchain; vq;
1678: vq = vq->v_specnext) {
1679: if (vq->v_specnext != vp)
1680: continue;
1681: vq->v_specnext = vp->v_specnext;
1682: break;
1683: }
1684: if (vq == NULL)
1685: panic("missing bdev");
1.29 cgd 1686: }
1.110 wrstuden 1687: if (vp->v_flag & VALIASED) {
1688: vx = NULL;
1689: for (vq = *vp->v_hashchain; vq;
1690: vq = vq->v_specnext) {
1691: if (vq->v_rdev != vp->v_rdev ||
1692: vq->v_type != vp->v_type)
1693: continue;
1694: if (vx)
1695: break;
1696: vx = vq;
1697: }
1698: if (vx == NULL)
1699: panic("missing alias");
1700: if (vq == NULL)
1701: vx->v_flag &= ~VALIASED;
1702: vp->v_flag &= ~VALIASED;
1.29 cgd 1703: }
1704: }
1.80 fvdl 1705: simple_unlock(&spechash_slock);
1.29 cgd 1706: FREE(vp->v_specinfo, M_VNODE);
1707: vp->v_specinfo = NULL;
1708: }
1709: /*
1.30 mycroft 1710: * If it is on the freelist and not already at the head,
1711: * move it to the head of the list. The test of the back
1712: * pointer and the reference count of zero is because
1713: * it will be removed from the free list by getnewvnode,
1714: * but will not have its reference count incremented until
1715: * after calling vgone. If the reference count were
1716: * incremented first, vgone would (incorrectly) try to
1717: * close the previous instance of the underlying object.
1718: * So, the back pointer is explicitly set to `0xdeadb' in
1719: * getnewvnode after removing it from the freelist to ensure
1720: * that we do not try to move it here.
1.29 cgd 1721: */
1.80 fvdl 1722: if (vp->v_usecount == 0) {
1723: simple_lock(&vnode_free_list_slock);
1.113 fvdl 1724: if (vp->v_holdcnt > 0)
1.125 chs 1725: panic("vgonel: not clean, vp %p", vp);
1.80 fvdl 1726: if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1.113 fvdl 1727: TAILQ_FIRST(&vnode_free_list) != vp) {
1.80 fvdl 1728: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1729: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1730: }
1731: simple_unlock(&vnode_free_list_slock);
1.29 cgd 1732: }
1733: vp->v_type = VBAD;
1734: }
1735:
1736: /*
1737: * Lookup a vnode by device number.
1738: */
1.50 christos 1739: int
1.29 cgd 1740: vfinddev(dev, type, vpp)
1741: dev_t dev;
1742: enum vtype type;
1743: struct vnode **vpp;
1744: {
1.80 fvdl 1745: struct vnode *vp;
1746: int rc = 0;
1.29 cgd 1747:
1.80 fvdl 1748: simple_lock(&spechash_slock);
1.29 cgd 1749: for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1750: if (dev != vp->v_rdev || type != vp->v_type)
1751: continue;
1752: *vpp = vp;
1.80 fvdl 1753: rc = 1;
1754: break;
1.29 cgd 1755: }
1.80 fvdl 1756: simple_unlock(&spechash_slock);
1757: return (rc);
1.96 thorpej 1758: }
1759:
1760: /*
1761: * Revoke all the vnodes corresponding to the specified minor number
1762: * range (endpoints inclusive) of the specified major.
1763: */
1764: void
1765: vdevgone(maj, minl, minh, type)
1766: int maj, minl, minh;
1767: enum vtype type;
1768: {
1769: struct vnode *vp;
1770: int mn;
1771:
1772: for (mn = minl; mn <= minh; mn++)
1773: if (vfinddev(makedev(maj, mn), type, &vp))
1774: VOP_REVOKE(vp, REVOKEALL);
1.29 cgd 1775: }
1776:
1777: /*
1778: * Calculate the total number of references to a special device.
1779: */
1.30 mycroft 1780: int
1.29 cgd 1781: vcount(vp)
1.123 augustss 1782: struct vnode *vp;
1.29 cgd 1783: {
1.123 augustss 1784: struct vnode *vq, *vnext;
1.29 cgd 1785: int count;
1786:
1787: loop:
1788: if ((vp->v_flag & VALIASED) == 0)
1789: return (vp->v_usecount);
1.80 fvdl 1790: simple_lock(&spechash_slock);
1.30 mycroft 1791: for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1792: vnext = vq->v_specnext;
1.29 cgd 1793: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1794: continue;
1795: /*
1796: * Alias, but not in use, so flush it out.
1797: */
1.151 wrstuden 1798: if (vq->v_usecount == 0 && vq != vp &&
1799: (vq->v_flag & VXLOCK) == 0) {
1.80 fvdl 1800: simple_unlock(&spechash_slock);
1.29 cgd 1801: vgone(vq);
1802: goto loop;
1803: }
1804: count += vq->v_usecount;
1805: }
1.80 fvdl 1806: simple_unlock(&spechash_slock);
1.29 cgd 1807: return (count);
1808: }
1809:
1810: /*
1811: * Print out a description of a vnode.
1812: */
1.146 jdolecek 1813: static const char * const typename[] =
1.29 cgd 1814: { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1815:
1816: void
1817: vprint(label, vp)
1818: char *label;
1.123 augustss 1819: struct vnode *vp;
1.29 cgd 1820: {
1821: char buf[64];
1822:
1823: if (label != NULL)
1.57 christos 1824: printf("%s: ", label);
1.142 chs 1825: printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1.113 fvdl 1826: vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1.56 christos 1827: vp->v_holdcnt);
1.29 cgd 1828: buf[0] = '\0';
1829: if (vp->v_flag & VROOT)
1830: strcat(buf, "|VROOT");
1831: if (vp->v_flag & VTEXT)
1832: strcat(buf, "|VTEXT");
1833: if (vp->v_flag & VSYSTEM)
1834: strcat(buf, "|VSYSTEM");
1835: if (vp->v_flag & VXLOCK)
1836: strcat(buf, "|VXLOCK");
1837: if (vp->v_flag & VXWANT)
1838: strcat(buf, "|VXWANT");
1839: if (vp->v_flag & VBWAIT)
1840: strcat(buf, "|VBWAIT");
1841: if (vp->v_flag & VALIASED)
1842: strcat(buf, "|VALIASED");
1843: if (buf[0] != '\0')
1.57 christos 1844: printf(" flags (%s)", &buf[1]);
1.30 mycroft 1845: if (vp->v_data == NULL) {
1.57 christos 1846: printf("\n");
1.30 mycroft 1847: } else {
1.57 christos 1848: printf("\n\t");
1.30 mycroft 1849: VOP_PRINT(vp);
1850: }
1.29 cgd 1851: }
1852:
#ifdef DEBUG
/*
 * printlockedvnodes: list all of the locked vnodes in the system.
 * Called when debugging the kernel.
 *
 * Every mount point that can be marked busy without waiting is
 * scanned; each vnode for which VOP_ISLOCKED() is true is printed
 * with vprint().
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	mp = mountlist.cqh_first;
	while (mp != (void *)&mountlist) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
			for (vp = mp->mnt_vnodelist.lh_first;
			    vp != NULL;
			    vp = vp->v_mntvnodes.le_next) {
				if (VOP_ISLOCKED(vp))
					vprint((char *)0, vp);
			}
			/* Retake the list lock before stepping on. */
			simple_lock(&mountlist_slock);
			nmp = mp->mnt_list.cqe_next;
			vfs_unbusy(mp);
		} else {
			/* Could not busy this mount point: skip it. */
			nmp = mp->mnt_list.cqe_next;
		}
		mp = nmp;
	}
	simple_unlock(&mountlist_slock);
}
#endif
1884:
1.101 mrg 1885: /*
1.80 fvdl 1886: * Top level filesystem related information gathering.
1887: */
1888: int
1889: vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1890: int *name;
1891: u_int namelen;
1892: void *oldp;
1893: size_t *oldlenp;
1894: void *newp;
1895: size_t newlen;
1896: struct proc *p;
1897: {
1.95 thorpej 1898: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80 fvdl 1899: struct vfsconf vfc;
1.154 jdolecek 1900: extern const char * const mountcompatnames[];
1901: extern int nmountcompatnames;
1.95 thorpej 1902: #endif
1.80 fvdl 1903: struct vfsops *vfsp;
1904:
1905: /* all sysctl names at this level are at least name and field */
1906: if (namelen < 2)
1907: return (ENOTDIR); /* overloaded */
1.94 thorpej 1908:
1909: /* Not generic: goes to file system. */
1.80 fvdl 1910: if (name[0] != VFS_GENERIC) {
1.155 jdolecek 1911: static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
1.154 jdolecek 1912: const char *vfsname;
1913:
1914: if (name[0] < 0 || name[0] > VFS_MAXID
1915: || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
1.80 fvdl 1916: return (EOPNOTSUPP);
1.154 jdolecek 1917:
1918: vfsp = vfs_getopsbyname(vfsname);
1.94 thorpej 1919: if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
1920: return (EOPNOTSUPP);
1.80 fvdl 1921: return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
1922: oldp, oldlenp, newp, newlen, p));
1923: }
1.94 thorpej 1924:
1925: /* The rest are generic vfs sysctls. */
1.80 fvdl 1926: switch (name[1]) {
1.117 fvdl 1927: case VFS_USERMOUNT:
1928: return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
1.95 thorpej 1929: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80 fvdl 1930: case VFS_MAXTYPENUM:
1.94 thorpej 1931: /*
1932: * Provided for 4.4BSD-Lite2 compatibility.
1933: */
1.80 fvdl 1934: return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
1935: case VFS_CONF:
1.94 thorpej 1936: /*
1937: * Special: a node, next is a file system name.
1938: * Provided for 4.4BSD-Lite2 compatibility.
1939: */
1.80 fvdl 1940: if (namelen < 3)
1941: return (ENOTDIR); /* overloaded */
1942: if (name[2] >= nmountcompatnames || name[2] < 0 ||
1943: mountcompatnames[name[2]] == NULL)
1944: return (EOPNOTSUPP);
1945: vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
1946: if (vfsp == NULL)
1.94 thorpej 1947: return (EOPNOTSUPP);
1948: vfc.vfc_vfsops = vfsp;
1.80 fvdl 1949: strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
1950: vfc.vfc_typenum = name[2];
1.94 thorpej 1951: vfc.vfc_refcount = vfsp->vfs_refcount;
1.80 fvdl 1952: vfc.vfc_flags = 0;
1953: vfc.vfc_mountroot = vfsp->vfs_mountroot;
1954: vfc.vfc_next = NULL;
1955: return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
1956: sizeof(struct vfsconf)));
1.95 thorpej 1957: #endif
1958: default:
1959: break;
1.80 fvdl 1960: }
1961: return (EOPNOTSUPP);
1962: }
1963:
1.29 cgd 1964: int kinfo_vdebug = 1;
1965: int kinfo_vgetfailed;
1966: #define KINFO_VNODESLOP 10
1967: /*
1968: * Dump vnode list (via sysctl).
1969: * Copyout address of vnode followed by vnode.
1970: */
1971: /* ARGSUSED */
1.50 christos 1972: int
1.80 fvdl 1973: sysctl_vnode(where, sizep, p)
1.29 cgd 1974: char *where;
1975: size_t *sizep;
1.80 fvdl 1976: struct proc *p;
1.29 cgd 1977: {
1.80 fvdl 1978: struct mount *mp, *nmp;
1979: struct vnode *nvp, *vp;
1980: char *bp = where, *savebp;
1.29 cgd 1981: char *ewhere;
1982: int error;
1983:
1.90 perry 1984: #define VPTRSZ sizeof(struct vnode *)
1985: #define VNODESZ sizeof(struct vnode)
1.29 cgd 1986: if (where == NULL) {
1987: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1988: return (0);
1989: }
1990: ewhere = where + *sizep;
1.80 fvdl 1991:
1992: simple_lock(&mountlist_slock);
1.38 mycroft 1993: for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1.80 fvdl 1994: if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1995: nmp = mp->mnt_list.cqe_next;
1.29 cgd 1996: continue;
1.80 fvdl 1997: }
1.29 cgd 1998: savebp = bp;
1999: again:
1.80 fvdl 2000: simple_lock(&mntvnode_slock);
1.29 cgd 2001: for (vp = mp->mnt_vnodelist.lh_first;
2002: vp != NULL;
1.80 fvdl 2003: vp = nvp) {
1.29 cgd 2004: /*
2005: * Check that the vp is still associated with
2006: * this filesystem. RACE: could have been
2007: * recycled onto the same filesystem.
2008: */
2009: if (vp->v_mount != mp) {
1.80 fvdl 2010: simple_unlock(&mntvnode_slock);
1.29 cgd 2011: if (kinfo_vdebug)
1.57 christos 2012: printf("kinfo: vp changed\n");
1.29 cgd 2013: bp = savebp;
2014: goto again;
2015: }
1.80 fvdl 2016: nvp = vp->v_mntvnodes.le_next;
1.29 cgd 2017: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80 fvdl 2018: simple_unlock(&mntvnode_slock);
1.29 cgd 2019: *sizep = bp - where;
2020: return (ENOMEM);
2021: }
1.80 fvdl 2022: simple_unlock(&mntvnode_slock);
1.29 cgd 2023: if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2024: (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2025: return (error);
2026: bp += VPTRSZ + VNODESZ;
1.80 fvdl 2027: simple_lock(&mntvnode_slock);
1.29 cgd 2028: }
1.80 fvdl 2029: simple_unlock(&mntvnode_slock);
2030: simple_lock(&mountlist_slock);
2031: nmp = mp->mnt_list.cqe_next;
1.29 cgd 2032: vfs_unbusy(mp);
2033: }
1.80 fvdl 2034: simple_unlock(&mountlist_slock);
1.29 cgd 2035:
2036: *sizep = bp - where;
2037: return (0);
1.30 mycroft 2038: }
2039:
2040: /*
2041: * Check to see if a filesystem is mounted on a block device.
2042: */
2043: int
2044: vfs_mountedon(vp)
1.80 fvdl 2045: struct vnode *vp;
1.30 mycroft 2046: {
1.80 fvdl 2047: struct vnode *vq;
2048: int error = 0;
1.30 mycroft 2049:
1.113 fvdl 2050: if (vp->v_specmountpoint != NULL)
1.30 mycroft 2051: return (EBUSY);
2052: if (vp->v_flag & VALIASED) {
1.80 fvdl 2053: simple_lock(&spechash_slock);
1.30 mycroft 2054: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2055: if (vq->v_rdev != vp->v_rdev ||
2056: vq->v_type != vp->v_type)
2057: continue;
1.113 fvdl 2058: if (vq->v_specmountpoint != NULL) {
1.80 fvdl 2059: error = EBUSY;
2060: break;
2061: }
1.30 mycroft 2062: }
1.80 fvdl 2063: simple_unlock(&spechash_slock);
1.30 mycroft 2064: }
1.80 fvdl 2065: return (error);
1.30 mycroft 2066: }
2067:
2068: /*
2069: * Build hash lists of net addresses and hang them off the mount point.
2070: * Called by ufs_mount() to set up the lists of export addresses.
2071: */
2072: static int
2073: vfs_hang_addrlist(mp, nep, argp)
2074: struct mount *mp;
2075: struct netexport *nep;
2076: struct export_args *argp;
2077: {
1.123 augustss 2078: struct netcred *np, *enp;
2079: struct radix_node_head *rnh;
2080: int i;
1.30 mycroft 2081: struct radix_node *rn;
2082: struct sockaddr *saddr, *smask = 0;
2083: struct domain *dom;
2084: int error;
2085:
2086: if (argp->ex_addrlen == 0) {
2087: if (mp->mnt_flag & MNT_DEFEXPORTED)
2088: return (EPERM);
2089: np = &nep->ne_defexported;
2090: np->netc_exflags = argp->ex_flags;
2091: np->netc_anon = argp->ex_anon;
2092: np->netc_anon.cr_ref = 1;
2093: mp->mnt_flag |= MNT_DEFEXPORTED;
2094: return (0);
2095: }
1.156 jdolecek 2096:
2097: if (argp->ex_addrlen > MLEN)
2098: return (EINVAL);
2099:
1.30 mycroft 2100: i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2101: np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91 perry 2102: memset((caddr_t)np, 0, i);
1.30 mycroft 2103: saddr = (struct sockaddr *)(np + 1);
1.50 christos 2104: error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2105: if (error)
1.30 mycroft 2106: goto out;
2107: if (saddr->sa_len > argp->ex_addrlen)
2108: saddr->sa_len = argp->ex_addrlen;
2109: if (argp->ex_masklen) {
2110: smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66 mycroft 2111: error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30 mycroft 2112: if (error)
2113: goto out;
2114: if (smask->sa_len > argp->ex_masklen)
2115: smask->sa_len = argp->ex_masklen;
2116: }
2117: i = saddr->sa_family;
2118: if ((rnh = nep->ne_rtable[i]) == 0) {
2119: /*
2120: * Seems silly to initialize every AF when most are not
2121: * used, do so on demand here
2122: */
2123: for (dom = domains; dom; dom = dom->dom_next)
2124: if (dom->dom_family == i && dom->dom_rtattach) {
2125: dom->dom_rtattach((void **)&nep->ne_rtable[i],
2126: dom->dom_rtoffset);
2127: break;
2128: }
2129: if ((rnh = nep->ne_rtable[i]) == 0) {
2130: error = ENOBUFS;
2131: goto out;
2132: }
2133: }
2134: rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2135: np->netc_rnodes);
2136: if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1.72 fvdl 2137: if (rn == 0) {
2138: enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2139: smask, rnh);
2140: if (enp == 0) {
2141: error = EPERM;
2142: goto out;
2143: }
2144: } else
2145: enp = (struct netcred *)rn;
2146:
2147: if (enp->netc_exflags != argp->ex_flags ||
2148: enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
2149: enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
2150: enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
1.91 perry 2151: memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1.72 fvdl 2152: enp->netc_anon.cr_ngroups))
2153: error = EPERM;
2154: else
2155: error = 0;
1.30 mycroft 2156: goto out;
2157: }
2158: np->netc_exflags = argp->ex_flags;
2159: np->netc_anon = argp->ex_anon;
2160: np->netc_anon.cr_ref = 1;
2161: return (0);
2162: out:
2163: free(np, M_NETADDR);
2164: return (error);
2165: }
2166:
2167: /* ARGSUSED */
2168: static int
2169: vfs_free_netcred(rn, w)
2170: struct radix_node *rn;
1.50 christos 2171: void *w;
1.30 mycroft 2172: {
1.123 augustss 2173: struct radix_node_head *rnh = (struct radix_node_head *)w;
1.30 mycroft 2174:
2175: (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2176: free((caddr_t)rn, M_NETADDR);
2177: return (0);
2178: }
2179:
2180: /*
2181: * Free the net address hash lists that are hanging off the mount points.
2182: */
2183: static void
2184: vfs_free_addrlist(nep)
2185: struct netexport *nep;
2186: {
1.123 augustss 2187: int i;
2188: struct radix_node_head *rnh;
1.30 mycroft 2189:
2190: for (i = 0; i <= AF_MAX; i++)
1.50 christos 2191: if ((rnh = nep->ne_rtable[i]) != NULL) {
2192: (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30 mycroft 2193: free((caddr_t)rnh, M_RTABLE);
2194: nep->ne_rtable[i] = 0;
2195: }
2196: }
2197:
2198: int
2199: vfs_export(mp, nep, argp)
2200: struct mount *mp;
2201: struct netexport *nep;
2202: struct export_args *argp;
2203: {
2204: int error;
2205:
2206: if (argp->ex_flags & MNT_DELEXPORT) {
1.71 fvdl 2207: if (mp->mnt_flag & MNT_EXPUBLIC) {
2208: vfs_setpublicfs(NULL, NULL, NULL);
2209: mp->mnt_flag &= ~MNT_EXPUBLIC;
2210: }
1.30 mycroft 2211: vfs_free_addrlist(nep);
2212: mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2213: }
2214: if (argp->ex_flags & MNT_EXPORTED) {
1.71 fvdl 2215: if (argp->ex_flags & MNT_EXPUBLIC) {
2216: if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2217: return (error);
2218: mp->mnt_flag |= MNT_EXPUBLIC;
2219: }
1.50 christos 2220: if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30 mycroft 2221: return (error);
2222: mp->mnt_flag |= MNT_EXPORTED;
2223: }
1.71 fvdl 2224: return (0);
2225: }
2226:
2227: /*
2228: * Set the publicly exported filesystem (WebNFS). Currently, only
2229: * one public filesystem is possible in the spec (RFC 2054 and 2055)
2230: */
2231: int
2232: vfs_setpublicfs(mp, nep, argp)
2233: struct mount *mp;
2234: struct netexport *nep;
2235: struct export_args *argp;
2236: {
2237: int error;
2238: struct vnode *rvp;
2239: char *cp;
2240:
2241: /*
2242: * mp == NULL -> invalidate the current info, the FS is
2243: * no longer exported. May be called from either vfs_export
2244: * or unmount, so check if it hasn't already been done.
2245: */
2246: if (mp == NULL) {
2247: if (nfs_pub.np_valid) {
2248: nfs_pub.np_valid = 0;
2249: if (nfs_pub.np_index != NULL) {
2250: FREE(nfs_pub.np_index, M_TEMP);
2251: nfs_pub.np_index = NULL;
2252: }
2253: }
2254: return (0);
2255: }
2256:
2257: /*
2258: * Only one allowed at a time.
2259: */
2260: if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2261: return (EBUSY);
2262:
2263: /*
2264: * Get real filehandle for root of exported FS.
2265: */
1.91 perry 2266: memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.71 fvdl 2267: nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2268:
2269: if ((error = VFS_ROOT(mp, &rvp)))
2270: return (error);
2271:
2272: if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2273: return (error);
2274:
2275: vput(rvp);
2276:
2277: /*
2278: * If an indexfile was specified, pull it in.
2279: */
2280: if (argp->ex_indexfile != NULL) {
2281: MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2282: M_WAITOK);
2283: error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2284: MAXNAMLEN, (size_t *)0);
2285: if (!error) {
2286: /*
2287: * Check for illegal filenames.
2288: */
2289: for (cp = nfs_pub.np_index; *cp; cp++) {
2290: if (*cp == '/') {
2291: error = EINVAL;
2292: break;
2293: }
2294: }
2295: }
2296: if (error) {
2297: FREE(nfs_pub.np_index, M_TEMP);
2298: return (error);
2299: }
2300: }
2301:
2302: nfs_pub.np_mount = mp;
2303: nfs_pub.np_valid = 1;
1.30 mycroft 2304: return (0);
2305: }
2306:
2307: struct netcred *
2308: vfs_export_lookup(mp, nep, nam)
1.123 augustss 2309: struct mount *mp;
1.30 mycroft 2310: struct netexport *nep;
2311: struct mbuf *nam;
2312: {
1.123 augustss 2313: struct netcred *np;
2314: struct radix_node_head *rnh;
1.30 mycroft 2315: struct sockaddr *saddr;
2316:
2317: np = NULL;
2318: if (mp->mnt_flag & MNT_EXPORTED) {
2319: /*
2320: * Lookup in the export list first.
2321: */
2322: if (nam != NULL) {
2323: saddr = mtod(nam, struct sockaddr *);
2324: rnh = nep->ne_rtable[saddr->sa_family];
2325: if (rnh != NULL) {
2326: np = (struct netcred *)
2327: (*rnh->rnh_matchaddr)((caddr_t)saddr,
2328: rnh);
2329: if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2330: np = NULL;
2331: }
2332: }
2333: /*
2334: * If no address match, use the default if it exists.
2335: */
2336: if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2337: np = &nep->ne_defexported;
2338: }
2339: return (np);
1.35 ws 2340: }
2341:
2342: /*
2343: * Do the usual access checking.
2344: * file_mode, uid and gid are from the vnode in question,
2345: * while acc_mode and cred are from the VOP_ACCESS parameter list
2346: */
1.41 mycroft 2347: int
1.68 mycroft 2348: vaccess(type, file_mode, uid, gid, acc_mode, cred)
2349: enum vtype type;
1.35 ws 2350: mode_t file_mode;
2351: uid_t uid;
2352: gid_t gid;
2353: mode_t acc_mode;
2354: struct ucred *cred;
2355: {
2356: mode_t mask;
2357:
1.64 mycroft 2358: /*
2359: * Super-user always gets read/write access, but execute access depends
2360: * on at least one execute bit being set.
2361: */
2362: if (cred->cr_uid == 0) {
1.69 mycroft 2363: if ((acc_mode & VEXEC) && type != VDIR &&
1.68 mycroft 2364: (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64 mycroft 2365: return (EACCES);
1.68 mycroft 2366: return (0);
1.64 mycroft 2367: }
1.35 ws 2368:
2369: mask = 0;
2370:
2371: /* Otherwise, check the owner. */
2372: if (cred->cr_uid == uid) {
1.68 mycroft 2373: if (acc_mode & VEXEC)
1.35 ws 2374: mask |= S_IXUSR;
2375: if (acc_mode & VREAD)
2376: mask |= S_IRUSR;
2377: if (acc_mode & VWRITE)
2378: mask |= S_IWUSR;
1.64 mycroft 2379: return ((file_mode & mask) == mask ? 0 : EACCES);
1.35 ws 2380: }
2381:
2382: /* Otherwise, check the groups. */
1.44 jtc 2383: if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68 mycroft 2384: if (acc_mode & VEXEC)
1.35 ws 2385: mask |= S_IXGRP;
2386: if (acc_mode & VREAD)
2387: mask |= S_IRGRP;
2388: if (acc_mode & VWRITE)
2389: mask |= S_IWGRP;
1.64 mycroft 2390: return ((file_mode & mask) == mask ? 0 : EACCES);
1.35 ws 2391: }
2392:
2393: /* Otherwise, check everyone else. */
1.68 mycroft 2394: if (acc_mode & VEXEC)
1.35 ws 2395: mask |= S_IXOTH;
2396: if (acc_mode & VREAD)
2397: mask |= S_IROTH;
2398: if (acc_mode & VWRITE)
2399: mask |= S_IWOTH;
1.64 mycroft 2400: return ((file_mode & mask) == mask ? 0 : EACCES);
1.39 mycroft 2401: }
2402:
2403: /*
2404: * Unmount all file systems.
2405: * We traverse the list in reverse order under the assumption that doing so
2406: * will avoid needing to worry about dependencies.
2407: */
2408: void
1.128 sommerfe 2409: vfs_unmountall(p)
2410: struct proc *p;
1.39 mycroft 2411: {
1.123 augustss 2412: struct mount *mp, *nmp;
1.40 mycroft 2413: int allerror, error;
1.39 mycroft 2414:
2415: for (allerror = 0,
2416: mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2417: nmp = mp->mnt_list.cqe_prev;
1.54 jtk 2418: #ifdef DEBUG
1.57 christos 2419: printf("unmounting %s (%s)...\n",
1.56 christos 2420: mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54 jtk 2421: #endif
1.149 thorpej 2422: /*
2423: * XXX Freeze syncer. Must do this before locking the
2424: * mount point. See dounmount() for details.
2425: */
2426: lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2427: if (vfs_busy(mp, 0, 0)) {
2428: lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60 fvdl 2429: continue;
1.149 thorpej 2430: }
1.80 fvdl 2431: if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1.57 christos 2432: printf("unmount of %s failed with error %d\n",
1.40 mycroft 2433: mp->mnt_stat.f_mntonname, error);
2434: allerror = 1;
2435: }
1.39 mycroft 2436: }
2437: if (allerror)
1.57 christos 2438: printf("WARNING: some file systems would not unmount\n");
1.40 mycroft 2439: }
2440:
2441: /*
2442: * Sync and unmount file systems before shutting down.
2443: */
2444: void
2445: vfs_shutdown()
2446: {
1.123 augustss 2447: struct buf *bp;
1.138 bouyer 2448: int iter, nbusy, nbusy_prev = 0, dcount, s;
1.128 sommerfe 2449: struct proc *p = curproc;
1.40 mycroft 2450:
1.128 sommerfe 2451: /* XXX we're certainly not running in proc0's context! */
2452: if (p == NULL)
2453: p = &proc0;
2454:
1.70 cgd 2455: printf("syncing disks... ");
2456:
1.138 bouyer 2457: /* remove user process from run queue */
2458: suspendsched();
1.40 mycroft 2459: (void) spl0();
2460:
1.128 sommerfe 2461: /* avoid coming back this way again if we panic. */
2462: doing_shutdown = 1;
2463:
1.142 chs 2464: sys_sync(p, NULL, NULL);
1.40 mycroft 2465:
2466: /* Wait for sync to finish. */
1.113 fvdl 2467: dcount = 10000;
1.138 bouyer 2468: for (iter = 0; iter < 20;) {
1.40 mycroft 2469: nbusy = 0;
1.113 fvdl 2470: for (bp = &buf[nbuf]; --bp >= buf; ) {
1.133 fvdl 2471: if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1.40 mycroft 2472: nbusy++;
1.113 fvdl 2473: /*
2474: * With soft updates, some buffers that are
2475: * written will be remarked as dirty until other
2476: * buffers are written.
2477: */
1.116 perseant 2478: if (bp->b_vp && bp->b_vp->v_mount
2479: && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
2480: && (bp->b_flags & B_DELWRI)) {
1.113 fvdl 2481: s = splbio();
2482: bremfree(bp);
2483: bp->b_flags |= B_BUSY;
2484: splx(s);
2485: nbusy++;
2486: bawrite(bp);
2487: if (dcount-- <= 0) {
2488: printf("softdep ");
2489: goto fail;
2490: }
2491: }
2492: }
1.40 mycroft 2493: if (nbusy == 0)
2494: break;
1.138 bouyer 2495: if (nbusy_prev == 0)
2496: nbusy_prev = nbusy;
1.57 christos 2497: printf("%d ", nbusy);
1.138 bouyer 2498: tsleep(&nbusy, PRIBIO, "bflush",
2499: (iter == 0) ? 1 : hz / 25 * iter);
2500: if (nbusy >= nbusy_prev) /* we didn't flush anything */
2501: iter++;
2502: else
2503: nbusy_prev = nbusy;
1.40 mycroft 2504: }
1.73 thorpej 2505: if (nbusy) {
1.113 fvdl 2506: fail:
1.124 augustss 2507: #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
1.108 simonb 2508: printf("giving up\nPrinting vnodes for busy buffers\n");
2509: for (bp = &buf[nbuf]; --bp >= buf; )
1.133 fvdl 2510: if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1.109 thorpej 2511: vprint(NULL, bp->b_vp);
1.124 augustss 2512:
2513: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2514: Debugger();
2515: #endif
2516:
2517: #else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
1.57 christos 2518: printf("giving up\n");
1.124 augustss 2519: #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
1.84 thorpej 2520: return;
1.73 thorpej 2521: } else
1.57 christos 2522: printf("done\n");
1.73 thorpej 2523:
1.84 thorpej 2524: /*
2525: * If we've panic'd, don't make the situation potentially
2526: * worse by unmounting the file systems.
2527: */
2528: if (panicstr != NULL)
2529: return;
2530:
2531: /* Release inodes held by texts before update. */
1.73 thorpej 2532: #ifdef notdef
1.84 thorpej 2533: vnshutdown();
1.73 thorpej 2534: #endif
1.84 thorpej 2535: /* Unmount file systems. */
1.128 sommerfe 2536: vfs_unmountall(p);
1.58 thorpej 2537: }
2538:
2539: /*
2540: * Mount the root file system. If the operator didn't specify a
2541: * file system to use, try all possible file systems until one
2542: * succeeds.
2543: */
2544: int
2545: vfs_mountroot()
2546: {
2547: extern int (*mountroot) __P((void));
1.79 thorpej 2548: struct vfsops *v;
1.58 thorpej 2549:
2550: if (root_device == NULL)
2551: panic("vfs_mountroot: root device unknown");
2552:
2553: switch (root_device->dv_class) {
2554: case DV_IFNET:
2555: if (rootdev != NODEV)
2556: panic("vfs_mountroot: rootdev set for DV_IFNET");
2557: break;
2558:
2559: case DV_DISK:
2560: if (rootdev == NODEV)
2561: panic("vfs_mountroot: rootdev not set for DV_DISK");
2562: break;
2563:
2564: default:
2565: printf("%s: inappropriate for root file system\n",
2566: root_device->dv_xname);
2567: return (ENODEV);
2568: }
2569:
2570: /*
2571: * If user specified a file system, use it.
2572: */
2573: if (mountroot != NULL)
2574: return ((*mountroot)());
2575:
2576: /*
2577: * Try each file system currently configured into the kernel.
2578: */
1.79 thorpej 2579: for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2580: if (v->vfs_mountroot == NULL)
1.58 thorpej 2581: continue;
2582: #ifdef DEBUG
1.79 thorpej 2583: printf("mountroot: trying %s...\n", v->vfs_name);
1.58 thorpej 2584: #endif
1.79 thorpej 2585: if ((*v->vfs_mountroot)() == 0) {
2586: printf("root file system type: %s\n", v->vfs_name);
2587: break;
1.58 thorpej 2588: }
2589: }
2590:
1.79 thorpej 2591: if (v == NULL) {
2592: printf("no file system for %s", root_device->dv_xname);
2593: if (root_device->dv_class == DV_DISK)
2594: printf(" (dev 0x%x)", rootdev);
2595: printf("\n");
2596: return (EFTYPE);
2597: }
2598: return (0);
1.58 thorpej 2599: }
2600:
2601: /*
2602: * Given a file system name, look up the vfsops for that
2603: * file system, or return NULL if file system isn't present
2604: * in the kernel.
2605: */
2606: struct vfsops *
2607: vfs_getopsbyname(name)
2608: const char *name;
2609: {
1.79 thorpej 2610: struct vfsops *v;
2611:
2612: for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2613: if (strcmp(v->vfs_name, name) == 0)
2614: break;
2615: }
2616:
2617: return (v);
2618: }
2619:
2620: /*
2621: * Establish a file system and initialize it.
2622: */
2623: int
2624: vfs_attach(vfs)
2625: struct vfsops *vfs;
2626: {
2627: struct vfsops *v;
2628: int error = 0;
2629:
1.58 thorpej 2630:
1.79 thorpej 2631: /*
2632: * Make sure this file system doesn't already exist.
2633: */
1.157 ! chs 2634: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2635: if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2636: error = EEXIST;
2637: goto out;
2638: }
2639: }
2640:
2641: /*
2642: * Initialize the vnode operations for this file system.
2643: */
2644: vfs_opv_init(vfs->vfs_opv_descs);
2645:
2646: /*
2647: * Now initialize the file system itself.
2648: */
2649: (*vfs->vfs_init)();
2650:
2651: /*
2652: * ...and link it into the kernel's list.
2653: */
2654: LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2655:
2656: /*
2657: * Sanity: make sure the reference count is 0.
2658: */
2659: vfs->vfs_refcount = 0;
2660:
2661: out:
2662: return (error);
2663: }
2664:
2665: /*
2666: * Remove a file system from the kernel.
2667: */
2668: int
2669: vfs_detach(vfs)
2670: struct vfsops *vfs;
2671: {
2672: struct vfsops *v;
2673:
2674: /*
2675: * Make sure no one is using the filesystem.
2676: */
2677: if (vfs->vfs_refcount != 0)
2678: return (EBUSY);
2679:
2680: /*
2681: * ...and remove it from the kernel's list.
2682: */
1.157 ! chs 2683: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2684: if (v == vfs) {
2685: LIST_REMOVE(v, vfs_list);
2686: break;
2687: }
2688: }
2689:
2690: if (v == NULL)
2691: return (ESRCH);
1.121 jdolecek 2692:
2693: /*
2694: * Now run the file system-specific cleanups.
2695: */
2696: (*vfs->vfs_done)();
1.79 thorpej 2697:
2698: /*
2699: * Free the vnode operations vector.
2700: */
2701: vfs_opv_free(vfs->vfs_opv_descs);
2702: return (0);
1.157 ! chs 2703: }
! 2704:
! 2705: void
! 2706: vfs_reinit(void)
! 2707: {
! 2708: struct vfsops *vfs;
! 2709:
! 2710: LIST_FOREACH(vfs, &vfs_list, vfs_list) {
! 2711: if (vfs->vfs_reinit) {
! 2712: (*vfs->vfs_reinit)();
! 2713: }
! 2714: }
1.29 cgd 2715: }
1.125 chs 2716:
2717: #ifdef DDB
2718: const char buf_flagbits[] =
1.142 chs 2719: "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
1.125 chs 2720: "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
1.142 chs 2721: "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
2722: "\32XXX\33VFLUSH";
1.125 chs 2723:
2724: void
2725: vfs_buf_print(bp, full, pr)
2726: struct buf *bp;
2727: int full;
2728: void (*pr) __P((const char *, ...));
2729: {
2730: char buf[1024];
2731:
2732: (*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
2733: bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
2734:
2735: bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
2736: (*pr)(" error %d flags 0x%s\n", bp->b_error, buf);
2737:
2738: (*pr)(" bufsize 0x%x bcount 0x%x resid 0x%x\n",
2739: bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142 chs 2740: (*pr)(" data %p saveaddr %p dep %p\n",
2741: bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125 chs 2742: (*pr)(" iodone %p\n", bp->b_iodone);
2743: }
2744:
2745:
2746: const char vnode_flagbits[] =
2747: "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
1.148 enami 2748: "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
1.125 chs 2749:
2750: const char *vnode_types[] = {
2751: "VNON",
2752: "VREG",
2753: "VDIR",
2754: "VBLK",
2755: "VCHR",
2756: "VLNK",
2757: "VSOCK",
2758: "VFIFO",
2759: "VBAD",
2760: };
2761:
2762: const char *vnode_tags[] = {
2763: "VT_NON",
2764: "VT_UFS",
2765: "VT_NFS",
2766: "VT_MFS",
2767: "VT_MSDOSFS",
2768: "VT_LFS",
2769: "VT_LOFS",
2770: "VT_FDESC",
2771: "VT_PORTAL",
2772: "VT_NULL",
2773: "VT_UMAP",
2774: "VT_KERNFS",
2775: "VT_PROCFS",
2776: "VT_AFS",
2777: "VT_ISOFS",
2778: "VT_UNION",
2779: "VT_ADOSFS",
2780: "VT_EXT2FS",
2781: "VT_CODA",
2782: "VT_FILECORE",
2783: "VT_NTFS",
2784: "VT_VFS",
2785: "VT_OVERLAY"
2786: };
2787:
2788: void
2789: vfs_vnode_print(vp, full, pr)
2790: struct vnode *vp;
2791: int full;
2792: void (*pr) __P((const char *, ...));
2793: {
1.142 chs 2794: char buf[256];
1.125 chs 2795:
2796: const char *vtype, *vtag;
2797:
2798: uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
2799: bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
2800: (*pr)("\nVNODE flags %s\n", buf);
1.142 chs 2801: (*pr)("mp %p nio %d size 0x%x rwlock 0x%x glock 0x%x\n",
2802: vp->v_mount, vp->v_uvm.u_nio, (int)vp->v_uvm.u_size,
2803: vp->v_vnlock ? lockstatus(vp->v_vnlock) : 0x999,
2804: lockstatus(&vp->v_glock));
1.125 chs 2805:
2806: (*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
2807: vp->v_data, vp->v_usecount, vp->v_writecount,
2808: vp->v_holdcnt, vp->v_numoutput);
2809:
2810: vtype = (vp->v_type >= 0 &&
2811: vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
2812: vnode_types[vp->v_type] : "UNKNOWN";
2813: vtag = (vp->v_tag >= 0 &&
2814: vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
2815: vnode_tags[vp->v_tag] : "UNKNOWN";
2816:
2817: (*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
2818: vtype, vp->v_type, vtag, vp->v_tag,
2819: vp->v_id, vp->v_mount, vp->v_mountedhere);
2820: (*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
2821: vp->v_lastr, vp->v_lastw, vp->v_lasta);
2822: (*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
2823: vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);
2824:
2825: if (full) {
2826: struct buf *bp;
2827:
2828: (*pr)("clean bufs:\n");
1.142 chs 2829: LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
2830: (*pr)(" bp %p\n", bp);
1.125 chs 2831: vfs_buf_print(bp, full, pr);
2832: }
2833:
2834: (*pr)("dirty bufs:\n");
1.142 chs 2835: LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
2836: (*pr)(" bp %p\n", bp);
1.125 chs 2837: vfs_buf_print(bp, full, pr);
2838: }
2839: }
2840: }
2841: #endif
CVSweb <webmaster@jp.NetBSD.org>