Annotation of src/sys/kern/vfs_subr.c, Revision 1.249
1.249 ! elad 1: /* $NetBSD: vfs_subr.c,v 1.248 2005/06/06 12:09:19 yamt Exp $ */
1.74 thorpej 2:
3: /*-
1.243 mycroft 4: * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center.
1.243 mycroft 10: * This code is derived from software contributed to The NetBSD Foundation
11: * by Charles M. Hannum.
1.74 thorpej 12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. All advertising materials mentioning features or use of this software
22: * must display the following acknowledgement:
23: * This product includes software developed by the NetBSD
24: * Foundation, Inc. and its contributors.
25: * 4. Neither the name of The NetBSD Foundation nor the names of its
26: * contributors may be used to endorse or promote products derived
27: * from this software without specific prior written permission.
28: *
29: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
30: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
31: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
33: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39: * POSSIBILITY OF SUCH DAMAGE.
40: */
1.32 cgd 41:
1.29 cgd 42: /*
1.30 mycroft 43: * Copyright (c) 1989, 1993
44: * The Regents of the University of California. All rights reserved.
1.29 cgd 45: * (c) UNIX System Laboratories, Inc.
46: * All or some portions of this file are derived from material licensed
47: * to the University of California by American Telephone and Telegraph
48: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
49: * the permission of UNIX System Laboratories, Inc.
50: *
51: * Redistribution and use in source and binary forms, with or without
52: * modification, are permitted provided that the following conditions
53: * are met:
54: * 1. Redistributions of source code must retain the above copyright
55: * notice, this list of conditions and the following disclaimer.
56: * 2. Redistributions in binary form must reproduce the above copyright
57: * notice, this list of conditions and the following disclaimer in the
58: * documentation and/or other materials provided with the distribution.
1.204 agc 59: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 60: * may be used to endorse or promote products derived from this software
61: * without specific prior written permission.
62: *
63: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73: * SUCH DAMAGE.
74: *
1.32 cgd 75: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 76: */
77:
78: /*
79: * External virtual filesystem routines
80: */
1.162 lukem 81:
82: #include <sys/cdefs.h>
1.249 ! elad 83: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.248 2005/06/06 12:09:19 yamt Exp $");
1.78 mrg 84:
1.195 christos 85: #include "opt_inet.h"
1.125 chs 86: #include "opt_ddb.h"
1.95 thorpej 87: #include "opt_compat_netbsd.h"
1.97 christos 88: #include "opt_compat_43.h"
1.29 cgd 89:
90: #include <sys/param.h>
1.30 mycroft 91: #include <sys/systm.h>
1.29 cgd 92: #include <sys/proc.h>
1.138 bouyer 93: #include <sys/kernel.h>
1.29 cgd 94: #include <sys/mount.h>
95: #include <sys/time.h>
1.181 jdolecek 96: #include <sys/event.h>
1.46 mycroft 97: #include <sys/fcntl.h>
1.29 cgd 98: #include <sys/vnode.h>
1.30 mycroft 99: #include <sys/stat.h>
1.29 cgd 100: #include <sys/namei.h>
101: #include <sys/ucred.h>
102: #include <sys/buf.h>
103: #include <sys/errno.h>
104: #include <sys/malloc.h>
1.30 mycroft 105: #include <sys/domain.h>
106: #include <sys/mbuf.h>
1.184 thorpej 107: #include <sys/sa.h>
1.51 christos 108: #include <sys/syscallargs.h>
1.58 thorpej 109: #include <sys/device.h>
1.238 thorpej 110: #include <sys/extattr.h>
1.71 fvdl 111: #include <sys/dirent.h>
1.192 christos 112: #include <sys/filedesc.h>
1.50 christos 113:
1.30 mycroft 114: #include <miscfs/specfs/specdev.h>
1.113 fvdl 115: #include <miscfs/genfs/genfs.h>
116: #include <miscfs/syncfs/syncfs.h>
1.30 mycroft 117:
1.195 christos 118: #include <netinet/in.h>
119:
1.125 chs 120: #include <uvm/uvm.h>
121: #include <uvm/uvm_ddb.h>
1.129 mrg 122:
1.195 christos 123: #include <netinet/in.h>
124:
1.129 mrg 125: #include <sys/sysctl.h>
1.77 mrg 126:
/*
 * Conversion tables between the file-type bits of a mode_t and the
 * vnode type enumeration.  iftovt_tab is indexed by the IFMT bits
 * (shifted down); vttoif_tab is indexed by an enum vtype value and
 * yields the corresponding S_IF* bits.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
135:
int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
/* Free vnodes with v_holdcnt > 0 go on a separate list (see ungetnewvnode). */
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =	/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =	/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;	/* publicly exported FS */

/*
 * Simplelocks protecting, respectively: the mount list, fsid
 * generation (vfs_getnewfsid), per-mount vnode lists, the two vnode
 * free lists above (and numvnodes), and the special-device hash.
 */
struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;	/* root device */

/* Backing pool for struct vnode allocations (see getnewvnode). */
POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist(struct mount *, struct netexport *,
    struct export_args *);
static int vfs_free_netcred(struct radix_node *, void *);
static void vfs_free_addrlist(struct netexport *);
static struct vnode *getcleanvnode(struct proc *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif
198:
/*
 * Initialize the vnode management data structures.
 *
 * The vnode pool itself is set up statically via POOL_INIT above, so
 * the only remaining run-time work is starting the filesystem syncer.
 */
void
vntblinit(void)
{

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}
211:
/*
 * Reduce the number of allocated vnodes to "target" by reclaiming
 * clean vnodes from the free lists and returning them to the pool.
 *
 * Returns 0 on success, or EBUSY if no reclaimable vnode was found.
 *
 * NOTE(review): getcleanvnode() is entered with vnode_free_list_slock
 * held and releases it before returning (on both its success and NULL
 * paths), which is why the lock is re-taken each iteration and the
 * EBUSY return does not unlock.
 */
int
vfs_drainvnodes(long target, struct proc *p)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		/* Drops vnode_free_list_slock. */
		vp = getcleanvnode(p);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}
231:
/*
 * grab a vnode from freelist and clean it.
 *
 * Must be called with vnode_free_list_slock held; the lock is
 * released before returning, on both the success and NULL paths.
 * Returns a reclaimed vnode with its identity wiped, or NULLVP if
 * neither free list yields a usable vnode.
 */
struct vnode *
getcleanvnode(struct proc *p)
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));

	/* Prefer buffer-free vnodes; fall back to the hold list below. */
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/* Skip vnodes whose interlock is contended. */
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * as our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		if (listhd == &vnode_free_list) {
			/* Nothing free; retry with held vnodes. */
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	/*
	 * Wipe the old identity.  vgonel() is expected to release
	 * v_interlock (cf. the explicit unlock in the else branch).
	 */
	if (vp->v_type != VBAD)
		vgonel(vp, p);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}
295:
/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 *
 * mp		mount point to busy; on success a shared mnt_lock is held
 * flags	LK_NOWAIT => return ENOENT instead of waiting out an
 *		unmount in progress; LK_RECURSEFAIL => return EDEADLK
 *		when the caller is itself the unmounting process
 * interlkp	optional simplelock: dropped while sleeping, re-taken
 *		afterwards, and handed to lockmgr() with LK_INTERLOCK
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		/* Sample IMNT_GONE before waking the last-waiter waiter. */
		gone = mp->mnt_iflag & IMNT_GONE;

		/*
		 * Last waiter out notifies whoever is waiting on
		 * mnt_wcnt (presumably dounmount — TODO confirm).
		 */
		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
342:
/*
 * Free a busy filesystem.
 *
 * Releases the shared mnt_lock reference obtained by vfs_busy().
 */
void
vfs_unbusy(struct mount *mp)
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
352:
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * fstypename	matched (strncmp, MFSNAMELEN) against each entry of
 *		the global vfs_list
 * devname	copied into f_mntfromname; f_mntonname is set to "/"
 * mpp		on success points at the new mount, already vfs_busy'd
 *		and marked MNT_RDONLY
 *
 * Returns 0 on success or ENODEV for an unknown filesystem type.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
	struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	mp->mnt_leaf = mp;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
389:
/*
 * Lookup a mount point by filesystem identifier.
 *
 * Walks the global mountlist under mountlist_slock; returns the mount
 * whose f_fsidx matches *fsid, or NULL if none does.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}
409:
/*
 * Get a new unique fsid
 *
 * __fsid_val[0] is built with makedev() from the filesystem type and
 * a per-type instance counter (xxxfs_mntid); __fsid_val[1] carries the
 * raw type.  The candidate id is bumped until vfs_getvfs() reports no
 * collision.  Serialized by mntid_slock.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	/* Never hand out instance number 0. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		/* Candidate already in use: advance until unique. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}
439:
/*
 * Make a 'unique' number from a mount type name.
 *
 * Folds the name into a long by shifting the accumulator left two
 * bits and XOR-ing in each character.  An empty name yields 0.
 */
long
makefstype(const char *type)
{
	const char *cp;
	long hash;

	hash = 0;
	for (cp = type; *cp; cp++) {
		hash <<= 2;
		hash ^= *cp;
	}
	return hash;
}
1.30 mycroft 454:
1.80 fvdl 455:
1.30 mycroft 456: /*
457: * Set vnode attributes to VNOVAL
458: */
459: void
1.247 thorpej 460: vattr_null(struct vattr *vap)
1.30 mycroft 461: {
462:
463: vap->va_type = VNON;
1.75 enami 464:
465: /*
466: * Assign individually so that it is safe even if size and
467: * sign of each member are varied.
468: */
469: vap->va_mode = VNOVAL;
470: vap->va_nlink = VNOVAL;
471: vap->va_uid = VNOVAL;
472: vap->va_gid = VNOVAL;
473: vap->va_fsid = VNOVAL;
474: vap->va_fileid = VNOVAL;
1.30 mycroft 475: vap->va_size = VNOVAL;
1.75 enami 476: vap->va_blocksize = VNOVAL;
1.76 christos 477: vap->va_atime.tv_sec =
478: vap->va_mtime.tv_sec =
1.191 enami 479: vap->va_ctime.tv_sec =
480: vap->va_birthtime.tv_sec = VNOVAL;
1.76 christos 481: vap->va_atime.tv_nsec =
482: vap->va_mtime.tv_nsec =
1.191 enami 483: vap->va_ctime.tv_nsec =
484: vap->va_birthtime.tv_nsec = VNOVAL;
1.75 enami 485: vap->va_gen = VNOVAL;
486: vap->va_flags = VNOVAL;
487: vap->va_rdev = VNOVAL;
1.30 mycroft 488: vap->va_bytes = VNOVAL;
489: vap->va_vaflags = 0;
490: }
491:
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
/* Count of allocated vnodes; protected by vnode_free_list_slock. */
long numvnodes;
497:
/*
 * Return the next vnode from the free list.
 *
 * tag, vops	type tag and vnode operations vector for the new vnode
 * mp		mount point the vnode will belong to (may be NULL); it
 *		is vfs_busy'd around the allocation so the per-mount
 *		vnode list stays consistent
 * vpp		on success points at the new vnode (usecount 1)
 *
 * Returns 0 on success, an error from vfs_busy() if the filesystem is
 * going away, or ENFILE if the vnode table is exhausted.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
	struct vnode **vpp)
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error. If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		/* Fresh vnode from the pool; zero it and set up UVM state. */
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
		/*
		 * done by memset() above.
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		/* Recycle; getcleanvnode() drops vnode_free_list_slock. */
		vp = getcleanvnode(p);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				/* Pool was empty; wait a bit and retry. */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_usecount = 1;
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_NOTEVAL;
		vp->vhe = NULL;
#endif
	}
	/* Common (re)initialization for both fresh and recycled vnodes. */
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;
	simple_lock_init(&vp->v_interlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
621:
/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 *
 * The vnode must still be unused (usecount 1, as returned by
 * getnewvnode()).  It is detached from its mount, marked VBAD, and
 * placed at the head of the appropriate free list for quick reuse.
 */
void
ungetnewvnode(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	/* Held vnodes live on their own free list. */
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
650:
/*
 * Move a vnode from one mount queue to another.
 *
 * Passing mp == NULL just removes the vnode from its current queue.
 * Both lists are protected by mntvnode_slock.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}
680:
/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Decrements v_numoutput for the buffer's vnode and, once no output
 * remains, wakes any thread sleeping with VBWAIT set.  May run in
 * interrupt context (from biodone(), per the comment below).
 */
void
vwakeup(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}
704:
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * flags	V_SAVE => write back dirty data (VOP_FSYNC and
 *		PGO_CLEANIT) before invalidating
 * slpflag, slptimeo	passed to ltsleep() while waiting for busy bufs
 *
 * Returns 0 on success, or an error from the page flush, VOP_FSYNC(),
 * or an interrupted sleep.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
	int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	/* Invalidate everything on the clean list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			/* Wait for it; the lists may change, so rescan. */
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	/* Then the dirty list; with V_SAVE it should already be empty. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
802:
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * lbn		first logical block to destroy; buffers below it are kept
 * slpflag, slptimeo	passed to ltsleep() while waiting for busy bufs
 *
 * Returns 0 on success or an error from the page flush or an
 * interrupted sleep.
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	/* First free the pages backing the truncated range. */
	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;	/* below the truncation point */
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			/* Wait for it; the lists may change, so rescan. */
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}
869:
/*
 * Write back all dirty pages and buffers of a vnode.  If "sync" is
 * non-zero, writes are started synchronously and this routine does
 * not return until v_numoutput drains and the dirty list is empty.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			/* Someone else owns this buffer; skip it. */
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Synchronous: wait for all output started above to finish. */
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		/* New dirty buffers appeared while waiting; go again. */
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
921:
922: /*
923: * Associate a buffer with a vnode.
924: */
1.50 christos 925: void
1.247 thorpej 926: bgetvp(struct vnode *vp, struct buf *bp)
1.29 cgd 927: {
1.115 fvdl 928: int s;
1.29 cgd 929:
930: if (bp->b_vp)
1.125 chs 931: panic("bgetvp: not free, bp %p", bp);
1.29 cgd 932: VHOLD(vp);
1.115 fvdl 933: s = splbio();
1.29 cgd 934: bp->b_vp = vp;
935: if (vp->v_type == VBLK || vp->v_type == VCHR)
936: bp->b_dev = vp->v_rdev;
937: else
938: bp->b_dev = NODEV;
939: /*
940: * Insert onto list for new vnode.
941: */
942: bufinsvn(bp, &vp->v_cleanblkhd);
1.115 fvdl 943: splx(s);
1.29 cgd 944: }
945:
946: /*
947: * Disassociate a buffer from a vnode.
948: */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If the vnode now has no pages and no dirty buffers, it has
	 * no further business on the syncer worklist; take it off.
	 */
	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	/* Drop the hold taken by bgetvp(). */
	HOLDRELE(vp);
	splx(s);
}
976:
977: /*
978: * Reassign a buffer from one vnode to another.
979: * Used to assign file specific control information
980: * (indirect blocks) to the vnode to which they belong.
1.115 fvdl 981: *
982: * This function must be called at splbio().
1.29 cgd 983: */
void
reassignbuf(struct buf *bp, struct vnode *newvp)
{
	struct buflists *listheadp;
	int delayx;		/* syncer delay chosen by vnode type */

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		/*
		 * If the target vnode ends up with no pages and no
		 * dirty buffers, it no longer needs syncing.
		 */
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/*
			 * Pick the writeback delay: directories,
			 * filesystem metadata (mounted block devices)
			 * and regular file data each get their own.
			 */
			switch (newvp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			/* Async mounts are flushed wholesale elsewhere. */
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}
1031:
1032: /*
1033: * Create a vnode for a block device.
1.59 thorpej 1034: * Used for root filesystem and swap areas.
1.29 cgd 1035: * Also used for memory file system special devices.
1036: */
1.50 christos 1037: int
1.247 thorpej 1038: bdevvp(dev_t dev, struct vnode **vpp)
1.29 cgd 1039: {
1.30 mycroft 1040:
1041: return (getdevvp(dev, vpp, VBLK));
1.29 cgd 1042: }
1043:
1044: /*
1045: * Create a vnode for a character device.
1046: * Used for kernfs and some console handling.
1047: */
1.50 christos 1048: int
1.247 thorpej 1049: cdevvp(dev_t dev, struct vnode **vpp)
1.29 cgd 1050: {
1.30 mycroft 1051:
1052: return (getdevvp(dev, vpp, VCHR));
1.29 cgd 1053: }
1054:
1055: /*
1056: * Create a vnode for a device.
1057: * Used by bdevvp (block device) for root file system etc.,
1058: * and by cdevvp (character device) for console and kernfs.
1059: */
1.50 christos 1060: int
1.247 thorpej 1061: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
1.29 cgd 1062: {
1.123 augustss 1063: struct vnode *vp;
1.29 cgd 1064: struct vnode *nvp;
1065: int error;
1066:
1.80 fvdl 1067: if (dev == NODEV) {
1068: *vpp = NULLVP;
1.29 cgd 1069: return (0);
1.80 fvdl 1070: }
1.50 christos 1071: error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29 cgd 1072: if (error) {
1073: *vpp = NULLVP;
1074: return (error);
1075: }
1076: vp = nvp;
1077: vp->v_type = type;
1.50 christos 1078: if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29 cgd 1079: vput(vp);
1080: vp = nvp;
1081: }
1082: *vpp = vp;
1083: return (0);
1084: }
1085:
1086: /*
1087: * Check to see if the new vnode represents a special device
1088: * for which we already have a vnode (either because of
1089: * bdevvp() or because of a different vnode representing
1090: * the same block device). If such an alias exists, deallocate
1091: * the existing contents and return the aliased vnode. The
1092: * caller is responsible for filling it with its new contents.
1093: */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can have aliases. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&spechash_slock);
		if (vp->v_usecount == 0) {
			vgonel(vp, p);
			goto loop;
		}
		/*
		 * What we're interested to know here is if someone else has
		 * removed this vnode from the device hash list while we were
		 * waiting.  This can only happen if vclean() did it, and
		 * this requires the vnode to be locked.  Therefore, we use
		 * LK_SLEEPFAIL and retry.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
			goto loop;
		simple_lock(&spechash_slock);
		break;
	}
	/* No usable alias found: initialize nvp as the device vnode. */
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			/* Out of memory: wait for pages and retry. */
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		/* Insert at the head of the device hash chain. */
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Found an existing anonymous device vnode (e.g. from bdevvp):
	 * clean it out and take it over, returning it to the caller.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
1172:
1173: /*
1174: * Grab a particular vnode from the free list, increment its
1.83 fvdl 1175: * reference count and lock it. If the vnode lock bit is set the
1176: * vnode is being eliminated in vgone. In that case, we can not
1177: * grab the vnode, so the process is awakened when the transition is
1178: * completed, and an error returned to indicate that the vnode is no
1179: * longer usable (possibly having been changed to a new file system type).
1.29 cgd 1180: */
int
vget(struct vnode *vp, int flags)
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	/*
	 * First reference: remove the vnode from whichever free list
	 * currently holds it (hold list if there are hold references).
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
1253:
1254: /*
1255: * vput(), just unlock and vrele()
1256: */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced: just drop the vnode lock. */
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/* Reclassify executable-mapped pages as plain file pages. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	/* VOP_INACTIVE releases the vnode lock (caller held it locked). */
	VOP_INACTIVE(vp, p);
}
1296:
1297: /*
1298: * Vnode release.
1299: * If count drops to zero, call inactive routine and return to freelist.
1300: */
void
vrele(struct vnode *vp)
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/* Reclassify executable-mapped pages as plain file pages. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	/*
	 * Unlike vput(), the vnode is not locked on entry, so lock it
	 * before deactivating.  VOP_INACTIVE unlocks it again.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}
1339:
1.80 fvdl 1340: #ifdef DIAGNOSTIC
1.29 cgd 1341: /*
1342: * Page or buffer structure gets a reference.
1343: */
void
vholdl(struct vnode *vp)
{

	/*
	 * NOTE(review): the 'l' suffix suggests this is called with
	 * vp->v_interlock held -- confirm against callers.
	 *
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}
1370:
1371: /*
1372: * Page or buffer structure frees a reference.
1373: */
void
holdrelel(struct vnode *vp)
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}
1404:
1405: /*
1406: * Vnode reference.
1407: */
void
vref(struct vnode *vp)
{

	/*
	 * Add a reference to an already-referenced vnode.  Taking the
	 * first reference must go through vget() instead, so a zero
	 * or negative use count here is a caller bug.
	 */
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
1.80 fvdl 1424: #endif /* DIAGNOSTIC */
1.29 cgd 1425:
1426: /*
1427: * Remove any vnodes in the vnode table belonging to mount point mp.
1428: *
1.183 yamt 1429: * If FORCECLOSE is not specified, there should not be any active ones,
1.29 cgd 1430: * return error if any are found (nb: this is a user error, not a
1.183 yamt 1431: * system error). If FORCECLOSE is specified, detach any active vnodes
1.29 cgd 1432: * that are found.
1.183 yamt 1433: *
1434: * If WRITECLOSE is set, only flush out regular file vnodes open for
1435: * writing.
1436: *
1437: * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29 cgd 1438: */
1.30 mycroft 1439: #ifdef DEBUG
1440: int busyprt = 0; /* print out busy vnodes */
1441: struct ctldebug debug1 = { "busyprt", &busyprt };
1442: #endif
1.29 cgd 1443:
int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;		/* number of vnodes we could not flush */

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/* The list may have changed while we slept; restart. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
1518:
1519: /*
1520: * Disassociate the underlying file system from a vnode.
1521: */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	struct mount *mp;
	int active;		/* nonzero if the vnode had users on entry */

	/* Caller must hold the interlock; VOP_LOCK below consumes it. */
	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	/* Reclassify executable-mapped pages as plain file pages. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If special device, remove it from special device alias list.
	 * if it is on one.
	 */
	if (flags & DOCLOSE) {
		int error;
		struct vnode *vq, *vx;

		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
		error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		vn_finished_write(mp, V_LOWER);
		/* If saving dirty data failed, discard it instead. */
		if (error)
			error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
		KASSERT(error == 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);

		if (active)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);

		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specinfo != 0) {
			/* Unlink vp from its device hash chain. */
			simple_lock(&spechash_slock);
			if (vp->v_hashchain != NULL) {
				if (*vp->v_hashchain == vp) {
					*vp->v_hashchain = vp->v_specnext;
				} else {
					for (vq = *vp->v_hashchain; vq;
					    vq = vq->v_specnext) {
						if (vq->v_specnext != vp)
							continue;
						vq->v_specnext = vp->v_specnext;
						break;
					}
					if (vq == NULL)
						panic("missing bdev");
				}
				if (vp->v_flag & VALIASED) {
					/*
					 * If only one alias remains, it is
					 * no longer aliased.
					 */
					vx = NULL;
					for (vq = *vp->v_hashchain; vq;
					    vq = vq->v_specnext) {
						if (vq->v_rdev != vp->v_rdev ||
						    vq->v_type != vp->v_type)
							continue;
						if (vx)
							break;
						vx = vq;
					}
					if (vx == NULL)
						panic("missing alias");
					if (vq == NULL)
						vx->v_flag &= ~VALIASED;
					vp->v_flag &= ~VALIASED;
				}
			}
			simple_unlock(&spechash_slock);
			FREE(vp->v_specinfo, M_VNODE);
			vp->v_specinfo = NULL;
		}
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}
1697:
1698: /*
1.80 fvdl 1699: * Recycle an unused vnode to the front of the free list.
1700: * Release the passed interlock if the vnode will be recycled.
1.29 cgd 1701: */
1.80 fvdl 1702: int
1.247 thorpej 1703: vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)
1.217 junyoung 1704: {
1705:
1.80 fvdl 1706: simple_lock(&vp->v_interlock);
1707: if (vp->v_usecount == 0) {
1708: if (inter_lkp)
1709: simple_unlock(inter_lkp);
1.201 fvdl 1710: vgonel(vp, p);
1.80 fvdl 1711: return (1);
1.29 cgd 1712: }
1.80 fvdl 1713: simple_unlock(&vp->v_interlock);
1714: return (0);
1.29 cgd 1715: }
1716:
1717: /*
1718: * Eliminate all activity associated with a vnode
1719: * in preparation for reuse.
1720: */
1721: void
1.247 thorpej 1722: vgone(struct vnode *vp)
1.80 fvdl 1723: {
1.201 fvdl 1724: struct proc *p = curproc; /* XXX */
1.80 fvdl 1725:
1726: simple_lock(&vp->v_interlock);
1.201 fvdl 1727: vgonel(vp, p);
1.80 fvdl 1728: }
1729:
1730: /*
1731: * vgone, with the vp interlock held.
1732: */
void
vgonel(struct vnode *vp, struct proc *p)
{

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone.  If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * if it isn't on the freelist, we're called by getcleanvnode
		 * and vnode is being re-used.  otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}
1797:
1798: /*
1799: * Lookup a vnode by device number.
1800: */
1.50 christos 1801: int
1.247 thorpej 1802: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1.29 cgd 1803: {
1.80 fvdl 1804: struct vnode *vp;
1805: int rc = 0;
1.29 cgd 1806:
1.80 fvdl 1807: simple_lock(&spechash_slock);
1.29 cgd 1808: for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1809: if (dev != vp->v_rdev || type != vp->v_type)
1810: continue;
1811: *vpp = vp;
1.80 fvdl 1812: rc = 1;
1813: break;
1.29 cgd 1814: }
1.80 fvdl 1815: simple_unlock(&spechash_slock);
1816: return (rc);
1.96 thorpej 1817: }
1818:
1819: /*
1820: * Revoke all the vnodes corresponding to the specified minor number
1821: * range (endpoints inclusive) of the specified major.
1822: */
1823: void
1.247 thorpej 1824: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 1825: {
1826: struct vnode *vp;
1827: int mn;
1828:
1829: for (mn = minl; mn <= minh; mn++)
1830: if (vfinddev(makedev(maj, mn), type, &vp))
1831: VOP_REVOKE(vp, REVOKEALL);
1.29 cgd 1832: }
1833:
1834: /*
1835: * Calculate the total number of references to a special device.
1836: */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	/* Unaliased device: its own use count is the total. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			/* vgone() may sleep; rescan from the top. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
1865:
1.237 christos 1866: #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1867: #define ARRAY_PRINT(idx, arr) \
1868: ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1869:
1870: const char * const vnode_tags[] = { VNODE_TAGS };
1871: const char * const vnode_types[] = { VNODE_TYPES };
1872: const char vnode_flagbits[] = VNODE_FLAGBITS;
1873:
1.29 cgd 1874: /*
1875: * Print out a description of a vnode.
1876: */
1877: void
1.247 thorpej 1878: vprint(const char *label, struct vnode *vp)
1.29 cgd 1879: {
1.245 christos 1880: char bf[96];
1.29 cgd 1881:
1882: if (label != NULL)
1.57 christos 1883: printf("%s: ", label);
1.237 christos 1884: printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
1885: "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1886: ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1.190 jdolecek 1887: vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1.245 christos 1888: bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
1889: if (bf[0] != '\0')
1890: printf(" flags (%s)", &bf[1]);
1.30 mycroft 1891: if (vp->v_data == NULL) {
1.57 christos 1892: printf("\n");
1.30 mycroft 1893: } else {
1.57 christos 1894: printf("\n\t");
1.30 mycroft 1895: VOP_PRINT(vp);
1896: }
1.29 cgd 1897: }
1898:
1899: #ifdef DEBUG
1900: /*
1901: * List all of the locked vnodes in the system.
1902: * Called when debugging the kernel.
1903: */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		/* Can't busy this mount: note the successor and skip it. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		/*
		 * Re-take the list lock (vfs_busy released it) before
		 * picking the next mount and unbusying this one.
		 */
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
1928: #endif
1929:
1.101 mrg 1930: /*
1.212 atatat 1931: * sysctl helper routine for vfs.generic.conf lookups.
1932: */
1933: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
static int
sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
{
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
	struct sysctlnode node;
	struct vfsops *vfsp;
	u_int vfsnum;

	/* Exactly one name component: the compat filesystem number. */
	if (namelen != 1)
		return (ENOTDIR);
	vfsnum = name[0];
	/* Reject numbers with no corresponding compat name. */
	if (vfsnum >= nmountcompatnames ||
	    mountcompatnames[vfsnum] == NULL)
		return (EOPNOTSUPP);
	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
	if (vfsp == NULL)
		return (EOPNOTSUPP);

	/* Fabricate a struct vfsconf from the in-kernel vfsops. */
	vfc.vfc_vfsops = vfsp;
	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
	vfc.vfc_typenum = vfsnum;
	vfc.vfc_refcount = vfsp->vfs_refcount;
	vfc.vfc_flags = 0;
	vfc.vfc_mountroot = vfsp->vfs_mountroot;
	vfc.vfc_next = NULL;

	/* Hand the temporary back through sysctl_lookup(). */
	node = *rnode;
	node.sysctl_data = &vfc;
	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
1966: #endif
1967:
1968: /*
1.220 lukem 1969: * sysctl helper routine to return list of supported fstypes
1970: */
static int
sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
{
	char bf[MFSNAMELEN];
	char *where = oldp;
	struct vfsops *v;
	size_t needed, left, slen;
	int error, first;

	/* Read-only leaf; no name components expected. */
	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	first = 1;
	error = 0;
	needed = 0;
	left = *oldlenp;

	/*
	 * Build a single space-separated string of file system names.
	 * With a NULL buffer we only size the result; otherwise each
	 * name (with its separator) is copied out incrementally, and
	 * the trailing NUL of each chunk is overwritten by the next.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (where == NULL)
			needed += strlen(v->vfs_name) + 1;
		else {
			memset(bf, 0, sizeof(bf));
			if (first) {
				strncpy(bf, v->vfs_name, sizeof(bf));
				first = 0;
			} else {
				bf[0] = ' ';
				strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
			}
			bf[sizeof(bf)-1] = '\0';
			slen = strlen(bf);
			if (left < slen + 1)
				break;
			/* +1 to copy out the trailing NUL byte */
			error = copyout(bf, where, slen + 1);
			if (error)
				break;
			/* Advance past the name only, not its NUL. */
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	*oldlenp = needed;
	return (error);
}
2018:
2019: /*
1.80 fvdl 2020: * Top level filesystem related information gathering.
2021: */
SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	extern int nmountcompatnames;
#endif

	/* Top-level "vfs" node and its "vfs.generic" subtree. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "vfs", NULL,
		       NULL, 0, NULL, 0,
		       CTL_VFS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "generic",
		       SYSCTL_DESCR("Non-specific vfs related information"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_EOL);

	/* Compat-only node: highest valid filesystem type number. */
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "maxtypenum",
		       SYSCTL_DESCR("Highest valid filesystem type number"),
		       NULL, nmountcompatnames, NULL, 0,
		       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
#endif
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "usermount",
		       SYSCTL_DESCR("Whether unprivileged users may mount "
				    "filesystems"),
		       NULL, 0, &dovfsusermount, 0,
		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
	/* Dynamic string produced by sysctl_vfs_generic_fstypes(). */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRING, "fstypes",
		       SYSCTL_DESCR("List of file systems present"),
		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
	/* Compat-only struct vfsconf view, see sysctl_vfs_generic_conf(). */
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "conf",
		       SYSCTL_DESCR("Filesystem configuration information"),
		       sysctl_vfs_generic_conf, 0, NULL,
		       sizeof(struct vfsconf),
		       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
#endif
}
2071:
1.212 atatat 2072:
int kinfo_vdebug = 1;		/* log when a vnode moves mid-scan below */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* slack entries in the size estimate */
2076: /*
2077: * Dump vnode list (via sysctl).
2078: * Copyout address of vnode followed by vnode.
2079: */
2080: /* ARGSUSED */
int
sysctl_kern_vnode(SYSCTLFN_ARGS)
{
	char *where = oldp;
	size_t *sizep = oldlenp;
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (namelen != 0)
		return (EOPNOTSUPP);
	if (newp != NULL)
		return (EPERM);

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	/* Size-only query: estimate with some slop for new vnodes. */
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		/* Remember where this mount's output started. */
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Rewind and redo this whole mount. */
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/* copyout() may fault/sleep; drop the lock here. */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
2153:
2154: /*
2155: * Check to see if a filesystem is mounted on a block device.
2156: */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	/* Mounted directly on this vnode? */
	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	/* Otherwise check every alias of the same device. */
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
2180:
/*
 * Validate a sockaddr supplied for an export entry: the length must
 * match the family's structure, the port must be zero, and (for
 * AF_INET) the pad bytes must be clear.  Returns 0 if OK, -1 if not.
 */
static int
sacheck(struct sockaddr *sa)
{
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		const char *pad;
		size_t i;

		if (sin->sin_len != sizeof(*sin) || sin->sin_port != 0)
			return -1;
		pad = (const char *)sin->sin_zero;
		for (i = 0; i < sizeof(sin->sin_zero); i++) {
			if (pad[i] != '\0')
				return -1;
		}
		return 0;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (sin6->sin6_len != sizeof(*sin6) ||
		    sin6->sin6_port != 0)
			return -1;
		return 0;
	}
#endif
	default:
		/* Only INET/INET6 addresses may be exported to. */
		return -1;
	}
}
2216:
1.30 mycroft 2217: /*
2218: * Build hash lists of net addresses and hang them off the mount point.
2219: * Called by ufs_mount() to set up the lists of export addresses.
2220: */
2221: static int
1.247 thorpej 2222: vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
2223: struct export_args *argp)
1.30 mycroft 2224: {
1.123 augustss 2225: struct netcred *np, *enp;
2226: struct radix_node_head *rnh;
2227: int i;
1.30 mycroft 2228: struct sockaddr *saddr, *smask = 0;
2229: struct domain *dom;
2230: int error;
2231:
2232: if (argp->ex_addrlen == 0) {
2233: if (mp->mnt_flag & MNT_DEFEXPORTED)
2234: return (EPERM);
2235: np = &nep->ne_defexported;
2236: np->netc_exflags = argp->ex_flags;
1.163 christos 2237: crcvt(&np->netc_anon, &argp->ex_anon);
1.30 mycroft 2238: np->netc_anon.cr_ref = 1;
2239: mp->mnt_flag |= MNT_DEFEXPORTED;
2240: return (0);
2241: }
1.156 jdolecek 2242:
1.195 christos 2243: if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
1.156 jdolecek 2244: return (EINVAL);
2245:
1.30 mycroft 2246: i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2247: np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91 perry 2248: memset((caddr_t)np, 0, i);
1.30 mycroft 2249: saddr = (struct sockaddr *)(np + 1);
1.50 christos 2250: error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2251: if (error)
1.30 mycroft 2252: goto out;
2253: if (saddr->sa_len > argp->ex_addrlen)
2254: saddr->sa_len = argp->ex_addrlen;
1.195 christos 2255: if (sacheck(saddr) == -1)
2256: return EINVAL;
1.30 mycroft 2257: if (argp->ex_masklen) {
2258: smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66 mycroft 2259: error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30 mycroft 2260: if (error)
2261: goto out;
2262: if (smask->sa_len > argp->ex_masklen)
2263: smask->sa_len = argp->ex_masklen;
1.195 christos 2264: if (smask->sa_family != saddr->sa_family)
2265: return EINVAL;
2266: if (sacheck(smask) == -1)
2267: return EINVAL;
1.30 mycroft 2268: }
2269: i = saddr->sa_family;
2270: if ((rnh = nep->ne_rtable[i]) == 0) {
2271: /*
2272: * Seems silly to initialize every AF when most are not
2273: * used, do so on demand here
2274: */
1.241 matt 2275: DOMAIN_FOREACH(dom) {
1.30 mycroft 2276: if (dom->dom_family == i && dom->dom_rtattach) {
2277: dom->dom_rtattach((void **)&nep->ne_rtable[i],
2278: dom->dom_rtoffset);
2279: break;
2280: }
1.241 matt 2281: }
1.30 mycroft 2282: if ((rnh = nep->ne_rtable[i]) == 0) {
2283: error = ENOBUFS;
2284: goto out;
2285: }
2286: }
1.195 christos 2287:
2288: enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
2289: np->netc_rnodes);
2290: if (enp != np) {
2291: if (enp == NULL) {
1.72 fvdl 2292: enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
1.195 christos 2293: smask, rnh);
2294: if (enp == NULL) {
1.72 fvdl 2295: error = EPERM;
2296: goto out;
2297: }
1.195 christos 2298: } else
2299: enp->netc_refcnt++;
2300:
2301: goto check;
2302: } else
2303: enp->netc_refcnt = 1;
1.72 fvdl 2304:
1.30 mycroft 2305: np->netc_exflags = argp->ex_flags;
1.163 christos 2306: crcvt(&np->netc_anon, &argp->ex_anon);
1.30 mycroft 2307: np->netc_anon.cr_ref = 1;
1.195 christos 2308: return 0;
2309: check:
2310: if (enp->netc_exflags != argp->ex_flags ||
2311: crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
2312: error = EPERM;
2313: else
2314: error = 0;
1.30 mycroft 2315: out:
2316: free(np, M_NETADDR);
1.195 christos 2317: return error;
1.30 mycroft 2318: }
2319:
2320: /* ARGSUSED */
2321: static int
1.247 thorpej 2322: vfs_free_netcred(struct radix_node *rn, void *w)
1.30 mycroft 2323: {
1.123 augustss 2324: struct radix_node_head *rnh = (struct radix_node_head *)w;
1.195 christos 2325: struct netcred *np = (struct netcred *)(void *)rn;
1.30 mycroft 2326:
2327: (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1.195 christos 2328: if (--(np->netc_refcnt) <= 0)
2329: free(np, M_NETADDR);
1.30 mycroft 2330: return (0);
2331: }
2332:
2333: /*
2334: * Free the net address hash lists that are hanging off the mount points.
2335: */
2336: static void
1.247 thorpej 2337: vfs_free_addrlist(struct netexport *nep)
1.30 mycroft 2338: {
1.123 augustss 2339: int i;
2340: struct radix_node_head *rnh;
1.30 mycroft 2341:
2342: for (i = 0; i <= AF_MAX; i++)
1.50 christos 2343: if ((rnh = nep->ne_rtable[i]) != NULL) {
2344: (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30 mycroft 2345: free((caddr_t)rnh, M_RTABLE);
2346: nep->ne_rtable[i] = 0;
2347: }
2348: }
2349:
2350: int
1.247 thorpej 2351: vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
1.30 mycroft 2352: {
2353: int error;
2354:
2355: if (argp->ex_flags & MNT_DELEXPORT) {
1.71 fvdl 2356: if (mp->mnt_flag & MNT_EXPUBLIC) {
2357: vfs_setpublicfs(NULL, NULL, NULL);
2358: mp->mnt_flag &= ~MNT_EXPUBLIC;
2359: }
1.30 mycroft 2360: vfs_free_addrlist(nep);
2361: mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2362: }
2363: if (argp->ex_flags & MNT_EXPORTED) {
1.71 fvdl 2364: if (argp->ex_flags & MNT_EXPUBLIC) {
2365: if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2366: return (error);
2367: mp->mnt_flag |= MNT_EXPUBLIC;
2368: }
1.50 christos 2369: if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30 mycroft 2370: return (error);
2371: mp->mnt_flag |= MNT_EXPORTED;
2372: }
1.71 fvdl 2373: return (0);
2374: }
2375:
2376: /*
2377: * Set the publicly exported filesystem (WebNFS). Currently, only
2378: * one public filesystem is possible in the spec (RFC 2054 and 2055)
2379: */
2380: int
1.247 thorpej 2381: vfs_setpublicfs(struct mount *mp, struct netexport *nep,
2382: struct export_args *argp)
1.71 fvdl 2383: {
2384: int error;
2385: struct vnode *rvp;
2386: char *cp;
2387:
2388: /*
2389: * mp == NULL -> invalidate the current info, the FS is
2390: * no longer exported. May be called from either vfs_export
2391: * or unmount, so check if it hasn't already been done.
2392: */
2393: if (mp == NULL) {
2394: if (nfs_pub.np_valid) {
2395: nfs_pub.np_valid = 0;
2396: if (nfs_pub.np_index != NULL) {
2397: FREE(nfs_pub.np_index, M_TEMP);
2398: nfs_pub.np_index = NULL;
2399: }
2400: }
2401: return (0);
2402: }
2403:
2404: /*
2405: * Only one allowed at a time.
2406: */
2407: if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2408: return (EBUSY);
2409:
2410: /*
2411: * Get real filehandle for root of exported FS.
2412: */
1.91 perry 2413: memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.221 christos 2414: nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
1.71 fvdl 2415:
1.200 thorpej 2416: if ((error = VFS_ROOT(mp, &rvp)))
1.71 fvdl 2417: return (error);
2418:
2419: if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2420: return (error);
2421:
2422: vput(rvp);
2423:
2424: /*
2425: * If an indexfile was specified, pull it in.
2426: */
2427: if (argp->ex_indexfile != NULL) {
2428: MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2429: M_WAITOK);
2430: error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2431: MAXNAMLEN, (size_t *)0);
2432: if (!error) {
2433: /*
2434: * Check for illegal filenames.
2435: */
2436: for (cp = nfs_pub.np_index; *cp; cp++) {
2437: if (*cp == '/') {
2438: error = EINVAL;
2439: break;
2440: }
2441: }
2442: }
2443: if (error) {
2444: FREE(nfs_pub.np_index, M_TEMP);
2445: return (error);
2446: }
2447: }
2448:
2449: nfs_pub.np_mount = mp;
2450: nfs_pub.np_valid = 1;
1.30 mycroft 2451: return (0);
2452: }
2453:
2454: struct netcred *
1.247 thorpej 2455: vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
1.30 mycroft 2456: {
1.123 augustss 2457: struct netcred *np;
2458: struct radix_node_head *rnh;
1.30 mycroft 2459: struct sockaddr *saddr;
2460:
2461: np = NULL;
2462: if (mp->mnt_flag & MNT_EXPORTED) {
2463: /*
2464: * Lookup in the export list first.
2465: */
2466: if (nam != NULL) {
2467: saddr = mtod(nam, struct sockaddr *);
2468: rnh = nep->ne_rtable[saddr->sa_family];
2469: if (rnh != NULL) {
2470: np = (struct netcred *)
2471: (*rnh->rnh_matchaddr)((caddr_t)saddr,
2472: rnh);
2473: if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2474: np = NULL;
2475: }
2476: }
2477: /*
2478: * If no address match, use the default if it exists.
2479: */
2480: if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2481: np = &nep->ne_defexported;
2482: }
2483: return (np);
1.35 ws 2484: }
2485:
2486: /*
2487: * Do the usual access checking.
2488: * file_mode, uid and gid are from the vnode in question,
2489: * while acc_mode and cred are from the VOP_ACCESS parameter list
2490: */
1.41 mycroft 2491: int
1.247 thorpej 2492: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2493: mode_t acc_mode, struct ucred *cred)
1.35 ws 2494: {
2495: mode_t mask;
1.217 junyoung 2496:
1.64 mycroft 2497: /*
2498: * Super-user always gets read/write access, but execute access depends
2499: * on at least one execute bit being set.
2500: */
2501: if (cred->cr_uid == 0) {
1.69 mycroft 2502: if ((acc_mode & VEXEC) && type != VDIR &&
1.68 mycroft 2503: (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64 mycroft 2504: return (EACCES);
1.68 mycroft 2505: return (0);
1.64 mycroft 2506: }
1.217 junyoung 2507:
1.35 ws 2508: mask = 0;
1.217 junyoung 2509:
1.35 ws 2510: /* Otherwise, check the owner. */
2511: if (cred->cr_uid == uid) {
1.68 mycroft 2512: if (acc_mode & VEXEC)
1.35 ws 2513: mask |= S_IXUSR;
2514: if (acc_mode & VREAD)
2515: mask |= S_IRUSR;
2516: if (acc_mode & VWRITE)
2517: mask |= S_IWUSR;
1.64 mycroft 2518: return ((file_mode & mask) == mask ? 0 : EACCES);
1.35 ws 2519: }
1.217 junyoung 2520:
1.35 ws 2521: /* Otherwise, check the groups. */
1.44 jtc 2522: if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68 mycroft 2523: if (acc_mode & VEXEC)
1.35 ws 2524: mask |= S_IXGRP;
2525: if (acc_mode & VREAD)
2526: mask |= S_IRGRP;
2527: if (acc_mode & VWRITE)
2528: mask |= S_IWGRP;
1.64 mycroft 2529: return ((file_mode & mask) == mask ? 0 : EACCES);
1.35 ws 2530: }
1.217 junyoung 2531:
1.35 ws 2532: /* Otherwise, check everyone else. */
1.68 mycroft 2533: if (acc_mode & VEXEC)
1.35 ws 2534: mask |= S_IXOTH;
2535: if (acc_mode & VREAD)
2536: mask |= S_IROTH;
2537: if (acc_mode & VWRITE)
2538: mask |= S_IWOTH;
1.64 mycroft 2539: return ((file_mode & mask) == mask ? 0 : EACCES);
1.39 mycroft 2540: }
2541:
2542: /*
2543: * Unmount all file systems.
2544: * We traverse the list in reverse order under the assumption that doing so
2545: * will avoid needing to worry about dependencies.
2546: */
void
vfs_unmountall(struct proc *p)
{
	struct mount *mp, *nmp;
	int allerror, error;

	printf("unmounting file systems...");
	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("\nunmounting %s (%s)...",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer. Must do this before locking the
		 * mount point. See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			/* Someone holds it busy; release the syncer lock
			 * we took above and move on to the next mount. */
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	printf(" done\n");
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}
2580:
1.205 yamt 2581: extern struct simplelock bqueue_slock; /* XXX */
2582:
1.40 mycroft 2583: /*
2584: * Sync and unmount file systems before shutting down.
2585: */
void
vfs_shutdown(void)
{
	struct lwp *l = curlwp;
	struct proc *p;

	/* XXX we're certainly not running in proc0's context! */
	if (l == NULL || (p = l->l_proc) == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
		/* NOTE(review): nonzero presumably means dirty buffers
		 * remained after the wait — confirm in buf_syncwait(). */
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
2631:
2632: /*
2633: * Mount the root file system. If the operator didn't specify a
2634: * file system to use, try all possible file systems until one
2635: * succeeds.
2636: */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		/* Network root: no block device may be configured. */
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		/* Disk root: open the device before trying file systems. */
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED, curproc);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL) {
		error = (*mountroot)();
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		error = (*v->vfs_mountroot)();
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}

	/* Loop ran off the end: no file system claimed the root device. */
	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, release the root device opened above. */
	if (error && root_device->dv_class == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED, curproc);
		vrele(rootvp);
	}
	return (error);
}
2712:
2713: /*
2714: * Given a file system name, look up the vfsops for that
2715: * file system, or return NULL if file system isn't present
2716: * in the kernel.
2717: */
2718: struct vfsops *
1.247 thorpej 2719: vfs_getopsbyname(const char *name)
1.58 thorpej 2720: {
1.79 thorpej 2721: struct vfsops *v;
2722:
1.220 lukem 2723: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2724: if (strcmp(v->vfs_name, name) == 0)
2725: break;
2726: }
2727:
2728: return (v);
2729: }
2730:
2731: /*
2732: * Establish a file system and initialize it.
2733: */
2734: int
1.247 thorpej 2735: vfs_attach(struct vfsops *vfs)
1.79 thorpej 2736: {
2737: struct vfsops *v;
2738: int error = 0;
2739:
1.58 thorpej 2740:
1.79 thorpej 2741: /*
2742: * Make sure this file system doesn't already exist.
2743: */
1.157 chs 2744: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2745: if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2746: error = EEXIST;
2747: goto out;
2748: }
2749: }
2750:
2751: /*
2752: * Initialize the vnode operations for this file system.
2753: */
2754: vfs_opv_init(vfs->vfs_opv_descs);
2755:
2756: /*
2757: * Now initialize the file system itself.
2758: */
2759: (*vfs->vfs_init)();
2760:
2761: /*
2762: * ...and link it into the kernel's list.
2763: */
2764: LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2765:
2766: /*
2767: * Sanity: make sure the reference count is 0.
2768: */
2769: vfs->vfs_refcount = 0;
2770:
2771: out:
2772: return (error);
2773: }
2774:
2775: /*
2776: * Remove a file system from the kernel.
2777: */
2778: int
1.247 thorpej 2779: vfs_detach(struct vfsops *vfs)
1.79 thorpej 2780: {
2781: struct vfsops *v;
2782:
2783: /*
2784: * Make sure no one is using the filesystem.
2785: */
2786: if (vfs->vfs_refcount != 0)
2787: return (EBUSY);
2788:
2789: /*
2790: * ...and remove it from the kernel's list.
2791: */
1.157 chs 2792: LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79 thorpej 2793: if (v == vfs) {
2794: LIST_REMOVE(v, vfs_list);
2795: break;
2796: }
2797: }
2798:
2799: if (v == NULL)
2800: return (ESRCH);
1.121 jdolecek 2801:
2802: /*
2803: * Now run the file system-specific cleanups.
2804: */
2805: (*vfs->vfs_done)();
1.79 thorpej 2806:
2807: /*
2808: * Free the vnode operations vector.
2809: */
2810: vfs_opv_free(vfs->vfs_opv_descs);
2811: return (0);
1.157 chs 2812: }
2813:
2814: void
2815: vfs_reinit(void)
2816: {
2817: struct vfsops *vfs;
2818:
2819: LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2820: if (vfs->vfs_reinit) {
2821: (*vfs->vfs_reinit)();
2822: }
2823: }
1.192 christos 2824: }
2825:
1.214 hannken 2826: /*
2827: * Request a filesystem to suspend write operations.
2828: */
int
vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
{
	struct proc *p = curproc;	/* XXX */
	int error;

	/* Wait out (or bail from) any suspension already in progress. */
	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
		if (slptimeo < 0)
			return EWOULDBLOCK;
		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
		if (error)
			return error;
	}
	mp->mnt_iflag |= IMNT_SUSPEND;

	/* Drain in-flight upper-layer write operations. */
	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountupper > 0)
		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
			0, &mp->mnt_slock);
	simple_unlock(&mp->mnt_slock);

	/* Flush everything to disk; undo the suspension on failure. */
	error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
	if (error) {
		vfs_write_resume(mp);
		return error;
	}
	mp->mnt_iflag |= IMNT_SUSPENDLOW;

	/* Drain lower-layer writes, then mark fully suspended. */
	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountlower > 0)
		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
			0, &mp->mnt_slock);
	mp->mnt_iflag |= IMNT_SUSPENDED;
	simple_unlock(&mp->mnt_slock);

	return 0;
}
2866:
2867: /*
2868: * Request a filesystem to resume write operations.
2869: */
void
vfs_write_resume(struct mount *mp)
{

	/* Nothing to do unless a suspension is in progress. */
	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
		return;
	/* Clear all suspension state and wake sleepers in
	 * vfs_write_suspend() waiting on mnt_flag. */
	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
	wakeup(&mp->mnt_flag);
}
2879:
1.192 christos 2880: void
1.221 christos 2881: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1.192 christos 2882: {
1.221 christos 2883: const struct statvfs *mbp;
1.193 christos 2884:
2885: if (sbp == (mbp = &mp->mnt_stat))
1.192 christos 2886: return;
1.193 christos 2887:
1.222 enami 2888: (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2889: sbp->f_fsid = mbp->f_fsid;
1.193 christos 2890: sbp->f_owner = mbp->f_owner;
1.221 christos 2891: sbp->f_flag = mbp->f_flag;
1.193 christos 2892: sbp->f_syncwrites = mbp->f_syncwrites;
2893: sbp->f_asyncwrites = mbp->f_asyncwrites;
1.221 christos 2894: sbp->f_syncreads = mbp->f_syncreads;
2895: sbp->f_asyncreads = mbp->f_asyncreads;
2896: (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1.193 christos 2897: (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1.192 christos 2898: sizeof(sbp->f_fstypename));
1.193 christos 2899: (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1.192 christos 2900: sizeof(sbp->f_mntonname));
2901: (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2902: sizeof(sbp->f_mntfromname));
1.233 jdolecek 2903: sbp->f_namemax = mbp->f_namemax;
1.192 christos 2904: }
2905:
/*
 * Fill in the f_fstypename, f_mntonname and f_mntfromname members of
 * mp->mnt_stat.  "onp"/"fromp" are the mount-point and mount-from
 * strings; ukon/ukfrom say whether each lives in user or kernel space
 * (UIO_SYSSPACE selects copystr, otherwise copyinstr).  Either string
 * pointer may be NULL, in which case that field is left alone.
 *
 * When the calling process is chrooted, the path of the chroot root is
 * computed via getcwd_common() and prepended to the caller-supplied
 * mount-point name, so f_mntonname is meaningful from the real root.
 *
 * Returns 0 on success or an errno from the copy/getcwd routines.
 */
int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct proc *p)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	/* Selected copy routine: kernel->kernel or user->kernel. */
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = p->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			/* getcwd_common() builds the path backwards from
			 * the end of the buffer. */
			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, p);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			/* Clamp the chroot prefix to the space available. */
			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append the caller's name after the
				 * chroot prefix. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero-pad the tail; also guarantees NUL termination,
		 * which strncpy() above does not. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
1.125 chs 2974:
1.238 thorpej 2975: /*
2976: * Default vfs_extattrctl routine for file systems that do not support
2977: * it.
2978: */
2979: /*ARGSUSED*/
2980: int
2981: vfs_stdextattrctl(struct mount *mp, int cmt, struct vnode *vp,
2982: int attrnamespace, const char *attrname, struct proc *p)
2983: {
2984:
2985: if (vp != NULL)
2986: VOP_UNLOCK(vp, 0);
2987: return (EOPNOTSUPP);
2988: }
2989:
2990: /*
2991: * Credential check based on process requesting service, and per-attribute
2992: * permissions.
2993: *
2994: * NOTE: Vnode must be locked.
2995: */
2996: int
2997: extattr_check_cred(struct vnode *vp, int attrnamespace,
2998: struct ucred *cred, struct proc *p, int access)
2999: {
3000:
3001: if (cred == NOCRED)
3002: return (0);
3003:
3004: switch (attrnamespace) {
3005: case EXTATTR_NAMESPACE_SYSTEM:
3006: /*
3007: * Do we really want to allow this, or just require that
3008: * these requests come from kernel code (NOCRED case above)?
3009: */
3010: return (suser(cred, &p->p_acflag));
1.242 perry 3011:
1.238 thorpej 3012: case EXTATTR_NAMESPACE_USER:
3013: return (VOP_ACCESS(vp, access, cred, p));
1.242 perry 3014:
1.238 thorpej 3015: default:
3016: return (EPERM);
3017: }
3018: }
3019:
1.125 chs 3020: #ifdef DDB
/* Bit-name table for struct buf b_flags, fed to bitmask_snprintf() below. */
static const char buf_flagbits[] = BUF_FLAGBITS;
1.125 chs 3022:
/*
 * DDB helper: print the interesting fields of a struct buf through
 * the supplied output routine "pr" (e.g. db_printf).  "full" is
 * accepted for interface symmetry with the other printers but is not
 * consulted here.
 */
void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
	char bf[1024];	/* scratch buffer for decoded flag names */

	(*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	/* Decode b_flags into symbolic form before printing. */
	bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, bf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}
3040:
3041:
/*
 * DDB helper: print a struct vnode.  Dumps the embedded uvm_object
 * first, then the vnode's flags, counters and type information; with
 * "full" set, also walks the clean and dirty buf lists, printing each
 * buffer via vfs_buf_print().
 */
void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];	/* scratch buffer for decoded flag names */

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	/* ARRAY_PRINT maps the numeric tag/type to a name table entry. */
	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
1.210 dbj 3078:
3079: void
1.247 thorpej 3080: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1.210 dbj 3081: {
3082: char sbuf[256];
3083:
3084: (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3085: mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3086:
1.232 mycroft 3087: (*pr)("fs_bshift %d dev_bshift = %d\n",
3088: mp->mnt_fs_bshift,mp->mnt_dev_bshift);
1.210 dbj 3089:
3090: bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3091: (*pr)("flag = %s\n", sbuf);
3092:
3093: bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
3094: (*pr)("iflag = %s\n", sbuf);
3095:
3096: /* XXX use lockmgr_printinfo */
3097: if (mp->mnt_lock.lk_sharecount)
3098: (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
3099: mp->mnt_lock.lk_sharecount);
3100: else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
3101: (*pr)(" lock type %s: EXCL (count %d) by ",
3102: mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
3103: if (mp->mnt_lock.lk_flags & LK_SPIN)
3104: (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
3105: else
3106: (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
3107: mp->mnt_lock.lk_locklwp);
3108: } else
3109: (*pr)(" not locked");
3110: if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
3111: (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
3112:
3113: (*pr)("\n");
3114:
3115: if (mp->mnt_unmounter) {
3116: (*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
3117: }
3118: (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
3119: mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
3120:
1.221 christos 3121: (*pr)("statvfs cache:\n");
3122: (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3123: (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3124: (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3125:
3126: (*pr)("\tblocks = "PRIu64"\n",mp->mnt_stat.f_blocks);
3127: (*pr)("\tbfree = "PRIu64"\n",mp->mnt_stat.f_bfree);
3128: (*pr)("\tbavail = "PRIu64"\n",mp->mnt_stat.f_bavail);
3129: (*pr)("\tbresvd = "PRIu64"\n",mp->mnt_stat.f_bresvd);
3130:
3131: (*pr)("\tfiles = "PRIu64"\n",mp->mnt_stat.f_files);
3132: (*pr)("\tffree = "PRIu64"\n",mp->mnt_stat.f_ffree);
3133: (*pr)("\tfavail = "PRIu64"\n",mp->mnt_stat.f_favail);
3134: (*pr)("\tfresvd = "PRIu64"\n",mp->mnt_stat.f_fresvd);
3135:
3136: (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3137: mp->mnt_stat.f_fsidx.__fsid_val[0],
3138: mp->mnt_stat.f_fsidx.__fsid_val[1]);
3139:
1.210 dbj 3140: (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
1.221 christos 3141: (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3142:
3143: bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
3144: sizeof(sbuf));
3145: (*pr)("\tflag = %s\n",sbuf);
3146: (*pr)("\tsyncwrites = " PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3147: (*pr)("\tasyncwrites = " PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3148: (*pr)("\tsyncreads = " PRIu64 "\n",mp->mnt_stat.f_syncreads);
3149: (*pr)("\tasyncreads = " PRIu64 "\n",mp->mnt_stat.f_asyncreads);
1.210 dbj 3150: (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3151: (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3152: (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3153:
3154: {
3155: int cnt = 0;
3156: struct vnode *vp;
3157: (*pr)("locked vnodes =");
3158: /* XXX would take mountlist lock, except ddb may not have context */
3159: LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3160: if (VOP_ISLOCKED(vp)) {
3161: if ((++cnt % 6) == 0) {
3162: (*pr)(" %p,\n\t", vp);
3163: } else {
3164: (*pr)(" %p,", vp);
3165: }
3166: }
3167: }
3168: (*pr)("\n");
3169: }
3170:
3171: if (full) {
3172: int cnt = 0;
3173: struct vnode *vp;
3174: (*pr)("all vnodes =");
3175: /* XXX would take mountlist lock, except ddb may not have context */
3176: LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3177: if (!LIST_NEXT(vp, v_mntvnodes)) {
3178: (*pr)(" %p", vp);
3179: } else if ((++cnt % 6) == 0) {
3180: (*pr)(" %p,\n\t", vp);
3181: } else {
3182: (*pr)(" %p,", vp);
3183: }
3184: }
3185: (*pr)("\n", vp);
3186: }
3187: }
1.247 thorpej 3188: #endif /* DDB */
CVSweb <webmaster@jp.NetBSD.org>