Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/kern/vfs_subr.c,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/vfs_subr.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.79
retrieving revision 1.80
diff -u -p -r1.79 -r1.80
--- src/sys/kern/vfs_subr.c	1998/02/18 07:16:41	1.79
+++ src/sys/kern/vfs_subr.c	1998/03/01 02:22:36	1.80
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_subr.c,v 1.79 1998/02/18 07:16:41 thorpej Exp $ */
+/* $NetBSD: vfs_subr.c,v 1.80 1998/03/01 02:22:36 fvdl Exp $ */
 
 /*-
  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
@@ -135,23 +135,25 @@ TAILQ_HEAD(freelst, vnode) vnode_free_li
 	TAILQ_HEAD_INITIALIZER(vnode_free_list);
 struct mntlist mountlist =	/* mounted filesystem list */
     CIRCLEQ_HEAD_INITIALIZER(mountlist);
-
 struct vfs_list_head vfs_list =	/* vfs list */
-   LIST_HEAD_INITIALIZER(vfs_list);
+    LIST_HEAD_INITIALIZER(vfs_list);
 
 struct nfs_public nfs_pub;	/* publicly exported FS */
+struct simplelock mountlist_slock;
+static struct simplelock mntid_slock;
+struct simplelock mntvnode_slock;
+struct simplelock vnode_free_list_slock;
+static struct simplelock spechash_slock;
+
 /*
  * These define the root filesystem and device.
  */
 struct mount *rootfs;
 struct vnode *rootvnode;
-struct device *root_device;
+struct device *root_device;	/* root device */
 
-int vfs_lock __P((struct mount *));
-void vfs_unlock __P((struct mount *));
-struct mount *getvfs __P((fsid_t *));
-long makefstype __P((char *));
+struct mount *vfs_getvfs __P((fsid_t *));
 void vattr_null __P((struct vattr *));
 int getnewvnode __P((enum vtagtype, struct mount *,
	    int (**)(void *), struct vnode **));
@@ -164,7 +166,6 @@ int bdevvp __P((dev_t, struct vnode **))
 int cdevvp __P((dev_t, struct vnode **));
 int getdevvp __P((dev_t, struct vnode **, enum vtype));
 struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
-int vget __P((struct vnode *, int));
 void vref __P((struct vnode *));
 void vput __P((struct vnode *));
 void vrele __P((struct vnode *));
@@ -173,6 +174,7 @@ void holdrele __P((struct vnode *));
 int vflush __P((struct mount *, struct vnode *, int));
 void vgoneall __P((struct vnode *));
 void vgone __P((struct vnode *));
+void vgonel __P((struct vnode *vp, struct proc *p));
 int vcount __P((struct vnode *));
 void vprint __P((char *, struct vnode *));
 int vfs_mountedon __P((struct vnode *));
@@ -199,99 +201,117 @@ void
 vntblinit()
 {
 
-	/*
-	 * Nothing to do here anymore; vnode_free_list and mountlist
-	 * are now initialized data.
-	 */
+	simple_lock_init(&mntvnode_slock);
+	simple_lock_init(&mntid_slock);
+	simple_lock_init(&spechash_slock);
+	simple_lock_init(&vnode_free_list_slock);
 }
 
 /*
- * Lock a filesystem.
- * Used to prevent access to it while mounting and unmounting.
+ * Mark a mount point as busy. Used to synchronize access and to delay
+ * unmounting. Interlock is not released on failure.
*/ int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp) + struct mount *mp; + int flags; + struct simplelock *interlkp; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - tsleep((caddr_t)mp, PVFS, "vfslock", 0); - } - mp->mnt_flag |= MNT_MLOCK; + if (interlkp) + simple_unlock(interlkp); + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + sleep((caddr_t)mp, PVFS); + if (interlkp) + simple_lock(interlkp); + return (ENOENT); + } + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp)) + panic("vfs_busy: unexpected lock failure"); return (0); } /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. + * Free a busy filesystem. */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp) + struct mount *mp; { - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t)mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. */ int -vfs_busy(mp) - register struct mount *mp; +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; { - int unmounting = mp->mnt_flag & MNT_UNMOUNT; - - while(mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); - if (unmounting) - return (1); - } - mp->mnt_flag |= MNT_MPBUSY; - return (0); -} + struct vfsops *vfsp = NULL; + struct mount *mp; -/* - * Free a busy filesystem. - * Panic if filesystem is not busy. - */ -void -vfs_unbusy(mp) - register struct mount *mp; -{ + for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL; + vfsp = LIST_NEXT(vfsp, vfs_list)) + if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN)) + break; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t)&mp->mnt_flag); - } + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_op = vfsp; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfs_refcount++; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; + return (0); } /* * Lookup a mount point by filesystem identifier. 
*/ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) + mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } + } + simple_unlock(&mountlist_slock); return ((struct mount *)0); } @@ -299,27 +319,30 @@ getvfs(fsid) * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp, fstypename) struct mount *mp; - int mtype; + char *fstypename; { static u_short xxxfs_mntid; - fsid_t tfsid; + int mtype; - mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */ + simple_lock(&mntid_slock); + mtype = makefstype(fstypename); + mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -338,6 +361,7 @@ makefstype(type) return rv; } + /* * Set vnode attributes to VNOVAL */ @@ -389,39 +413,50 @@ getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { - register struct vnode *vp; -#ifdef DIAGNOSTIC + struct proc *p = curproc; /* XXX */ + struct vnode *vp; int s; -#endif + simple_lock(&vnode_free_list_slock); if ((vnode_free_list.tqh_first == NULL && numvnodes < 2 * desiredvnodes) || numvnodes < desiredvnodes) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *)malloc((u_long)sizeof *vp, M_VNODE, M_WAITOK); bzero((char *)vp, sizeof *vp); numvnodes++; } else { - if ((vp = vnode_free_list.tqh_first) == NULL) { + for (vp = vnode_free_list.tqh_first; + vp != NULLVP; vp = vp->v_freelist.tqe_next) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. + */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); tablefull("vnode"); *vpp = 0; return (ENFILE); } - if (vp->v_usecount) { - vprint("free vnode", vp); + if (vp->v_usecount) panic("free vnode isn't"); - } TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; + simple_unlock(&vnode_free_list_slock); vp->v_lease = NULL; if (vp->v_type != VBAD) - vgone(vp); + vgonel(vp, p); + else + simple_unlock(&vp->v_interlock); #ifdef DIAGNOSTIC - if (vp->v_data) { - vprint("cleaned vnode", vp); + if (vp->v_data) panic("cleaned vnode isn't"); - } s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); @@ -460,6 +495,7 @@ insmntque(vp, mp) register struct mount *mp; { + simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ @@ -468,9 +504,9 @@ insmntque(vp, mp) /* * Insert into list of vnodes for the new mount point, if available. 
*/ - if ((vp->v_mount = mp) == NULL) - return; - LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + if ((vp->v_mount = mp) != NULL) + LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + simple_unlock(&mntvnode_slock); } /* @@ -723,8 +759,10 @@ getdevvp(dev, vpp, type) struct vnode *nvp; int error; - if (dev == NODEV) + if (dev == NODEV) { + *vpp = NULLVP; return (0); + } error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; @@ -754,6 +792,7 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { + struct proc *p = curproc; /* XXX */ register struct vnode *vp; struct vnode **vpp; @@ -762,18 +801,23 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ + simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + simple_unlock(&spechash_slock); goto loop; + } break; } if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { @@ -783,17 +827,20 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; + simple_unlock(&spechash_slock); nvp->v_speclockf = NULL; *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -810,47 +857,54 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) - register struct vnode *vp; - int lockflag; +vget(vp, flags) + struct vnode *vp; + int flags; { + int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then - * return failure. Cleaning is determined either by checking - * that the VXLOCK flag is set, or that the use count is - * zero with the back pointer set to show that it has been - * removed from the free list by getnewvnode. The VXLOCK - * flag may not have been set yet because vclean is blocked in - * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. - */ - if ((vp->v_flag & VXLOCK) || - (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. + */ + if ((flags & LK_INTERLOCK) == 0) + simple_lock(&vp->v_interlock); + if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); - return (1); + return (ENOENT); } - if (vp->v_usecount == 0) + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } vp->v_usecount++; - if (lockflag) - VOP_LOCK(vp); + if (flags & LK_TYPE_MASK) { + if ((error = vn_lock(vp, flags | LK_INTERLOCK))) + vrele(vp); + return (error); + } + simple_unlock(&vp->v_interlock); return (0); } /* - * Vnode reference, just increment the count + * Vnode reference. 
*/ void vref(vp) struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) panic("vref used where vget required"); vp->v_usecount++; + simple_unlock(&vp->v_interlock); } /* @@ -858,11 +912,35 @@ vref(vp) */ void vput(vp) - register struct vnode *vp; + struct vnode *vp; { + struct proc *p = curproc; /* XXX */ - VOP_UNLOCK(vp); - vrele(vp); +#ifdef DIGANOSTIC + if (vp == NULL) + panic("vput: null vp"); +#endif + simple_lock(&vp->v_interlock); + vp->v_usecount--; + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + VOP_UNLOCK(vp, 0); + return; + } +#ifdef DIAGNOSTIC + if (vp->v_usecount < 0 || vp->v_writecount != 0) { + vprint("vput: bad ref count", vp); + panic("vput: ref cnt"); + } +#endif + /* + * insert at tail of LRU list + */ + simple_lock(&vnode_free_list_slock); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + simple_unlock(&vp->v_interlock); + VOP_INACTIVE(vp, p); } /* @@ -871,18 +949,22 @@ vput(vp) */ void vrele(vp) - register struct vnode *vp; + struct vnode *vp; { + struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif + simple_lock(&vp->v_interlock); vp->v_usecount--; - if (vp->v_usecount > 0) + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); return; + } #ifdef DIAGNOSTIC - if (vp->v_usecount != 0 || vp->v_writecount != 0) { + if (vp->v_usecount < 0 || vp->v_writecount != 0) { vprint("vrele: bad ref count", vp); panic("vrele: ref cnt"); } @@ -890,10 +972,14 @@ vrele(vp) /* * insert at tail of LRU list */ + simple_lock(&vnode_free_list_slock); TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - VOP_INACTIVE(vp); + simple_unlock(&vnode_free_list_slock); + if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) + VOP_INACTIVE(vp, p); } +#ifdef DIAGNOSTIC /* * Page or buffer structure gets a reference. */ @@ -902,7 +988,9 @@ vhold(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); } /* @@ -913,10 +1001,13 @@ holdrele(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; + simple_unlock(&vp->v_interlock); } +#endif /* DIAGNOSTIC */ /* * Remove any vnodes in the vnode table belonging to mount point mp. @@ -937,11 +1028,11 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { + struct proc *p = curproc; /* XXX */ register struct vnode *vp, *nvp; int busy = 0; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vflush: not busy"); + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) @@ -952,11 +1043,14 @@ loop: */ if (vp == skipvp) continue; + simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); continue; + } /* * If WRITECLOSE is set, only flush out regular file * vnodes open for writing. @@ -969,7 +1063,9 @@ loop: * out the vnode data structures and we are done. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); continue; } /* @@ -978,21 +1074,25 @@ loop: * anonymous device. For all other files, just kill them. 
*/ if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgone(vp); + vgonel(vp, p); } else { - vclean(vp, 0); + vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *)0); } + simple_lock(&mntvnode_slock); continue; } #ifdef DEBUG if (busyprt) vprint("vflush: busy vnode", vp); #endif + simple_unlock(&vp->v_interlock); busy++; } + simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); @@ -1002,9 +1102,10 @@ loop: * Disassociate the underlying file system from a vnode. */ void -vclean(vp, flags) +vclean(vp, flags, p) register struct vnode *vp; int flags; + struct proc *p; { int active; @@ -1017,14 +1118,6 @@ vclean(vp, flags) if ((active = vp->v_usecount) != 0) VREF(vp); /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. - */ - VOP_LOCK(vp); - /* * Prevent the vnode from being recycled or * brought into use while we clean it out. */ @@ -1038,32 +1131,52 @@ vclean(vp, flags) uvm_vnp_terminate(vp); #endif /* - * Clean out any buffers associated with the vnode. + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); + /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); + /* * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0); } /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } + /* * Done with purge, notify sleepers of the grim news. */ @@ -1077,47 +1190,25 @@ vclean(vp, flags) } /* - * Eliminate all activity associated with the requested vnode - * and with all vnodes aliased to the requested vnode. + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. */ -void -vgoneall(vp) - register struct vnode *vp; -{ - register struct vnode *vq; - - if (vp->v_flag & VALIASED) { - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. 
- */ - if (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - tsleep((caddr_t)vp, PINOD, "vgoneall", 0); - return; - } - /* - * Ensure that vp will not be vgone'd while we - * are eliminating its aliases. - */ - vp->v_flag |= VXLOCK; - while (vp->v_flag & VALIASED) { - for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { - if (vq->v_rdev != vp->v_rdev || - vq->v_type != vp->v_type || vp == vq) - continue; - vgone(vq); - break; - } - } - /* - * Remove the lock so that vgone below will - * really eliminate the vnode after which time - * vgone will awaken any sleepers. - */ - vp->v_flag &= ~VXLOCK; +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) + simple_unlock(inter_lkp); + vgonel(vp, p); + return (1); } - vgone(vp); + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1126,9 +1217,23 @@ vgoneall(vp) */ void vgone(vp) + struct vnode *vp; +{ + struct proc *p = curproc; /* XXX */ + + simple_lock(&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) register struct vnode *vp; + struct proc *p; { - register struct vnode *vq; + struct vnode *vq; struct vnode *vx; /* @@ -1137,21 +1242,25 @@ vgone(vp) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - insmntque(vp, (struct mount *)0); + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list. + * if it is on one. */ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1180,6 +1289,7 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } @@ -1196,11 +1306,14 @@ vgone(vp) * getnewvnode after removing it from the freelist to ensure * that we do not try to move it here. 
*/ - if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && - vnode_free_list.tqh_first != vp) { - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && + vnode_free_list.tqh_first != vp) { + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + } + simple_unlock(&vnode_free_list_slock); } vp->v_type = VBAD; } @@ -1214,15 +1327,19 @@ vfinddev(dev, type, vpp) enum vtype type; struct vnode **vpp; { - register struct vnode *vp; + struct vnode *vp; + int rc = 0; + simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; - return (1); + rc = 1; + break; } - return (0); + simple_unlock(&spechash_slock); + return (rc); } /* @@ -1238,6 +1355,7 @@ vcount(vp) loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); + simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1246,11 +1364,13 @@ loop: * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { + simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } + simple_unlock(&spechash_slock); return (count); } @@ -1305,21 +1425,93 @@ vprint(label, vp) void printlockedvnodes() { - register struct mount *mp; - register struct vnode *vp; + struct mount *mp, *nmp; + struct vnode *vp; printf("Locked vnodes\n"); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { + nmp = mp->mnt_list.cqe_next; + continue; + } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) + vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); + } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp); } + simple_unlock(&mountlist_slock); } #endif +extern const char *mountcompatnames[]; +extern const int nmountcompatnames; + +/* + * Top level filesystem related information gathering. + */ +int +vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ +/* + * XXX needs work. Old interface (Lite2) used fs type numbers, can't + * do that anymore. It should work with names. Provide some compat + * code. 
+ */ +#ifdef COMPAT_44 + struct vfsconf vfc; + struct vfsops *vfsp; + + /* all sysctl names at this level are at least name and field */ + if (namelen < 2) + return (ENOTDIR); /* overloaded */ + if (name[0] != VFS_GENERIC) { + if (name[0] >= nmountcompatnames || name[0] < 0 || + mountcompatnames[name[0]] == NULL) + return (EOPNOTSUPP); + vfsp = vfs_getopsbyname(mountcompatnames[name[0]]); + if (vfsp == NULL) + return (EINVAL); + return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1, + oldp, oldlenp, newp, newlen, p)); + } + switch (name[1]) { + case VFS_MAXTYPENUM: + return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames)); + case VFS_CONF: + if (namelen < 3) + return (ENOTDIR); /* overloaded */ + if (name[2] >= nmountcompatnames || name[2] < 0 || + mountcompatnames[name[2]] == NULL) + return (EOPNOTSUPP); + vfsp = vfs_getopsbyname(mountcompatnames[name[2]]); + if (vfsp == NULL) + return (EINVAL); + vfc.vfc_vfsops = NULL; /* XXX point to vfsops->vfs_mount? */ + strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN); + vfc.vfc_typenum = name[2]; + vfc.vfc_flags = 0; + vfc.vfc_mountroot = vfsp->vfs_mountroot; + vfc.vfc_next = NULL; + return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc, + sizeof(struct vfsconf))); + } +#endif + return (EOPNOTSUPP); +} + int kinfo_vdebug = 1; int kinfo_vgetfailed; #define KINFO_VNODESLOP 10 @@ -1329,13 +1521,14 @@ int kinfo_vgetfailed; */ /* ARGSUSED */ int -sysctl_vnode(where, sizep) +sysctl_vnode(where, sizep, p) char *where; size_t *sizep; + struct proc *p; { - register struct mount *mp, *nmp; - struct vnode *vp; - register char *bp = where, *savebp; + struct mount *mp, *nmp; + struct vnode *nvp, *vp; + char *bp = where, *savebp; char *ewhere; int error; @@ -1346,38 +1539,50 @@ sysctl_vnode(where, sizep) return (0); } ewhere = where + *sizep; - + + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { + nmp = mp->mnt_list.cqe_next; continue; + } savebp = bp; again: + simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. 
*/ if (vp->v_mount != mp) { + simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); bp = savebp; goto again; } + nvp = vp->v_mntvnodes.le_next; if (bp + VPTRSZ + VNODESZ > ewhere) { + simple_unlock(&mntvnode_slock); *sizep = bp - where; return (ENOMEM); } + simple_unlock(&mntvnode_slock); if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) return (error); bp += VPTRSZ + VNODESZ; + simple_lock(&mntvnode_slock); } + simple_unlock(&mntvnode_slock); + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp); } + simple_unlock(&mountlist_slock); *sizep = bp - where; return (0); @@ -1388,22 +1593,27 @@ again: */ int vfs_mountedon(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq; + struct vnode *vq; + int error = 0; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); + if (vq->v_specflags & SI_MOUNTEDON) { + error = EBUSY; + break; + } } + simple_unlock(&spechash_slock); } - return (0); + return (error); } /* @@ -1747,6 +1957,7 @@ vfs_unmountall() { register struct mount *mp, *nmp; int allerror, error; + struct proc *p = curproc; /* XXX */ for (allerror = 0, mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { @@ -1755,9 +1966,9 @@ vfs_unmountall() printf("unmounting %s (%s)...\n", mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); #endif - if (vfs_busy(mp)) + if (vfs_busy(mp, 0, 0)) continue; - if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) { + if ((error = dounmount(mp, MNT_FORCE, p)) != 0) { printf("unmount of %s failed with error %d\n", mp->mnt_stat.f_mntonname, error); allerror = 1;