Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/vfs_subr.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/vfs_subr.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.112 retrieving revision 1.112.2.2 diff -u -p -r1.112 -r1.112.2.2 --- src/sys/kern/vfs_subr.c 1999/10/01 22:03:17 1.112 +++ src/sys/kern/vfs_subr.c 2000/12/08 09:14:01 1.112.2.2 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_subr.c,v 1.112 1999/10/01 22:03:17 mycroft Exp $ */ +/* $NetBSD: vfs_subr.c,v 1.112.2.2 2000/12/08 09:14:01 bouyer Exp $ */ /*- * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. @@ -81,12 +81,14 @@ * External virtual filesystem routines */ +#include "opt_ddb.h" #include "opt_compat_netbsd.h" #include "opt_compat_43.h" #include #include #include +#include #include #include #include @@ -103,12 +105,14 @@ #include #include -#include -#include - #include +#include +#include -#include +#include +#include + +#include enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, @@ -122,6 +126,8 @@ int vttoif_tab[9] = { int doforce = 1; /* 1 => permit forcible unmounting */ int prtactive = 0; /* 1 => print out reclaim of active vnodes */ +extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ + /* * Insq/Remq for the vnode usage lists. */ @@ -130,20 +136,22 @@ int prtactive = 0; /* 1 => print out re LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } -TAILQ_HEAD(freelst, vnode) vnode_free_list = /* vnode free list */ - TAILQ_HEAD_INITIALIZER(vnode_free_list); +/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */ +struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); +struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); + struct mntlist mountlist = /* mounted filesystem list */ CIRCLEQ_HEAD_INITIALIZER(mountlist); struct vfs_list_head vfs_list = /* vfs list */ - LIST_HEAD_INITIALIZER(vfs_list); + LIST_HEAD_INITIALIZER(vfs_list); struct nfs_public nfs_pub; /* publicly exported FS */ -struct simplelock mountlist_slock; -static struct simplelock mntid_slock; -struct simplelock mntvnode_slock; -struct simplelock vnode_free_list_slock; -struct simplelock spechash_slock; +struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER; +static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER; +struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER; +struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER; +struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER; /* * These define the root filesystem and device. @@ -177,13 +185,13 @@ void vntblinit() { - simple_lock_init(&mntvnode_slock); - simple_lock_init(&mntid_slock); - simple_lock_init(&spechash_slock); - simple_lock_init(&vnode_free_list_slock); - pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE); + + /* + * Initialize the filesystem syncer. + */ + vn_initialize_syncerd(); } /* @@ -203,6 +211,9 @@ vfs_busy(mp, flags, interlkp) if (flags & LK_NOWAIT) return (ENOENT); + if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL + && mp->mnt_unmounter == curproc) + return (EDEADLK); if (interlkp) simple_unlock(interlkp); /* @@ -215,7 +226,7 @@ vfs_busy(mp, flags, interlkp) * can atomically unlock-and-sleep. 
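 	 * The mount may have gone away while we slept; the MNT_GONE
 	 * check below catches that case.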
*/ mp->mnt_wcnt++; - sleep((caddr_t)mp, PVFS); + tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); mp->mnt_wcnt--; gone = mp->mnt_flag & MNT_GONE; @@ -290,7 +301,7 @@ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { - register struct mount *mp; + struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; @@ -309,16 +320,15 @@ vfs_getvfs(fsid) * Get a new unique fsid */ void -vfs_getnewfsid(mp, fstypename) +vfs_getnewfsid(mp) struct mount *mp; - char *fstypename; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); - mtype = makefstype(fstypename); + mtype = makefstype(mp->mnt_op->vfs_name); mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) @@ -340,7 +350,7 @@ vfs_getnewfsid(mp, fstypename) */ long makefstype(type) - char *type; + const char *type; { long rv; @@ -357,7 +367,7 @@ makefstype(type) */ void vattr_null(vap) - register struct vattr *vap; + struct vattr *vap; { vap->va_type = VNON; @@ -403,9 +413,13 @@ getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { + extern struct uvm_pagerops uvm_vnodeops; + struct uvm_object *uobj; struct proc *p = curproc; /* XXX */ + struct freelst *listhd; + static int toggle; struct vnode *vp; - int error; + int error = 0; #ifdef DIAGNOSTIC int s; #endif @@ -419,23 +433,43 @@ getnewvnode(tag, mp, vops, vpp) * (This puts the per-mount vnode list logically under * the protection of the vfs_busy lock). */ - error = vfs_busy(mp, 0, 0); - if (error) + error = vfs_busy(mp, LK_RECURSEFAIL, 0); + if (error && error != EDEADLK) return error; } + /* + * We must choose whether to allocate a new vnode or recycle an + * existing one. The criterion for allocating a new one is that + * the total number of vnodes is less than the number desired or + * there are no vnodes on either free list. Generally we only + * want to recycle vnodes that have no buffers associated with + * them, so we look first on the vnode_free_list. If it is empty, + * we next consider vnodes with referencing buffers on the + * vnode_hold_list. The toggle ensures that half the time we + * will use a buffer from the vnode_hold_list, and half the time + * we will allocate a new one unless the list has grown to twice + * the desired size. We are reticent to recycle vnodes from the + * vnode_hold_list because we will lose the identity of all its + * referencing buffers. 
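+	 * For example (illustrative numbers): with desiredvnodes at
+	 * 1000 and vnode_free_list empty, a table that has grown to
+	 * 1500 vnodes alternates between recycling from the
+	 * vnode_hold_list and allocating a fresh vnode; past 2000
+	 * vnodes the toggle is forced off and we always recycle.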
+ */ + + toggle ^= 1; + if (numvnodes > 2 * desiredvnodes) + toggle = 0; + simple_lock(&vnode_free_list_slock); - if ((vnode_free_list.tqh_first == NULL && - numvnodes < 2 * desiredvnodes) || - numvnodes < desiredvnodes) { + if (numvnodes < desiredvnodes || + (TAILQ_FIRST(listhd = &vnode_free_list) == NULL && + (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) { simple_unlock(&vnode_free_list_slock); vp = pool_get(&vnode_pool, PR_WAITOK); - memset((char *)vp, 0, sizeof(*vp)); + memset(vp, 0, sizeof(*vp)); simple_lock_init(&vp->v_interlock); numvnodes++; } else { - for (vp = vnode_free_list.tqh_first; - vp != NULLVP; vp = vp->v_freelist.tqe_next) { + for (vp = TAILQ_FIRST(listhd); vp != NULLVP; + vp = TAILQ_NEXT(vp, v_freelist)) { if (simple_lock_try(&vp->v_interlock)) { if ((vp->v_flag & VLAYER) == 0) { break; @@ -453,14 +487,15 @@ getnewvnode(tag, mp, vops, vpp) */ if (vp == NULLVP) { simple_unlock(&vnode_free_list_slock); - if (mp) vfs_unbusy(mp); - tablefull("vnode"); + if (mp && error != EDEADLK) + vfs_unbusy(mp); + tablefull("vnode", "increase kern.maxvnodes or NVNODE"); *vpp = 0; return (ENFILE); } if (vp->v_usecount) - panic("free vnode isn't"); - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + panic("free vnode isn't, vp %p", vp); + TAILQ_REMOVE(listhd, vp, v_freelist); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; simple_unlock(&vnode_free_list_slock); @@ -471,10 +506,10 @@ getnewvnode(tag, mp, vops, vpp) simple_unlock(&vp->v_interlock); #ifdef DIAGNOSTIC if (vp->v_data) - panic("cleaned vnode isn't"); + panic("cleaned vnode isn't, vp %p", vp); s = splbio(); if (vp->v_numoutput) - panic("Clean vnode has pending I/O's"); + panic("clean vnode has pending I/O's, vp %p", vp); splx(s); #endif vp->v_flag = 0; @@ -490,6 +525,7 @@ getnewvnode(tag, mp, vops, vpp) vp->v_type = VNON; vp->v_vnlock = &vp->v_lock; lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + lockinit(&vp->v_glock, PVFS, "glock", 0, 0); cache_purge(vp); vp->v_tag = tag; vp->v_op = vops; @@ -498,22 +534,65 @@ getnewvnode(tag, mp, vops, vpp) vp->v_usecount = 1; vp->v_data = 0; simple_lock_init(&vp->v_uvm.u_obj.vmobjlock); - if (mp) vfs_unbusy(mp); + + /* + * initialize uvm_object within vnode. + */ + + uobj = &vp->v_uvm.u_obj; + uobj->pgops = &uvm_vnodeops; + TAILQ_INIT(&uobj->memq); + vp->v_uvm.u_size = VSIZENOTSET; + + if (mp && error != EDEADLK) + vfs_unbusy(mp); return (0); } /* + * This is really just the reverse of getnewvnode(). Needed for + * VFS_VGET functions who may need to push back a vnode in case + * of a locking race. + */ +void +ungetnewvnode(vp) + struct vnode *vp; +{ +#ifdef DIAGNOSTIC + if (vp->v_usecount != 1) + panic("ungetnewvnode: busy vnode"); +#endif + vp->v_usecount--; + insmntque(vp, NULL); + vp->v_type = VBAD; + + simple_lock(&vp->v_interlock); + /* + * Insert at head of LRU list + */ + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist); + else + TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + simple_unlock(&vp->v_interlock); +} + +/* * Move a vnode from one mount queue to another. 
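 * Passing a NULL mp simply removes the vnode from its current
 * queue, as ungetnewvnode() above does before putting a vnode
 * back at the head of the free list.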
*/ void insmntque(vp, mp) - register struct vnode *vp; - register struct mount *mp; + struct vnode *vp; + struct mount *mp; { #ifdef DIAGNOSTIC if ((mp != NULL) && - (mp->mnt_flag & MNT_UNMOUNT)) { + (mp->mnt_flag & MNT_UNMOUNT) && + !(mp->mnt_flag & MNT_SOFTDEP) && + vp->v_tag != VT_VFS) { panic("insmntque into dying filesystem"); } #endif @@ -537,14 +616,13 @@ insmntque(vp, mp) */ void vwakeup(bp) - register struct buf *bp; + struct buf *bp; { - register struct vnode *vp; + struct vnode *vp; - bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp) != NULL) { if (--vp->v_numoutput < 0) - panic("vwakeup: neg numoutput"); + panic("vwakeup: neg numoutput, vp %p", vp); if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t)&vp->v_numoutput); @@ -554,90 +632,197 @@ vwakeup(bp) /* * Flush out and invalidate all buffers associated with a vnode. - * Called with the underlying object locked. + * Called with the underlying vnode locked, which should prevent new dirty + * buffers from being queued. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) - register struct vnode *vp; + struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { - register struct buf *bp; - struct buf *nbp, *blist; - int s, error; - + struct uvm_object *uobj = &vp->v_uvm.u_obj; + struct buf *bp, *nbp; + int s, error, rv; + int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO| + (flags & V_SAVE ? PGO_CLEANIT : 0); + + /* XXXUBC this doesn't look at flags or slp* */ + if (vp->v_type == VREG) { + simple_lock(&uobj->vmobjlock); + rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags); + simple_unlock(&uobj->vmobjlock); + if (!rv) { + return EIO; + } + } if (flags & V_SAVE) { - error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p); - if (error != 0) - return (error); - if (vp->v_dirtyblkhd.lh_first != NULL) - panic("vinvalbuf: dirty bufs"); + error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p); + if (error) + return (error); +#ifdef DIAGNOSTIC + s = splbio(); + if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd)) + panic("vinvalbuf: dirty bufs, vp %p", vp); + splx(s); +#endif } - for (;;) { - if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) - while (blist && blist->b_lblkno < 0) - blist = blist->b_vnbufs.le_next; - if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && - (flags & V_SAVEMETA)) - while (blist && blist->b_lblkno < 0) - blist = blist->b_vnbufs.le_next; - if (!blist) - break; - for (bp = blist; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if (flags & V_SAVEMETA && bp->b_lblkno < 0) - continue; - s = splbio(); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = tsleep((caddr_t)bp, - slpflag | (PRIBIO + 1), "vinvalbuf", - slptimeo); + s = splbio(); + +restart: + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), + "vinvalbuf", slptimeo); + if (error) { splx(s); - if (error) - return (error); - break; + return (error); + } + goto restart; + } + bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; + brelse(bp); + } + + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), + "vinvalbuf", slptimeo); + if (error) { + splx(s); + return (error); } + goto restart; + } + /* + * XXX Since there are no node locks for NFS, I believe + * 
there is a slight chance that a delayed write will + * occur while sleeping just above, so check for it. + */ + if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { +#ifdef DEBUG + printf("buffer still DELWRI\n"); +#endif bp->b_flags |= B_BUSY | B_VFLUSH; + VOP_BWRITE(bp); + goto restart; + } + bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; + brelse(bp); + } + +#ifdef DIAGNOSTIC + if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) + panic("vinvalbuf: flush failed, vp %p", vp); +#endif + + splx(s); + + return (0); +} + +/* + * Destroy any in core blocks past the truncation length. + * Called with the underlying vnode locked, which should prevent new dirty + * buffers from being queued. + */ +int +vtruncbuf(vp, lbn, slpflag, slptimeo) + struct vnode *vp; + daddr_t lbn; + int slpflag, slptimeo; +{ + struct uvm_object *uobj = &vp->v_uvm.u_obj; + struct buf *bp, *nbp; + int s, error, rv; + + s = splbio(); + if (vp->v_type == VREG) { + simple_lock(&uobj->vmobjlock); + rv = (uobj->pgops->pgo_flush)(uobj, + round_page(lbn << vp->v_mount->mnt_fs_bshift), + vp->v_uvm.u_size, PGO_FREE); + simple_unlock(&uobj->vmobjlock); + if (!rv) { splx(s); - /* - * XXX Since there are no node locks for NFS, I believe - * there is a slight chance that a delayed write will - * occur while sleeping just above, so check for it. - */ - if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { - (void) VOP_BWRITE(bp); - break; + return EIO; + } + } + +restart: + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep(bp, slpflag | (PRIBIO + 1), + "vtruncbuf", slptimeo); + if (error) { + splx(s); + return (error); + } + goto restart; + } + bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; + brelse(bp); + } + + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep(bp, slpflag | (PRIBIO + 1), + "vtruncbuf", slptimeo); + if (error) { + splx(s); + return (error); } - bp->b_flags |= B_INVAL; - brelse(bp); + goto restart; } + bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; + brelse(bp); } - if (!(flags & V_SAVEMETA) && - (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) - panic("vinvalbuf: flush failed"); + + splx(s); + return (0); } void vflushbuf(vp, sync) - register struct vnode *vp; + struct vnode *vp; int sync; { - register struct buf *bp, *nbp; + struct uvm_object *uobj = &vp->v_uvm.u_obj; + struct buf *bp, *nbp; int s; + if (vp->v_type == VREG) { + int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? 
PGO_SYNCIO : 0); + + simple_lock(&uobj->vmobjlock); + (uobj->pgops->pgo_flush)(uobj, 0, 0, flags); + simple_unlock(&uobj->vmobjlock); + } + loop: s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) - panic("vflushbuf: not dirty"); + panic("vflushbuf: not dirty, bp %p", bp); bp->b_flags |= B_BUSY | B_VFLUSH; splx(s); /* @@ -659,7 +844,7 @@ loop: tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); } splx(s); - if (vp->v_dirtyblkhd.lh_first != NULL) { + if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { vprint("vflushbuf: dirty", vp); goto loop; } @@ -670,13 +855,15 @@ loop: */ void bgetvp(vp, bp) - register struct vnode *vp; - register struct buf *bp; + struct vnode *vp; + struct buf *bp; { + int s; if (bp->b_vp) - panic("bgetvp: not free"); + panic("bgetvp: not free, bp %p", bp); VHOLD(vp); + s = splbio(); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; @@ -686,6 +873,7 @@ bgetvp(vp, bp) * Insert onto list for new vnode. */ bufinsvn(bp, &vp->v_cleanblkhd); + splx(s); } /* @@ -693,38 +881,48 @@ bgetvp(vp, bp) */ void brelvp(bp) - register struct buf *bp; + struct buf *bp; { struct vnode *vp; + int s; - if (bp->b_vp == (struct vnode *) 0) - panic("brelvp: NULL"); + if (bp->b_vp == NULL) + panic("brelvp: vp NULL, bp %p", bp); + + s = splbio(); + vp = bp->b_vp; /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); - vp = bp->b_vp; - bp->b_vp = (struct vnode *) 0; + + if (vp->v_type != VREG && (vp->v_flag & VONWORKLST) && + LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } + + bp->b_vp = NULL; HOLDRELE(vp); + splx(s); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. + * + * This function must be called at splbio(). */ void reassignbuf(bp, newvp) - register struct buf *bp; - register struct vnode *newvp; + struct buf *bp; + struct vnode *newvp; { - register struct buflists *listheadp; + struct buflists *listheadp; + int delay; - if (newvp == NULL) { - printf("reassignbuf: NULL"); - return; - } /* * Delete from old vnode list, if on one. */ @@ -734,10 +932,36 @@ reassignbuf(bp, newvp) * If dirty, put on list of dirty buffers; * otherwise insert onto list of clean buffers. 
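 	 * A vnode receiving its first dirty buffer is also put on the
 	 * syncer worklist with a type-dependent delay (async mounts
 	 * excepted); one whose last dirty buffer moves away comes off
 	 * the worklist.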
*/ - if (bp->b_flags & B_DELWRI) - listheadp = &newvp->v_dirtyblkhd; - else + if ((bp->b_flags & B_DELWRI) == 0) { listheadp = &newvp->v_cleanblkhd; + if (newvp->v_type != VREG && + (newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } + } else { + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = dirdelay; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = metadelay; + break; + } + /* fall through */ + default: + delay = filedelay; + break; + } + if (!newvp->v_mount || + (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) + vn_syncer_add_to_worklist(newvp, delay); + } + } bufinsvn(bp, listheadp); } @@ -779,7 +1003,7 @@ getdevvp(dev, vpp, type) struct vnode **vpp; enum vtype type; { - register struct vnode *vp; + struct vnode *vp; struct vnode *nvp; int error; @@ -812,12 +1036,12 @@ getdevvp(dev, vpp, type) */ struct vnode * checkalias(nvp, nvp_rdev, mp) - register struct vnode *nvp; + struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { struct proc *p = curproc; /* XXX */ - register struct vnode *vp; + struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) @@ -850,7 +1074,7 @@ loop: nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); nvp->v_speclockf = NULL; *vpp = nvp; @@ -895,29 +1119,62 @@ vget(vp, flags) * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ + if ((flags & LK_INTERLOCK) == 0) simple_lock(&vp->v_interlock); if (vp->v_flag & VXLOCK) { + if (flags & LK_NOWAIT) { + return EBUSY; + } vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - tsleep((caddr_t)vp, PINOD, "vget", 0); + ltsleep((caddr_t)vp, PINOD|PNORELOCK, + "vget", 0, &vp->v_interlock); return (ENOENT); } if (vp->v_usecount == 0) { simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + if (vp->v_holdcnt > 0) + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + else + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); } vp->v_usecount++; #ifdef DIAGNOSTIC if (vp->v_usecount == 0) { vprint("vget", vp); - panic("vget: usecount overflow"); + panic("vget: usecount overflow, vp %p", vp); } #endif if (flags & LK_TYPE_MASK) { - if ((error = vn_lock(vp, flags | LK_INTERLOCK))) - vrele(vp); + if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { + /* + * must expand vrele here because we do not want + * to call VOP_INACTIVE if the reference count + * drops back to zero since it was never really + * active. We must remove it from the free list + * before sleeping so that multiple processes do + * not try to recycle it. + */ + simple_lock(&vp->v_interlock); + vp->v_usecount--; + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + return (error); + } + /* + * insert at tail of LRU list + */ + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, + v_freelist); + else + TAILQ_INSERT_TAIL(&vnode_free_list, vp, + v_freelist); + simple_unlock(&vnode_free_list_slock); + simple_unlock(&vp->v_interlock); + } return (error); } simple_unlock(&vp->v_interlock); @@ -954,8 +1211,12 @@ vput(vp) * Insert at tail of LRU list. 
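 	 * Vnodes still holding buffers go on the hold list instead,
 	 * so that the free list is preferred for recycling.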
*/ simple_lock(&vnode_free_list_slock); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + else + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); + vp->v_flag &= ~VTEXT; simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } @@ -983,15 +1244,19 @@ vrele(vp) #ifdef DIAGNOSTIC if (vp->v_usecount < 0 || vp->v_writecount != 0) { vprint("vrele: bad ref count", vp); - panic("vrele: ref cnt"); + panic("vrele: ref cnt vp %p", vp); } #endif /* * Insert at tail of LRU list. */ simple_lock(&vnode_free_list_slock); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + else + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); + vp->v_flag &= ~VTEXT; if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) VOP_INACTIVE(vp, p); } @@ -1002,10 +1267,30 @@ vrele(vp) */ void vhold(vp) - register struct vnode *vp; + struct vnode *vp; { - simple_lock(&vp->v_interlock); + /* + * If it is on the freelist and the hold count is currently + * zero, move it to the hold list. The test of the back + * pointer and the use reference count of zero is because + * it will be removed from a free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from a freelist to ensure + * that we do not try to move it here. + */ + simple_lock(&vp->v_interlock); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } vp->v_holdcnt++; simple_unlock(&vp->v_interlock); } @@ -1015,13 +1300,35 @@ vhold(vp) */ void holdrele(vp) - register struct vnode *vp; + struct vnode *vp; { simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("holdrele: holdcnt vp %p", vp); vp->v_holdcnt--; + + /* + * If it is on the holdlist and the hold count drops to + * zero, move it to the free list. The test of the back + * pointer and the use reference count of zero is because + * it will be removed from a free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from a freelist to ensure + * that we do not try to move it here. 
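+	 * (brelvp(), for instance, can drop the last hold reference
+	 * while getnewvnode() is flushing out a vnode it has already
+	 * taken off a free list.)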
+ */ + + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } simple_unlock(&vp->v_interlock); } @@ -1035,12 +1342,12 @@ vref(vp) simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) - panic("vref used where vget required"); + panic("vref used where vget required, vp %p", vp); vp->v_usecount++; #ifdef DIAGNOSTIC if (vp->v_usecount == 0) { vprint("vref", vp); - panic("vref: usecount overflow"); + panic("vref: usecount overflow, vp %p", vp); } #endif simple_unlock(&vp->v_interlock); @@ -1067,7 +1374,7 @@ vflush(mp, skipvp, flags) int flags; { struct proc *p = curproc; /* XXX */ - register struct vnode *vp, *nvp; + struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); @@ -1143,7 +1450,7 @@ loop: */ void vclean(vp, flags, p) - register struct vnode *vp; + struct vnode *vp; int flags; struct proc *p; { @@ -1171,8 +1478,10 @@ vclean(vp, flags, p) * brought into use while we clean it out. */ if (vp->v_flag & VXLOCK) - panic("vclean: deadlock"); + panic("vclean: deadlock, vp %p", vp); vp->v_flag |= VXLOCK; + vp->v_flag &= ~VTEXT; + /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK @@ -1183,11 +1492,7 @@ vclean(vp, flags, p) VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); /* - * clean out any VM data associated with the vnode. - */ - uvm_vnp_terminate(vp); - /* - * Clean out any buffers associated with the vnode. + * Clean out any cached data associated with the vnode. */ if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); @@ -1212,8 +1517,7 @@ vclean(vp, flags, p) * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) - panic("vclean: cannot reclaim"); - + panic("vclean: cannot reclaim, vp %p", vp); if (active) { /* * Inline copy of vrele() since VOP_INACTIVE @@ -1230,17 +1534,21 @@ vclean(vp, flags, p) /* * Insert at tail of LRU list. 
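 	 * Since the vnode has been reclaimed, it can go directly to
 	 * the free list: a clean vnode holds no buffers.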
*/ + + simple_unlock(&vp->v_interlock); simple_lock(&vnode_free_list_slock); #ifdef DIAGNOSTIC if (vp->v_vnlock) { if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); } + if (vp->v_holdcnt > 0) + panic("vclean: not clean, vp %p", vp); #endif TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); - } - simple_unlock(&vp->v_interlock); + } else + simple_unlock(&vp->v_interlock); } cache_purge(vp); @@ -1250,11 +1558,14 @@ vclean(vp, flags, p) */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; + simple_unlock(&vp->v_interlock); wakeup((caddr_t)vp); - } + } else + simple_unlock(&vp->v_interlock); } /* @@ -1298,7 +1609,7 @@ vgone(vp) */ void vgonel(vp, p) - register struct vnode *vp; + struct vnode *vp; struct proc *p; { struct vnode *vq; @@ -1310,8 +1621,8 @@ vgonel(vp, p) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - tsleep((caddr_t)vp, PINOD, "vgone", 0); + ltsleep((caddr_t)vp, PINOD | PNORELOCK, + "vgone", 0, &vp->v_interlock); return; } /* @@ -1380,8 +1691,10 @@ vgonel(vp, p) */ if (vp->v_usecount == 0) { simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + panic("vgonel: not clean, vp %p", vp); if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && - vnode_free_list.tqh_first != vp) { + TAILQ_FIRST(&vnode_free_list) != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } @@ -1436,9 +1749,9 @@ vdevgone(maj, minl, minh, type) */ int vcount(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq, *vnext; + struct vnode *vq, *vnext; int count; loop: @@ -1472,14 +1785,14 @@ static char *typename[] = void vprint(label, vp) char *label; - register struct vnode *vp; + struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: ", label); - printf("type %s, usecount %ld, writecount %ld, refcount %ld,", - typename[vp->v_type], vp->v_usecount, vp->v_writecount, + printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,", + vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) @@ -1577,6 +1890,8 @@ vfs_sysctl(name, namelen, oldp, oldlenp, /* The rest are generic vfs sysctls. 
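 	 * VFS_USERMOUNT below controls dovfsusermount, i.e. whether
 	 * unprivileged users may mount file systems.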
*/ switch (name[1]) { + case VFS_USERMOUNT: + return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount); #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44) case VFS_MAXTYPENUM: /* @@ -1698,7 +2013,7 @@ vfs_mountedon(vp) struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); @@ -1706,7 +2021,7 @@ vfs_mountedon(vp) if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } @@ -1726,9 +2041,9 @@ vfs_hang_addrlist(mp, nep, argp) struct netexport *nep; struct export_args *argp; { - register struct netcred *np, *enp; - register struct radix_node_head *rnh; - register int i; + struct netcred *np, *enp; + struct radix_node_head *rnh; + int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; @@ -1817,7 +2132,7 @@ vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { - register struct radix_node_head *rnh = (struct radix_node_head *)w; + struct radix_node_head *rnh = (struct radix_node_head *)w; (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); free((caddr_t)rn, M_NETADDR); @@ -1831,8 +2146,8 @@ static void vfs_free_addrlist(nep) struct netexport *nep; { - register int i; - register struct radix_node_head *rnh; + int i; + struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i]) != NULL) { @@ -1953,12 +2268,12 @@ vfs_setpublicfs(mp, nep, argp) struct netcred * vfs_export_lookup(mp, nep, nam) - register struct mount *mp; + struct mount *mp; struct netexport *nep; struct mbuf *nam; { - register struct netcred *np; - register struct radix_node_head *rnh; + struct netcred *np; + struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; @@ -2053,23 +2368,11 @@ vaccess(type, file_mode, uid, gid, acc_m * will avoid needing to worry about dependencies. */ void -vfs_unmountall() +vfs_unmountall(p) + struct proc *p; { - register struct mount *mp, *nmp; + struct mount *mp, *nmp; int allerror, error; - struct proc *p = curproc; /* XXX */ - - /* - * Unmounting a file system blocks the requesting process. - * However, it's possible for this routine to be called when - * curproc is NULL (e.g. panic situation, or via the debugger). - * If we get stuck in this situation, just abort, since any - * attempts to sleep will fault. - */ - if (p == NULL) { - printf("vfs_unmountall: no context, aborting\n"); - return; - } for (allerror = 0, mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { @@ -2096,36 +2399,79 @@ vfs_unmountall() void vfs_shutdown() { - register struct buf *bp; - int iter, nbusy; - + struct buf *bp; + int iter, nbusy, nbusy_prev = 0, dcount, s; + struct proc *p = curproc; + + /* XXX we're certainly not running in proc0's context! */ + if (p == NULL) + p = &proc0; + printf("syncing disks... "); - /* XXX Should suspend scheduling. */ + /* remove user process from run queue */ + suspendsched(); (void) spl0(); - sys_sync(&proc0, (void *)0, (register_t *)0); + /* avoid coming back this way again if we panic. */ + doing_shutdown = 1; + + sys_sync(p, NULL, NULL); /* Wait for sync to finish. 
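 	 * The loop below lengthens its sleep and advances iter only
 	 * when a pass fails to shrink the busy count; soft dependency
 	 * buffers are written out explicitly, at most dcount of them.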
*/ - for (iter = 0; iter < 20; iter++) { + dcount = 10000; + for (iter = 0; iter < 20;) { nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) - if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) + for (bp = &buf[nbuf]; --bp >= buf; ) { + if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) + nbusy++; + /* + * With soft updates, some buffers that are + * written will be remarked as dirty until other + * buffers are written. + */ + if (bp->b_vp && bp->b_vp->v_mount + && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) + && (bp->b_flags & B_DELWRI)) { + s = splbio(); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); nbusy++; + bawrite(bp); + if (dcount-- <= 0) { + printf("softdep "); + goto fail; + } + } + } if (nbusy == 0) break; + if (nbusy_prev == 0) + nbusy_prev = nbusy; printf("%d ", nbusy); - DELAY(40000 * iter); + tsleep(&nbusy, PRIBIO, "bflush", + (iter == 0) ? 1 : hz / 25 * iter); + if (nbusy >= nbusy_prev) /* we didn't flush anything */ + iter++; + else + nbusy_prev = nbusy; } if (nbusy) { -#ifdef DEBUG +fail: +#if defined(DEBUG) || defined(DEBUG_HALT_BUSY) printf("giving up\nPrinting vnodes for busy buffers\n"); for (bp = &buf[nbuf]; --bp >= buf; ) - if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) + if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) vprint(NULL, bp->b_vp); -#else - printf("giving up\n"); + +#if defined(DDB) && defined(DEBUG_HALT_BUSY) + Debugger(); #endif + +#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ + printf("giving up\n"); +#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ return; } else printf("done\n"); @@ -2142,7 +2488,7 @@ vfs_shutdown() vnshutdown(); #endif /* Unmount file systems. */ - vfs_unmountall(); + vfs_unmountall(p); } /* @@ -2300,8 +2646,139 @@ vfs_detach(vfs) return (ESRCH); /* + * Now run the file system-specific cleanups. + */ + (*vfs->vfs_done)(); + + /* * Free the vnode operations vector. 
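 	 * This happens after the file system's own cleanup hook has
 	 * run, so nothing should be using the descriptors any more.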
*/ vfs_opv_free(vfs->vfs_opv_descs); return (0); } + +#ifdef DDB +const char buf_flagbits[] = + "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" + "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" + "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED" + "\32XXX\33VFLUSH"; + +void +vfs_buf_print(bp, full, pr) + struct buf *bp; + int full; + void (*pr) __P((const char *, ...)); +{ + char buf[1024]; + + (*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n", + bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev); + + bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf)); + (*pr)(" error %d flags 0x%s\n", bp->b_error, buf); + + (*pr)(" bufsize 0x%x bcount 0x%x resid 0x%x\n", + bp->b_bufsize, bp->b_bcount, bp->b_resid); + (*pr)(" data %p saveaddr %p dep %p\n", + bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); + (*pr)(" iodone %p\n", bp->b_iodone); +} + + +const char vnode_flagbits[] = + "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED" + "\15DIROP\17DIRTY"; + +const char *vnode_types[] = { + "VNON", + "VREG", + "VDIR", + "VBLK", + "VCHR", + "VLNK", + "VSOCK", + "VFIFO", + "VBAD", +}; + +const char *vnode_tags[] = { + "VT_NON", + "VT_UFS", + "VT_NFS", + "VT_MFS", + "VT_MSDOSFS", + "VT_LFS", + "VT_LOFS", + "VT_FDESC", + "VT_PORTAL", + "VT_NULL", + "VT_UMAP", + "VT_KERNFS", + "VT_PROCFS", + "VT_AFS", + "VT_ISOFS", + "VT_UNION", + "VT_ADOSFS", + "VT_EXT2FS", + "VT_CODA", + "VT_FILECORE", + "VT_NTFS", + "VT_VFS", + "VT_OVERLAY" +}; + +void +vfs_vnode_print(vp, full, pr) + struct vnode *vp; + int full; + void (*pr) __P((const char *, ...)); +{ + char buf[256]; + + const char *vtype, *vtag; + + uvm_object_printit(&vp->v_uvm.u_obj, full, pr); + bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf)); + (*pr)("\nVNODE flags %s\n", buf); + (*pr)("mp %p nio %d size 0x%x rwlock 0x%x glock 0x%x\n", + vp->v_mount, vp->v_uvm.u_nio, (int)vp->v_uvm.u_size, + vp->v_vnlock ? lockstatus(vp->v_vnlock) : 0x999, + lockstatus(&vp->v_glock)); + + (*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n", + vp->v_data, vp->v_usecount, vp->v_writecount, + vp->v_holdcnt, vp->v_numoutput); + + vtype = (vp->v_type >= 0 && + vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ? + vnode_types[vp->v_type] : "UNKNOWN"; + vtag = (vp->v_tag >= 0 && + vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ? + vnode_tags[vp->v_tag] : "UNKNOWN"; + + (*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n", + vtype, vp->v_type, vtag, vp->v_tag, + vp->v_id, vp->v_mount, vp->v_mountedhere); + (*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n", + vp->v_lastr, vp->v_lastw, vp->v_lasta); + (*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n", + vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra); + + if (full) { + struct buf *bp; + + (*pr)("clean bufs:\n"); + LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { + (*pr)(" bp %p\n", bp); + vfs_buf_print(bp, full, pr); + } + + (*pr)("dirty bufs:\n"); + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { + (*pr)(" bp %p\n", bp); + vfs_buf_print(bp, full, pr); + } + } +} +#endif
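For reference, the ungetnewvnode() routine introduced above is intended to be
used from VFS_VGET-style code roughly as follows. This is a minimal sketch,
not code from the diff: xfs_hashget(), xfs_hashins() and xfs_vnodeop_p are
hypothetical stand-ins for a file system's own inode hash and vnode
operations vector.

	int
	xfs_vget(mp, ino, vpp)
		struct mount *mp;
		ino_t ino;
		struct vnode **vpp;
	{
		struct vnode *vp;
		int error;

	loop:
		/* Another thread may already have built this vnode. */
		if ((*vpp = xfs_hashget(mp, ino)) != NULL)
			return (0);

		/* Allocate a fresh vnode; this may sleep. */
		error = getnewvnode(VT_NON, mp, xfs_vnodeop_p, &vp);
		if (error)
			return (error);

		/*
		 * While we slept in getnewvnode(), someone else may have
		 * created and hashed the same inode.  If so, push the new
		 * vnode back onto the free list and retry the lookup.
		 */
		if (xfs_hashins(mp, ino, vp) != 0) {
			ungetnewvnode(vp);
			goto loop;
		}

		*vpp = vp;
		return (0);
	}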