Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.86.4.10 retrieving revision 1.89.4.2 diff -u -p -r1.86.4.10 -r1.89.4.2 --- src/sys/ufs/ffs/ffs_vnops.c 2007/08/24 23:28:45 1.86.4.10 +++ src/sys/ufs/ffs/ffs_vnops.c 2007/09/03 16:49:15 1.89.4.2 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vnops.c,v 1.86.4.10 2007/08/24 23:28:45 ad Exp $ */ +/* $NetBSD: ffs_vnops.c,v 1.89.4.2 2007/09/03 16:49:15 jmcneill Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.86.4.10 2007/08/24 23:28:45 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.89.4.2 2007/09/03 16:49:15 jmcneill Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -246,7 +246,7 @@ ffs_fsync(void *v) struct lwp *a_l; } */ *ap = v; struct buf *bp; - int num, error, i; + int s, num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; @@ -273,7 +273,7 @@ ffs_fsync(void *v) * First, flush all pages in range. */ - mutex_enter(&vp->v_interlock); + simple_lock(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); @@ -285,32 +285,40 @@ ffs_fsync(void *v) * Then, flush indirect blocks. 
*/ + s = splbio(); if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); - if (error) + if (error) { + splx(s); goto out; - - mutex_enter(&bufcache_lock); + } for (i = 0; i < num; i++) { - if ((bp = incore(vp, ia[i].in_lbn)) == NULL) - continue; - if ((bp->b_cflags & BC_BUSY) != 0 || - (bp->b_oflags & BO_DELWRI) == 0) - continue; - bp->b_cflags |= BC_BUSY | BC_VFLUSH; - mutex_exit(&bufcache_lock); - bawrite(bp); - mutex_enter(&bufcache_lock); + bp = incore(vp, ia[i].in_lbn); + if (bp != NULL) { + simple_lock(&bp->b_interlock); + if (!(bp->b_flags & B_BUSY) && (bp->b_flags & B_DELWRI)) { + bp->b_flags |= B_BUSY | B_VFLUSH; + simple_unlock(&bp->b_interlock); + splx(s); + bawrite(bp); + s = splbio(); + } else { + simple_unlock(&bp->b_interlock); + } + } } - mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { - mutex_enter(&vp->v_interlock); - while (vp->v_numoutput > 0) - cv_wait(&vp->v_cv, &vp->v_interlock); - mutex_exit(&vp->v_interlock); + simple_lock(&global_v_numoutput_slock); + while (vp->v_numoutput > 0) { + vp->v_flag |= VBWAIT; + ltsleep(&vp->v_numoutput, PRIBIO + 1, "fsync_range", 0, + &global_v_numoutput_slock); + } + simple_unlock(&global_v_numoutput_slock); } + splx(s); error = ffs_update(vp, NULL, NULL, ((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) @@ -344,15 +352,13 @@ ffs_full_fsync(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; - int error, passes, skipmeta, inodedeps_only, waitfor; + int s, error, passes, skipmeta, inodedeps_only, waitfor; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) softdep_fsync_mountdev(vp); - mutex_enter(&vp->v_interlock); - inodedeps_only = DOINGSOFTDEP(vp) && (ap->a_flags & FSYNC_RECLAIM) && UVM_OBJ_IS_CLEAN(&vp->v_uobj) && LIST_EMPTY(&vp->v_dirtyblkhd); @@ -361,6 +367,7 @@ ffs_full_fsync(void *v) */ if (vp->v_type == VREG || vp->v_type == VBLK) { + simple_lock(&vp->v_interlock); error = 
VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | (fstrans_getstate(vp->v_mount) == FSTRANS_SUSPENDING ? @@ -368,31 +375,33 @@ ffs_full_fsync(void *v) if (error) { return error; } - } else - mutex_exit(&vp->v_interlock); + } passes = NIADDR + 1; skipmeta = 0; if (ap->a_flags & FSYNC_WAIT) skipmeta = 1; + s = splbio(); - mutex_enter(&bufcache_lock); - mutex_enter(&vp->v_interlock); loop: - LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { - bp->b_cflags &= ~BC_SCANNED; - } + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) + bp->b_flags &= ~B_SCANNED; for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - if (bp->b_cflags & (BC_BUSY | BC_SCANNED)) + simple_lock(&bp->b_interlock); + if (bp->b_flags & (B_BUSY | B_SCANNED)) { + simple_unlock(&bp->b_interlock); continue; - if ((bp->b_oflags & BO_DELWRI) == 0) + } + if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - if (skipmeta && bp->b_lblkno < 0) + if (skipmeta && bp->b_lblkno < 0) { + simple_unlock(&bp->b_interlock); continue; - mutex_exit(&vp->v_interlock); - bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED; - mutex_exit(&bufcache_lock); + } + simple_unlock(&bp->b_interlock); + bp->b_flags |= B_BUSY | B_VFLUSH | B_SCANNED; + splx(s); /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing @@ -402,25 +411,26 @@ loop: (void) bawrite(bp); else if ((error = bwrite(bp)) != 0) return (error); + s = splbio(); /* - * Since we unlocked during the I/O, we need + * Since we may have slept during the I/O, we need * to start from a known point. 
*/ - mutex_enter(&bufcache_lock); - mutex_enter(&vp->v_interlock); nbp = LIST_FIRST(&vp->v_dirtyblkhd); } if (skipmeta) { skipmeta = 0; goto loop; } - mutex_exit(&bufcache_lock); - if (ap->a_flags & FSYNC_WAIT) { + simple_lock(&global_v_numoutput_slock); while (vp->v_numoutput) { - cv_wait(&vp->v_cv, &vp->v_interlock); + vp->v_flag |= VBWAIT; + (void) ltsleep(&vp->v_numoutput, PRIBIO + 1, + "ffsfsync", 0, &global_v_numoutput_slock); } - mutex_exit(&vp->v_interlock); + simple_unlock(&global_v_numoutput_slock); + splx(s); /* * Ensure that any filesystem metadata associated @@ -429,6 +439,7 @@ loop: if ((error = softdep_sync_metadata(ap)) != 0) return (error); + s = splbio(); if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may @@ -447,8 +458,8 @@ loop: vprint("ffs_fsync: dirty", vp); #endif } - } else - mutex_exit(&vp->v_interlock); + } + splx(s); if (inodedeps_only) waitfor = 0; @@ -479,7 +490,6 @@ ffs_reclaim(void *v) struct inode *ip = VTOI(vp); struct mount *mp = vp->v_mount; struct ufsmount *ump = ip->i_ump; - void *data; int error; fstrans_start(mp, FSTRANS_LAZY); @@ -494,19 +504,12 @@ ffs_reclaim(void *v) pool_put(&ffs_dinode2_pool, ip->i_din.ffs2_din); } /* - * To interlock with ffs_sync(). - */ - genfs_node_destroy(vp); - mutex_enter(&vp->v_interlock); - data = vp->v_data; - vp->v_data = NULL; - mutex_exit(&vp->v_interlock); - - /* * XXX MFS ends up here, too, to free an inode. Should we create * XXX a separate pool for MFS inodes? 
*/ - pool_put(&ffs_inode_pool, data); + genfs_node_destroy(vp); + pool_put(&ffs_inode_pool, vp->v_data); + vp->v_data = NULL; fstrans_done(mp); return (0); } @@ -540,7 +543,7 @@ ffs_getpages(void *v) blkoff(fs, *ap->a_count << PAGE_SHIFT) != 0) && DOINGSOFTDEP(ap->a_vp)) { if ((ap->a_flags & PGO_LOCKED) == 0) { - mutex_exit(&vp->v_interlock); + simple_unlock(&vp->v_interlock); } return EINVAL; } @@ -737,6 +740,9 @@ ffs_lock(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct mount *mp = vp->v_mount; + struct lock *lkp; + int flags = ap->a_flags; + int result; /* * Fake lock during file system suspension. @@ -744,11 +750,34 @@ ffs_lock(void *v) if ((vp->v_type == VREG || vp->v_type == VDIR) && fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { - if ((ap->a_flags & LK_INTERLOCK) != 0) - mutex_exit(&vp->v_interlock); + if ((flags & LK_INTERLOCK) != 0) + simple_unlock(&vp->v_interlock); return 0; } - return (lockmgr(vp->v_vnlock, ap->a_flags, &vp->v_interlock)); + + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); + + KASSERT((flags & ~(LK_SHARED | LK_EXCLUSIVE | LK_SLEEPFAIL | + LK_INTERLOCK | LK_NOWAIT | LK_SETRECURSE | LK_CANRECURSE)) == 0); + for (;;) { + if ((flags & LK_INTERLOCK) == 0) { + simple_lock(&vp->v_interlock); + flags |= LK_INTERLOCK; + } + lkp = vp->v_vnlock; + result = lockmgr(lkp, flags, &vp->v_interlock); + if (lkp == vp->v_vnlock || result != 0) + return result; + /* + * Apparent success, except that the vnode mutated between + * snapshot file vnode and regular file vnode while this + * thread slept. The lock currently held is not the right + * lock. Release it, and try to get the new lock. 
+ */ + (void) lockmgr(lkp, LK_RELEASE, NULL); + flags &= ~LK_INTERLOCK; + } } /* @@ -771,7 +800,7 @@ ffs_unlock(void *v) fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { if ((ap->a_flags & LK_INTERLOCK) != 0) - mutex_exit(&vp->v_interlock); + simple_unlock(&vp->v_interlock); return 0; } return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE,