Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.77.8.3 retrieving revision 1.98.10.1 diff -u -p -r1.77.8.3 -r1.98.10.1 --- src/sys/ufs/ffs/ffs_vnops.c 2006/05/24 10:59:25 1.77.8.3 +++ src/sys/ufs/ffs/ffs_vnops.c 2008/05/16 02:26:00 1.98.10.1 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vnops.c,v 1.77.8.3 2006/05/24 10:59:25 yamt Exp $ */ +/* $NetBSD: ffs_vnops.c,v 1.98.10.1 2008/05/16 02:26:00 yamt Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.77.8.3 2006/05/24 10:59:25 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.98.10.1 2008/05/16 02:26:00 yamt Exp $"); #include #include @@ -48,6 +48,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c, #include #include #include +#include #include #include @@ -63,8 +64,6 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c, #include -static int ffs_full_fsync(void *); - /* Global vfs data structures for ufs. */ int (**ffs_vnodeop_p)(void *); const struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { @@ -80,7 +79,6 @@ const struct vnodeopv_entry_desc ffs_vno { &vop_setattr_desc, ufs_setattr }, /* setattr */ { &vop_read_desc, ffs_read }, /* read */ { &vop_write_desc, ffs_write }, /* write */ - { &vop_lease_desc, ufs_lease_check }, /* lease */ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ { &vop_poll_desc, ufs_poll }, /* poll */ @@ -100,16 +98,16 @@ const struct vnodeopv_entry_desc ffs_vno { &vop_abortop_desc, ufs_abortop }, /* abortop */ { &vop_inactive_desc, ufs_inactive }, /* inactive */ { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, ufs_lock }, /* lock */ - { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_lock_desc, ffs_lock }, /* lock */ + { &vop_unlock_desc, ffs_unlock }, /* unlock */ { &vop_bmap_desc, ufs_bmap }, /* bmap */ { &vop_strategy_desc, ufs_strategy }, /* strategy */ { &vop_print_desc, ufs_print }, /* print */ - { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_islocked_desc, ffs_islocked }, /* islocked */ { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ - { &vop_getpages_desc, ffs_getpages }, /* getpages */ + { &vop_getpages_desc, genfs_getpages }, /* getpages */ { &vop_putpages_desc, genfs_putpages }, /* putpages */ { &vop_openextattr_desc, ffs_openextattr }, /* openextattr */ { &vop_closeextattr_desc, ffs_closeextattr }, /* closeextattr */ @@ -135,7 +133,6 @@ const struct vnodeopv_entry_desc ffs_spe { &vop_setattr_desc, ufs_setattr }, /* setattr */ { &vop_read_desc, ufsspec_read }, /* read */ { &vop_write_desc, ufsspec_write }, /* write */ - { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ { &vop_poll_desc, spec_poll }, /* poll */ @@ -155,12 +152,12 @@ const struct vnodeopv_entry_desc ffs_spe { &vop_abortop_desc, spec_abortop }, /* abortop */ { &vop_inactive_desc, ufs_inactive }, /* inactive */ { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, ufs_lock }, /* lock */ - { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_lock_desc, ffs_lock }, /* lock */ + { &vop_unlock_desc, ffs_unlock }, /* unlock */ { &vop_bmap_desc, spec_bmap }, /* bmap */ { &vop_strategy_desc, spec_strategy }, /* strategy */ { &vop_print_desc, ufs_print }, /* print */ - { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_islocked_desc, ffs_islocked }, /* islocked */ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ @@ -190,7 +187,6 @@ const struct vnodeopv_entry_desc ffs_fif { &vop_setattr_desc, ufs_setattr }, /* setattr */ { &vop_read_desc, ufsfifo_read }, /* read */ { &vop_write_desc, ufsfifo_write }, /* write */ - { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ { &vop_poll_desc, fifo_poll }, /* poll */ @@ -210,12 +206,12 @@ const struct vnodeopv_entry_desc ffs_fif { &vop_abortop_desc, fifo_abortop }, /* abortop */ { &vop_inactive_desc, ufs_inactive }, /* inactive */ { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, ufs_lock }, /* lock */ - { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_lock_desc, ffs_lock }, /* lock */ + { &vop_unlock_desc, ffs_unlock }, /* unlock */ { &vop_bmap_desc, fifo_bmap }, /* bmap */ { &vop_strategy_desc, fifo_strategy }, /* strategy */ { &vop_print_desc, ufs_print }, /* print */ - { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_islocked_desc, ffs_islocked }, /* islocked */ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ @@ -245,22 +241,25 @@ ffs_fsync(void *v) struct lwp *a_l; } */ *ap = v; struct buf *bp; - int s, num, error, i; + int num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; struct vnode *vp; + vp = ap->a_vp; + + fstrans_start(vp->v_mount, FSTRANS_LAZY); /* * XXX no easy way to sync a range in a file with softdep. */ - if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(ap->a_vp) || - (ap->a_vp->v_type != VREG)) - return ffs_full_fsync(v); - - vp = ap->a_vp; + if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) || + (vp->v_type != VREG)) { + error = ffs_full_fsync(vp, ap->a_flags); + goto out; + } - bsize = ap->a_vp->v_mount->mnt_stat.f_iosize; + bsize = vp->v_mount->mnt_stat.f_iosize; blk_high = ap->a_offhi / bsize; if (ap->a_offhi % bsize != 0) blk_high++; @@ -269,52 +268,44 @@ ffs_fsync(void *v) * First, flush all pages in range. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); if (error) { - return error; + goto out; } /* * Then, flush indirect blocks. */ - s = splbio(); if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); - if (error) { - splx(s); - return error; - } + if (error) + goto out; + + mutex_enter(&bufcache_lock); for (i = 0; i < num; i++) { - bp = incore(vp, ia[i].in_lbn); - if (bp != NULL) { - simple_lock(&bp->b_interlock); - if (!(bp->b_flags & B_BUSY) && (bp->b_flags & B_DELWRI)) { - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); - splx(s); - bawrite(bp); - s = splbio(); - } else { - simple_unlock(&bp->b_interlock); - } - } + if ((bp = incore(vp, ia[i].in_lbn)) == NULL) + continue; + if ((bp->b_cflags & BC_BUSY) != 0 || + (bp->b_oflags & BO_DELWRI) == 0) + continue; + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); + bawrite(bp); + mutex_enter(&bufcache_lock); } + mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput > 0) { - vp->v_flag |= VBWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "fsync_range", 0, - &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput > 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } - splx(s); error = ffs_update(vp, NULL, NULL, ((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) @@ -323,9 +314,11 @@ ffs_fsync(void *v) if (error == 0 && ap->a_flags & FSYNC_CACHE) { int l = 0; VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE, - ap->a_l->l_proc->p_cred, ap->a_l); + curlwp->l_cred); } +out: + fstrans_done(vp->v_mount); return error; } @@ -333,105 +326,99 @@ ffs_fsync(void *v) * Synch an open file. */ /* ARGSUSED */ -static int -ffs_full_fsync(void *v) +int +ffs_full_fsync(struct vnode *vp, int flags) { - struct vop_fsync_args /* { - struct vnode *a_vp; - kauth_cred_t a_cred; - int a_flags; - off_t a_offlo; - off_t a_offhi; - struct lwp *a_l; - } */ *ap = v; - struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; - int s, error, passes, skipmeta, inodedeps_only, waitfor; + int error, passes, skipmeta, inodedeps_only, waitfor; + struct mount *mp; + + error = 0; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) softdep_fsync_mountdev(vp); - inodedeps_only = DOINGSOFTDEP(vp) && (ap->a_flags & FSYNC_RECLAIM) - && vp->v_uobj.uo_npages == 0 && LIST_EMPTY(&vp->v_dirtyblkhd); + mutex_enter(&vp->v_interlock); + + inodedeps_only = DOINGSOFTDEP(vp) && (flags & FSYNC_RECLAIM) + && UVM_OBJ_IS_CLEAN(&vp->v_uobj) && LIST_EMPTY(&vp->v_dirtyblkhd); /* * Flush all dirty data associated with a vnode. */ if (vp->v_type == VREG || vp->v_type == VBLK) { - simple_lock(&vp->v_interlock); + if ((flags & FSYNC_VFS) != 0) + mp = vp->v_specmountpoint; + else + mp = vp->v_mount; error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | - ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); - if (error) { + ((flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | + (fstrans_getstate(mp) == FSTRANS_SUSPENDING ? + PGO_FREE : 0)); + if (error) return error; - } - } + } else + mutex_exit(&vp->v_interlock); passes = NIADDR + 1; skipmeta = 0; - if (ap->a_flags & FSYNC_WAIT) + if (flags & FSYNC_WAIT) skipmeta = 1; - s = splbio(); loop: - LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - bp->b_flags &= ~B_SCANNED; + mutex_enter(&bufcache_lock); + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { + bp->b_cflags &= ~BC_SCANNED; + } for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & (B_BUSY | B_SCANNED)) { - simple_unlock(&bp->b_interlock); + if (bp->b_cflags & (BC_BUSY | BC_SCANNED)) continue; - } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("ffs_fsync: not dirty"); - if (skipmeta && bp->b_lblkno < 0) { - simple_unlock(&bp->b_interlock); + if (skipmeta && bp->b_lblkno < 0) continue; - } - simple_unlock(&bp->b_interlock); - bp->b_flags |= B_BUSY | B_VFLUSH | B_SCANNED; - splx(s); + bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED; + mutex_exit(&bufcache_lock); /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ - if (passes > 0 || !(ap->a_flags & FSYNC_WAIT)) + if (passes > 0 || !(flags & FSYNC_WAIT)) (void) bawrite(bp); else if ((error = bwrite(bp)) != 0) return (error); - s = splbio(); /* - * Since we may have slept during the I/O, we need + * Since we unlocked during the I/O, we need * to start from a known point. */ + mutex_enter(&bufcache_lock); nbp = LIST_FIRST(&vp->v_dirtyblkhd); } + mutex_exit(&bufcache_lock); if (skipmeta) { skipmeta = 0; goto loop; } - if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); + + if (flags & FSYNC_WAIT) { + mutex_enter(&vp->v_interlock); while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - (void) ltsleep(&vp->v_numoutput, PRIBIO + 1, - "ffsfsync", 0, &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); /* * Ensure that any filesystem metadata associated * with the vnode has been written. */ - if ((error = softdep_sync_metadata(ap)) != 0) + if ((error = softdep_sync_metadata(vp)) != 0) return (error); - s = splbio(); if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may @@ -451,18 +438,24 @@ loop: #endif } } - splx(s); if (inodedeps_only) waitfor = 0; else - waitfor = (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; - error = ffs_update(vp, NULL, NULL, waitfor); + waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; - if (error == 0 && ap->a_flags & FSYNC_CACHE) { + if (vp->v_tag == VT_UFS) + error = ffs_update(vp, NULL, NULL, waitfor); + else { + KASSERT(vp->v_type == VBLK); + KASSERT((flags & FSYNC_VFS) != 0); + } + + if (error == 0 && flags & FSYNC_CACHE) { int i = 0; - VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE, - ap->a_l->l_proc->p_cred, ap->a_l); + if ((flags & FSYNC_VFS) == 0) + vp = VTOI(vp)->i_devvp; + VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred); } return error; @@ -480,26 +473,41 @@ ffs_reclaim(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); + struct mount *mp = vp->v_mount; struct ufsmount *ump = ip->i_ump; + void *data; int error; - if ((error = ufs_reclaim(vp, ap->a_l)) != 0) + fstrans_start(mp, FSTRANS_LAZY); + if ((error = ufs_reclaim(vp)) != 0) { + fstrans_done(mp); return (error); + } if (ip->i_din.ffs1_din != NULL) { if (ump->um_fstype == UFS1) - pool_put(&ffs_dinode1_pool, ip->i_din.ffs1_din); + pool_cache_put(ffs_dinode1_cache, ip->i_din.ffs1_din); else - pool_put(&ffs_dinode2_pool, ip->i_din.ffs2_din); + pool_cache_put(ffs_dinode2_cache, ip->i_din.ffs2_din); } /* + * To interlock with ffs_sync(). + */ + genfs_node_destroy(vp); + mutex_enter(&vp->v_interlock); + data = vp->v_data; + vp->v_data = NULL; + mutex_exit(&vp->v_interlock); + + /* * XXX MFS ends up here, too, to free an inode. Should we create * XXX a separate pool for MFS inodes? */ - pool_put(&ffs_inode_pool, vp->v_data); - vp->v_data = NULL; + pool_cache_put(ffs_inode_cache, data); + fstrans_done(mp); return (0); } +#if 0 int ffs_getpages(void *v) { @@ -528,12 +536,13 @@ ffs_getpages(void *v) blkoff(fs, *ap->a_count << PAGE_SHIFT) != 0) && DOINGSOFTDEP(ap->a_vp)) { if ((ap->a_flags & PGO_LOCKED) == 0) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } return EINVAL; } return genfs_getpages(v); } +#endif /* * Return the last logical file offset that should be written for this file @@ -607,12 +616,18 @@ ffs_getextattr(void *v) kauth_cred_t a_cred; struct proc *a_p; } */ *ap = v; - struct inode *ip = VTOI(ap->a_vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; if (fs->fs_magic == FS_UFS1_MAGIC) { #ifdef UFS_EXTATTR - return (ufs_getextattr(ap)); + int error; + + fstrans_start(vp->v_mount, FSTRANS_SHARED); + error = ufs_getextattr(ap); + fstrans_done(vp->v_mount); + return error; #else return (EOPNOTSUPP); #endif @@ -633,12 +648,18 @@ ffs_setextattr(void *v) kauth_cred_t a_cred; struct proc *a_p; } */ *ap = v; - struct inode *ip = VTOI(ap->a_vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; if (fs->fs_magic == FS_UFS1_MAGIC) { #ifdef UFS_EXTATTR - return (ufs_setextattr(ap)); + int error; + + fstrans_start(vp->v_mount, FSTRANS_SHARED); + error = ufs_setextattr(ap); + fstrans_done(vp->v_mount); + return error; #else return (EOPNOTSUPP); #endif @@ -679,12 +700,18 @@ ffs_deleteextattr(void *v) kauth_cred_t a_cred; struct proc *a_p; } */ *ap = v; - struct inode *ip = VTOI(ap->a_vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; if (fs->fs_magic == FS_UFS1_MAGIC) { #ifdef UFS_EXTATTR - return (ufs_deleteextattr(ap)); + int error; + + fstrans_start(vp->v_mount, FSTRANS_SHARED); + error = ufs_deleteextattr(ap); + fstrans_done(vp->v_mount); + return error; #else return (EOPNOTSUPP); #endif @@ -693,3 +720,88 @@ ffs_deleteextattr(void *v) /* XXX Not implemented for UFS2 file systems. */ return (EOPNOTSUPP); } + +/* + * Lock the node. + */ +int +ffs_lock(void *v) +{ + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct mount *mp = vp->v_mount; + struct vnlock *lkp; + int flags = ap->a_flags; + int result; + + if ((flags & LK_INTERLOCK) != 0) { + mutex_exit(&vp->v_interlock); + flags &= ~LK_INTERLOCK; + } + + /* + * Fake lock during file system suspension. + */ + if ((vp->v_type == VREG || vp->v_type == VDIR) && + fstrans_is_owner(mp) && + fstrans_getstate(mp) == FSTRANS_SUSPENDING) { + return 0; + } + + for (;;) { + lkp = vp->v_vnlock; + result = vlockmgr(lkp, flags); + if (lkp == vp->v_vnlock || result != 0) + return result; + /* + * Apparent success, except that the vnode mutated between + * snapshot file vnode and regular file vnode while this + * thread slept. The lock currently held is not the right + * lock. Release it, and try to get the new lock. + */ + (void) vlockmgr(lkp, LK_RELEASE); + } +} + +/* + * Unlock the node. + */ +int +ffs_unlock(void *v) +{ + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct mount *mp = vp->v_mount; + + KASSERT(ap->a_flags == 0); + + /* + * Fake unlock during file system suspension. + */ + if ((vp->v_type == VREG || vp->v_type == VDIR) && + fstrans_is_owner(mp) && + fstrans_getstate(mp) == FSTRANS_SUSPENDING) { + return 0; + } + return (vlockmgr(vp->v_vnlock, LK_RELEASE)); +} + +/* + * Return whether or not the node is locked. + */ +int +ffs_islocked(void *v) +{ + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + return (vlockstatus(vp->v_vnlock)); +}