Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/ufs/ufs/ufs_lookup.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/ufs/ufs/ufs_lookup.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.99.2.2 retrieving revision 1.124 diff -u -p -r1.99.2.2 -r1.124 --- src/sys/ufs/ufs/ufs_lookup.c 2009/03/03 18:34:40 1.99.2.2 +++ src/sys/ufs/ufs/ufs_lookup.c 2013/06/16 13:33:30 1.124 @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_lookup.c,v 1.99.2.2 2009/03/03 18:34:40 skrll Exp $ */ +/* $NetBSD: ufs_lookup.c,v 1.124 2013/06/16 13:33:30 hannken Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,11 +37,10 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.99.2.2 2009/03/03 18:34:40 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.124 2013/06/16 13:33:30 hannken Exp $"); #ifdef _KERNEL_OPT #include "opt_ffs.h" -#include "fs_ffs.h" #endif #include @@ -69,14 +68,14 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c #include #include +#include + #ifdef DIAGNOSTIC int dirchk = 1; #else int dirchk = 0; #endif -#define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0) - /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. @@ -123,19 +122,27 @@ ufs_lookup(void *v) struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ - enum {NONE, COMPACT, FOUND} slotstatus; - doff_t slotoffset; /* offset of area with free space */ + enum { + NONE, /* need to search a slot for our new entry */ + COMPACT, /* a compaction can make a slot in the current + DIRBLKSIZ block */ + FOUND, /* found a slot (or no need to search) */ + } slotstatus; + doff_t slotoffset; /* offset of area with free space. + a special value -1 for invalid */ int slotsize; /* size of area at slotoffset */ - int slotfreespace; /* amount of space free in slot */ + int slotfreespace; /* accumulated amount of space free in + the current DIRBLKSIZ block */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ - doff_t prevoff; /* prev entry dp->i_offset */ + doff_t prevoff; /* previous value of ulr_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ - doff_t enduseful; /* pointer past last used dir slot */ + doff_t enduseful; /* pointer past last used dir slot. + used for directory truncation. */ u_long bmask; /* block offset mask */ - int namlen, error; + int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; kauth_cred_t cred = cnp->cn_cred; @@ -145,6 +152,8 @@ ufs_lookup(void *v) const int needswap = UFS_MPNEEDSWAP(ump); int dirblksiz = ump->um_dirblksiz; ino_t foundino; + struct ufs_lookup_results *results; + int iswhiteout; /* temp result from cache_lookup() */ flags = cnp->cn_flags; @@ -152,6 +161,15 @@ ufs_lookup(void *v) slotoffset = -1; *vpp = NULL; endsearch = 0; /* silence compiler warning */ + + /* + * Produce the auxiliary lookup results into i_crap. Increment + * its serial number so elsewhere we can tell if we're using + * stale results. This should not be done this way. XXX. + */ + results = &dp->i_crap; + dp->i_crapcounter++; + /* * Check accessiblity of directory. */ @@ -169,8 +187,25 @@ ufs_lookup(void *v) * check the name cache to see if the directory/name pair * we are looking for is known already. */ - if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) { - return (error); + if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen, + cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { + if (iswhiteout) { + cnp->cn_flags |= ISWHITEOUT; + } + return *vpp == NULLVP ? ENOENT : 0; + } + if (iswhiteout) { + /* + * The namecache set iswhiteout without finding a + * cache entry. As of this writing (20121014), this + * can happen if there was a whiteout entry that has + * been invalidated by the lookup. It is not clear if + * it is correct to set ISWHITEOUT in this case or + * not; however, doing so retains the prior behavior, + * so we'll go with that until some clearer answer + * appears. XXX + */ + cnp->cn_flags |= ISWHITEOUT; } fstrans_start(vdp->v_mount, FSTRANS_SHARED); @@ -183,10 +218,9 @@ ufs_lookup(void *v) */ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; - if ((nameiop == CREATE || nameiop == RENAME) && - (flags & ISLASTCN)) { + if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; - slotneeded = DIRECTSIZ(cnp->cn_namelen); + slotneeded = UFS_DIRECTSIZ(cnp->cn_namelen); } /* @@ -226,13 +260,13 @@ ufs_lookup(void *v) numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, - &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { + &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + - (dp->i_offset & bmask)); + (results->ulr_offset & bmask)); goto foundentry; case ENOENT: - dp->i_offset = roundup(dp->i_size, dirblksiz); + results->ulr_offset = roundup(dp->i_size, dirblksiz); goto notfound; default: /* Something failed; just do a linear search. */ @@ -241,36 +275,36 @@ ufs_lookup(void *v) } #endif /* UFS_DIRHASH */ - if (nameiop != LOOKUP || dp->i_diroff == 0 || - dp->i_diroff >= dp->i_size) { + if (nameiop != LOOKUP || results->ulr_diroff == 0 || + results->ulr_diroff >= dp->i_size) { entryoffsetinblock = 0; - dp->i_offset = 0; + results->ulr_offset = 0; numdirpasses = 1; } else { - dp->i_offset = dp->i_diroff; - if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = ufs_blkatoff(vdp, (off_t)dp->i_offset, + results->ulr_offset = results->ulr_diroff; + if ((entryoffsetinblock = results->ulr_offset & bmask) && + (error = ufs_blkatoff(vdp, (off_t)results->ulr_offset, NULL, &bp, false))) goto out; numdirpasses = 2; nchstats.ncs_2passes++; } - prevoff = dp->i_offset; + prevoff = results->ulr_offset; endsearch = roundup(dp->i_size, dirblksiz); enduseful = 0; searchloop: - while (dp->i_offset < endsearch) { + while (results->ulr_offset < endsearch) { if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) preempt(); /* * If necessary, get the next directory block. */ - if ((dp->i_offset & bmask) == 0) { + if ((results->ulr_offset & bmask) == 0) { if (bp != NULL) brelse(bp, 0); - error = ufs_blkatoff(vdp, (off_t)dp->i_offset, NULL, - &bp, false); + error = ufs_blkatoff(vdp, (off_t)results->ulr_offset, + NULL, &bp, false); if (error) goto out; entryoffsetinblock = 0; @@ -297,9 +331,9 @@ searchloop: (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; - ufs_dirbad(dp, dp->i_offset, "mangled entry"); + ufs_dirbad(dp, results->ulr_offset, "mangled entry"); i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1)); - dp->i_offset += i; + results->ulr_offset += i; entryoffsetinblock += i; continue; } @@ -314,20 +348,20 @@ searchloop: int size = ufs_rw16(ep->d_reclen, needswap); if (ep->d_ino != 0) - size -= DIRSIZ(FSFMT(vdp), ep, needswap); + size -= UFS_DIRSIZ(FSFMT(vdp), ep, needswap); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; - slotoffset = dp->i_offset; + slotoffset = results->ulr_offset; slotsize = ufs_rw16(ep->d_reclen, needswap); } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) - slotoffset = dp->i_offset; + slotoffset = results->ulr_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; - slotsize = dp->i_offset + + slotsize = results->ulr_offset + ufs_rw16(ep->d_reclen, needswap) - slotoffset; @@ -340,6 +374,8 @@ searchloop: * Check for a name match. */ if (ep->d_ino) { + int namlen; + #if (BYTE_ORDER == LITTLE_ENDIAN) if (FSFMT(vdp) && needswap == 0) namlen = ep->d_type; @@ -359,18 +395,18 @@ foundentry: #endif /* * Save directory entry's inode number and - * reclen in ndp->ni_ufs area, and release - * directory buffer. + * reclen, and release directory buffer. */ if (!FSFMT(vdp) && ep->d_type == DT_WHT) { slotstatus = FOUND; - slotoffset = dp->i_offset; + slotoffset = results->ulr_offset; slotsize = ufs_rw16(ep->d_reclen, needswap); - dp->i_reclen = slotsize; + results->ulr_reclen = slotsize; /* - * This is used to set dp->i_endoff, - * which may be used by ufs_direnter2() + * This is used to set + * results->ulr_endoff, + * which may be used by ufs_direnter() * as a length to truncate the * directory to. Therefore, it must * point past the end of the last @@ -385,20 +421,21 @@ foundentry: * an existing entry anyway. */ enduseful = endsearch; - ap->a_cnp->cn_flags |= ISWHITEOUT; + cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } foundino = ufs_rw32(ep->d_ino, needswap); - dp->i_reclen = ufs_rw16(ep->d_reclen, needswap); + results->ulr_reclen = + ufs_rw16(ep->d_reclen, needswap); goto found; } } - prevoff = dp->i_offset; - dp->i_offset += ufs_rw16(ep->d_reclen, needswap); + prevoff = results->ulr_offset; + results->ulr_offset += ufs_rw16(ep->d_reclen, needswap); entryoffsetinblock += ufs_rw16(ep->d_reclen, needswap); if (ep->d_ino) - enduseful = dp->i_offset; + enduseful = results->ulr_offset; } notfound: /* @@ -407,8 +444,8 @@ notfound: */ if (numdirpasses == 2) { numdirpasses--; - dp->i_offset = 0; - endsearch = dp->i_diroff; + results->ulr_offset = 0; + endsearch = results->ulr_diroff; goto searchloop; } if (bp != NULL) @@ -420,8 +457,8 @@ notfound: */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && - (ap->a_cnp->cn_flags & DOWHITEOUT) && - (ap->a_cnp->cn_flags & ISWHITEOUT))) && + (cnp->cn_flags & DOWHITEOUT) && + (cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing @@ -433,29 +470,30 @@ notfound: /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, - * then set dp->i_count to 0 indicating + * then set results->ulr_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry - * can be put in the range from dp->i_offset to - * dp->i_offset + dp->i_count. + * can be put in the range from results->ulr_offset to + * results->ulr_offset + results->ulr_count. */ if (slotstatus == NONE) { - dp->i_offset = roundup(dp->i_size, dirblksiz); - dp->i_count = 0; - enduseful = dp->i_offset; + results->ulr_offset = roundup(dp->i_size, dirblksiz); + results->ulr_count = 0; + enduseful = results->ulr_offset; } else if (nameiop == DELETE) { - dp->i_offset = slotoffset; - if ((dp->i_offset & (dirblksiz - 1)) == 0) - dp->i_count = 0; + results->ulr_offset = slotoffset; + if ((results->ulr_offset & (dirblksiz - 1)) == 0) + results->ulr_count = 0; else - dp->i_count = dp->i_offset - prevoff; + results->ulr_count = + results->ulr_offset - prevoff; } else { - dp->i_offset = slotoffset; - dp->i_count = slotsize; + results->ulr_offset = slotoffset; + results->ulr_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } - dp->i_endoff = roundup(enduseful, dirblksiz); + results->ulr_endoff = roundup(enduseful, dirblksiz); #if 0 /* commented out by dbj. none of the on disk fields changed */ dp->i_flag |= IN_CHANGE | IN_UPDATE; #endif @@ -466,21 +504,20 @@ notfound: * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. - * The pathname buffer is saved so that the name - * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ - cnp->cn_flags |= SAVENAME; error = EJUSTRETURN; goto out; } /* * Insert name into cache (as non-existent) if appropriate. */ - if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) - cache_enter(vdp, *vpp, cnp); + if (nameiop != CREATE) { + cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, + cnp->cn_flags); + } error = ENOENT; goto out; @@ -491,9 +528,10 @@ found: * Check that directory length properly reflects presence * of this entry. */ - if (dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) { - ufs_dirbad(dp, dp->i_offset, "i_size too small"); - dp->i_size = dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap); + if (results->ulr_offset + UFS_DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) { + ufs_dirbad(dp, results->ulr_offset, "i_size too small"); + dp->i_size = + results->ulr_offset + UFS_DIRSIZ(FSFMT(vdp), ep, needswap); DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); @@ -506,7 +544,7 @@ found: * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) - dp->i_diroff = dp->i_offset &~ (dirblksiz - 1); + results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1); /* * If deleting, and at end of pathname, return @@ -515,48 +553,56 @@ found: */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* - * Write access to directory required to delete files. - */ - error = VOP_ACCESS(vdp, VWRITE, cred); - if (error) - goto out; - /* - * Return pointer to current entry in dp->i_offset, + * Return pointer to current entry in results->ulr_offset, * and distance past previous entry (if there - * is a previous entry in this block) in dp->i_count. + * is a previous entry in this block) in results->ulr_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ - if ((dp->i_offset & (dirblksiz - 1)) == 0) - dp->i_count = 0; + if ((results->ulr_offset & (dirblksiz - 1)) == 0) + results->ulr_count = 0; else - dp->i_count = dp->i_offset - prevoff; + results->ulr_count = results->ulr_offset - prevoff; if (dp->i_number == foundino) { - VREF(vdp); - *vpp = vdp; - error = 0; - goto out; + vref(vdp); + tdp = vdp; + } else { + if (flags & ISDOTDOT) + VOP_UNLOCK(vdp); /* race to get the inode */ + error = VFS_VGET(vdp->v_mount, foundino, &tdp); + if (flags & ISDOTDOT) + vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); + if (error) + goto out; } - if (flags & ISDOTDOT) - VOP_UNLOCK(vdp, 0); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - if (flags & ISDOTDOT) - vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); - if (error) + /* + * Write access to directory required to delete files. + */ + error = VOP_ACCESS(vdp, VWRITE, cred); + if (error) { + if (dp->i_number == foundino) + vrele(tdp); + else + vput(tdp); goto out; + } /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ - if ((dp->i_mode & ISVTX) && - kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, - NULL) != 0 && - kauth_cred_geteuid(cred) != dp->i_uid && - VTOI(tdp)->i_uid != kauth_cred_geteuid(cred)) { - vput(tdp); - error = EPERM; - goto out; + if (dp->i_mode & ISVTX) { + error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, + tdp, vdp, genfs_can_sticky(cred, dp->i_uid, + VTOI(tdp)->i_uid)); + if (error) { + if (dp->i_number == foundino) + vrele(tdp); + else + vput(tdp); + error = EPERM; + goto out; + } } *vpp = tdp; error = 0; @@ -582,14 +628,13 @@ found: goto out; } if (flags & ISDOTDOT) - VOP_UNLOCK(vdp, 0); /* race to get the inode */ + VOP_UNLOCK(vdp); /* race to get the inode */ error = VFS_VGET(vdp->v_mount, foundino, &tdp); if (flags & ISDOTDOT) vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); if (error) goto out; *vpp = tdp; - cnp->cn_flags |= SAVENAME; error = 0; goto out; } @@ -615,7 +660,7 @@ found: */ pdp = vdp; if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp, 0); /* race to get the inode */ + VOP_UNLOCK(pdp); /* race to get the inode */ error = VFS_VGET(vdp->v_mount, foundino, &tdp); vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); if (error) { @@ -623,7 +668,7 @@ found: } *vpp = tdp; } else if (dp->i_number == foundino) { - VREF(vdp); /* we want ourself, ie "." */ + vref(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = VFS_VGET(vdp->v_mount, foundino, &tdp); @@ -635,8 +680,7 @@ found: /* * Insert name into cache if appropriate. */ - if (cnp->cn_flags & MAKEENTRY) - cache_enter(vdp, *vpp, cnp); + cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); error = 0; out: @@ -689,13 +733,13 @@ ufs_dirbadentry(struct vnode *dp, struct ufs_rw16(ep->d_reclen, needswap) > dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) || ufs_rw16(ep->d_reclen, needswap) < - DIRSIZ(FSFMT(dp), ep, needswap) || + UFS_DIRSIZ(FSFMT(dp), ep, needswap) || namlen > FFS_MAXNAMLEN) { /*return (1); */ printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, " "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n", ufs_rw16(ep->d_reclen, needswap), - (u_long)DIRSIZ(FSFMT(dp), ep, needswap), + (u_long)UFS_DIRSIZ(FSFMT(dp), ep, needswap), namlen, dp->v_mount->mnt_flag, entryoffsetinblock, dirblksiz); goto bad; @@ -717,17 +761,13 @@ bad: /* * Construct a new directory entry after a call to namei, using the - * parameters that it left in the componentname argument cnp. The - * argument ip is the inode to which the new directory entry will refer. + * name in the componentname argument cnp. The argument ip is the + * inode to which the new directory entry will refer. */ void ufs_makedirentry(struct inode *ip, struct componentname *cnp, struct direct *newdirp) { -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & SAVENAME) == 0) - panic("makedirentry: missing name"); -#endif newdirp->d_ino = ip->i_number; newdirp->d_namlen = cnp->cn_namelen; memcpy(newdirp->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen); @@ -740,15 +780,36 @@ ufs_makedirentry(struct inode *ip, struc /* * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument dirp is the new directory - * entry contents. Dvp is a pointer to the directory to be written, - * which was left locked by namei. Remaining parameters (dp->i_offset, - * dp->i_count) indicate how the space for the new entry is to be obtained. - * Non-null bp indicates that a directory is being created (for the - * soft dependency code). + * that ufs_lookup left in nameidata and in the ufs_lookup_results. + * + * DVP is the directory to be updated. It must be locked. + * ULR is the ufs_lookup_results structure from the final lookup step. + * TVP is not used. (XXX: why is it here? remove it) + * DIRP is the new directory entry contents. + * CNP is the componentname from the final lookup step. + * NEWDIRBP is not used and (XXX) should be removed. The previous + * comment here said it was used by the now-removed softupdates code. + * + * The link count of the target inode is *not* incremented; the + * caller does that. + * + * If ulr->ulr_count is 0, ufs_lookup did not find space to insert the + * directory entry. ulr_offset, which is the place to put the entry, + * should be on a block boundary (and should be at the end of the + * directory AFAIK) and a fresh block is allocated to put the new + * directory entry in. + * + * If ulr->ulr_count is not zero, ufs_lookup found a slot to insert + * the entry into. This slot ranges from ulr_offset to ulr_offset + + * ulr_count. However, this slot may already be partially populated + * requiring compaction. See notes below. + * + * Furthermore, if ulr_count is not zero and ulr_endoff is not the + * same as i_size, the directory is truncated to size ulr_endoff. */ int -ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, +ufs_direnter(struct vnode *dvp, const struct ufs_lookup_results *ulr, + struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { kauth_cred_t cr; @@ -772,22 +833,22 @@ ufs_direnter(struct vnode *dvp, struct v l = curlwp; dp = VTOI(dvp); - newentrysize = DIRSIZ(0, dirp, 0); + newentrysize = UFS_DIRSIZ(0, dirp, 0); - if (dp->i_count == 0) { + if (ulr->ulr_count == 0) { /* - * If dp->i_count is 0, then namei could find no - * space in the directory. Here, dp->i_offset will + * If ulr_count is 0, then namei could find no + * space in the directory. Here, ulr_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ - if (dp->i_offset & (dirblksiz - 1)) + if (ulr->ulr_offset & (dirblksiz - 1)) panic("ufs_direnter: newblk"); - if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, dirblksiz, + if ((error = UFS_BALLOC(dvp, (off_t)ulr->ulr_offset, dirblksiz, cr, B_CLRBUF | B_SYNC, &bp)) != 0) { return (error); } - dp->i_size = dp->i_offset + dirblksiz; + dp->i_size = ulr->ulr_offset + dirblksiz; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, dp->i_size); @@ -804,17 +865,17 @@ ufs_direnter(struct vnode *dvp, struct v dirp->d_type = tmp; } } - blkoff = dp->i_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); + blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); memcpy((char *)bp->b_data + blkoff, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { - ufsdirhash_newblk(dp, dp->i_offset); - ufsdirhash_add(dp, dirp, dp->i_offset); + ufsdirhash_newblk(dp, ulr->ulr_offset); + ufsdirhash_add(dp, dirp, ulr->ulr_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, - dp->i_offset); + ulr->ulr_offset); } #endif - error = VOP_BWRITE(bp); + error = VOP_BWRITE(bp->b_vp, bp); vfs_timestamp(&ts); ret = UFS_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); if (error == 0) @@ -823,8 +884,8 @@ ufs_direnter(struct vnode *dvp, struct v } /* - * If dp->i_count is non-zero, then namei found space for the new - * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * If ulr_count is non-zero, then namei found space for the new + * entry in the range ulr_offset to ulr_offset + ulr_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable @@ -838,8 +899,12 @@ ufs_direnter(struct vnode *dvp, struct v * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ - if (dp->i_offset + dp->i_count > dp->i_size) { - dp->i_size = dp->i_offset + dp->i_count; + if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { +#ifdef DIAGNOSTIC + printf("ufs_direnter: reached 4.2-only block, " + "not supposed to happen\n"); +#endif + dp->i_size = ulr->ulr_offset + ulr->ulr_count; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); @@ -847,20 +912,20 @@ ufs_direnter(struct vnode *dvp, struct v /* * Get the block containing the space for the new directory entry. */ - error = ufs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp, true); + error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); if (error) { return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei - * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the space. + * arranged that compacting the region ulr_offset to + * ulr_offset + ulr_count would yield the space. */ ep = (struct direct *)dirbuf; - dsize = (ep->d_ino != 0) ? DIRSIZ(FSFMT(dvp), ep, needswap) : 0; + dsize = (ep->d_ino != 0) ? UFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0; spacefree = ufs_rw16(ep->d_reclen, needswap) - dsize; - for (loc = ufs_rw16(ep->d_reclen, needswap); loc < dp->i_count; ) { + for (loc = ufs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { uint16_t reclen; nep = (struct direct *)(dirbuf + loc); @@ -885,13 +950,13 @@ ufs_direnter(struct vnode *dvp, struct v dsize = 0; continue; } - dsize = DIRSIZ(FSFMT(dvp), nep, needswap); + dsize = UFS_DIRSIZ(FSFMT(dvp), nep, needswap); spacefree += reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, - dp->i_offset + ((char *)nep - dirbuf), - dp->i_offset + ((char *)ep - dirbuf)); + ulr->ulr_offset + ((char *)nep - dirbuf), + ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy((void *)ep, (void *)nep, dsize); } @@ -905,7 +970,7 @@ ufs_direnter(struct vnode *dvp, struct v * copy in the new entry, and write out the block. */ if (ep->d_ino == 0 || - (ufs_rw32(ep->d_ino, needswap) == WINO && + (ufs_rw32(ep->d_ino, needswap) == UFS_WINO && memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); @@ -933,16 +998,16 @@ ufs_direnter(struct vnode *dvp, struct v #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) - ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); + ufsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - - (dp->i_offset & (dirblksiz - 1)), - dp->i_offset & ~(dirblksiz - 1)); + (ulr->ulr_offset & (dirblksiz - 1)), + ulr->ulr_offset & ~(dirblksiz - 1)); #endif - error = VOP_BWRITE(bp); + error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed @@ -951,31 +1016,55 @@ ufs_direnter(struct vnode *dvp, struct v * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ - if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) - ufsdirhash_dirtrunc(dp, dp->i_endoff); + ufsdirhash_dirtrunc(dp, ulr->ulr_endoff); #endif - (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr); + (void) UFS_TRUNCATE(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); } UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); return (error); } /* - * Remove a directory entry after a call to namei, using - * the parameters which it left in nameidata. The entry - * dp->i_offset contains the offset into the directory of the - * entry to be eliminated. The dp->i_count field contains the - * size of the previous record in the directory. If this - * is 0, the first entry is being deleted, so we need only - * zero the inode number to mark the entry as free. If the - * entry is not the first in the directory, we must reclaim - * the space of the now empty record by adding the record size - * to the size of the previous entry. + * Remove a directory entry after a call to namei, using the + * parameters that ufs_lookup left in nameidata and in the + * ufs_lookup_results. + * + * DVP is the directory to be updated. It must be locked. + * ULR is the ufs_lookup_results structure from the final lookup step. + * IP, if not null, is the inode being unlinked. + * FLAGS may contain DOWHITEOUT. + * ISRMDIR is not used and (XXX) should be removed. + * + * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout + * instead of being cleared. + * + * ulr->ulr_offset contains the position of the directory entry + * to be removed. + * + * ulr->ulr_reclen contains the size of the directory entry to be + * removed. + * + * ulr->ulr_count contains the size of the *previous* directory + * entry. This allows finding it, for free space management. If + * ulr_count is 0, the target entry is at the beginning of the + * directory. (Does this ever happen? The first entry should be ".", + * which should only be removed at rmdir time. Does rmdir come here + * to clear out the "." and ".." entries? Perhaps, but I doubt it.) + * + * The space is marked free by adding it to the record length (not + * name length) of the preceding entry. If the first entry becomes + * free, it is marked free by setting the inode number to 0. + * + * The link count of IP is decremented. Note that this is not the + * inverse behavior of ufs_direnter, which does not adjust link + * counts. Sigh. */ int -ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) +ufs_dirremove(struct vnode *dvp, const struct ufs_lookup_results *ulr, + struct inode *ip, int flags, int isrmdir) { struct inode *dp = VTOI(dvp); struct direct *ep; @@ -989,33 +1078,33 @@ ufs_dirremove(struct vnode *dvp, struct if (flags & DOWHITEOUT) { /* - * Whiteout entry: set d_ino to WINO. + * Whiteout entry: set d_ino to UFS_WINO. */ - error = ufs_blkatoff(dvp, (off_t)dp->i_offset, (void *)&ep, + error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, &bp, true); if (error) return (error); - ep->d_ino = ufs_rw32(WINO, needswap); + ep->d_ino = ufs_rw32(UFS_WINO, needswap); ep->d_type = DT_WHT; goto out; } if ((error = ufs_blkatoff(dvp, - (off_t)(dp->i_offset - dp->i_count), (void *)&ep, &bp, true)) != 0) + (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) return (error); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact - * that `ep' is the previous entry when dp->i_count != 0. + * that `ep' is the previous entry when ulr_count != 0. */ if (dp->i_dirhash != NULL) - ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : + ufsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : (struct direct *)((char *)ep + - ufs_rw16(ep->d_reclen, needswap)), dp->i_offset); + ufs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); #endif - if (dp->i_count == 0) { + if (ulr->ulr_count == 0) { /* * First entry in block: set d_ino to zero. */ @@ -1025,7 +1114,7 @@ ufs_dirremove(struct vnode *dvp, struct * Collapse new free space into previous entry. */ ep->d_reclen = - ufs_rw16(ufs_rw16(ep->d_reclen, needswap) + dp->i_reclen, + ufs_rw16(ufs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, needswap); } @@ -1033,8 +1122,8 @@ ufs_dirremove(struct vnode *dvp, struct if (dp->i_dirhash != NULL) { int dirblksiz = ip->i_ump->um_dirblksiz; ufsdirhash_checkblock(dp, (char *)ep - - ((dp->i_offset - dp->i_count) & (dirblksiz - 1)), - dp->i_offset & ~(dirblksiz - 1)); + ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), + ulr->ulr_offset & ~(dirblksiz - 1)); } #endif @@ -1045,9 +1134,15 @@ out: ip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0); } - error = VOP_BWRITE(bp); + /* + * XXX did it ever occur to anyone that it might be a good + * idea to restore ip->i_nlink if this fails? Or something? + * Currently on error return from this function the state of + * ip->i_nlink depends on what happened, and callers + * definitely do not take this into account. + */ + error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; -#ifdef FFS /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed @@ -1055,19 +1150,30 @@ out: */ if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_nlink == 0) - ffs_snapgone(ip); + UFS_SNAPGONE(ITOV(ip)); UFS_WAPBL_UPDATE(dvp, NULL, NULL, 0); -#endif return (error); } /* - * Rewrite an existing directory entry to point at the inode - * supplied. The parameters describing the directory entry are - * set up by a call to namei. + * Rewrite an existing directory entry to point at the inode supplied. + * + * DP is the directory to update. + * OFFSET is the position of the entry in question. It may come + * from ulr_offset of a ufs_lookup_results. + * OIP is the old inode the directory previously pointed to. + * NEWINUM is the number of the new inode. + * NEWTYPE is the new value for the type field of the directory entry. + * (This is ignored if the fs doesn't support that.) + * ISRMDIR is not used and (XXX) should be removed. + * IFLAGS are added to DP's inode flags. + * + * The link count of OIP is decremented. Note that the link count of + * the new inode is *not* incremented. Yay for symmetry. */ int -ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, +ufs_dirrewrite(struct inode *dp, off_t offset, + struct inode *oip, ino_t newinum, int newtype, int isrmdir, int iflags) { struct buf *bp; @@ -1075,7 +1181,7 @@ ufs_dirrewrite(struct inode *dp, struct struct vnode *vdp = ITOV(dp); int error; - error = ufs_blkatoff(vdp, (off_t)dp->i_offset, (void *)&ep, &bp, true); + error = ufs_blkatoff(vdp, offset, (void *)&ep, &bp, true); if (error) return (error); ep->d_ino = ufs_rw32(newinum, UFS_MPNEEDSWAP(dp->i_ump)); @@ -1085,18 +1191,16 @@ ufs_dirrewrite(struct inode *dp, struct DIP_ASSIGN(oip, nlink, oip->i_nlink); oip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP); - error = VOP_BWRITE(bp); + error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= iflags; -#ifdef FFS /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) - ffs_snapgone(oip); + UFS_SNAPGONE(ITOV(oip)); UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); -#endif return (error); } @@ -1134,7 +1238,7 @@ ufs_dirempty(struct inode *ip, ino_t par if (dp->d_reclen == 0) return (0); /* skip empty entries */ - if (dp->d_ino == 0 || ufs_rw32(dp->d_ino, needswap) == WINO) + if (dp->d_ino == 0 || ufs_rw32(dp->d_ino, needswap) == UFS_WINO) continue; /* accept only "." and ".." */ #if (BYTE_ORDER == LITTLE_ENDIAN) @@ -1176,7 +1280,7 @@ ufs_dirempty(struct inode *ip, ino_t par int ufs_checkpath(struct inode *source, struct inode *target, kauth_cred_t cred) { - struct vnode *vp = ITOV(target); + struct vnode *nextvp, *vp; int error, rootino, namlen; struct dirtemplate dirbuf; const int needswap = UFS_MPNEEDSWAP(target->i_ump); @@ -1186,7 +1290,7 @@ ufs_checkpath(struct inode *source, stru error = EEXIST; goto out; } - rootino = ROOTINO; + rootino = UFS_ROOTINO; error = 0; if (target->i_number == rootino) goto out; @@ -1224,13 +1328,15 @@ ufs_checkpath(struct inode *source, stru } if (ufs_rw32(dirbuf.dotdot_ino, needswap) == rootino) break; - vput(vp); + VOP_UNLOCK(vp); error = VFS_VGET(vp->v_mount, - ufs_rw32(dirbuf.dotdot_ino, needswap), &vp); + ufs_rw32(dirbuf.dotdot_ino, needswap), &nextvp); + vrele(vp); if (error) { vp = NULL; break; } + vp = nextvp; } out: @@ -1241,12 +1347,135 @@ out: return (error); } +/* + * Extract the inode number of ".." from a directory. + * Helper for ufs_parentcheck. + */ +static int +ufs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t *result) +{ + struct dirtemplate dirbuf; + int namlen, error; + + error = vn_rdwr(UIO_READ, vp, &dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED, cred, NULL, NULL); + if (error) { + return error; + } + +#if (BYTE_ORDER == LITTLE_ENDIAN) + if (FSFMT(vp) && needswap == 0) + namlen = dirbuf.dotdot_type; + else + namlen = dirbuf.dotdot_namlen; +#else + if (FSFMT(vp) && needswap != 0) + namlen = dirbuf.dotdot_type; + else + namlen = dirbuf.dotdot_namlen; +#endif + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + printf("ufs_readdotdot: directory %llu contains " + "garbage instead of ..\n", + (unsigned long long) VTOI(vp)->i_number); + return ENOTDIR; + } + *result = ufs_rw32(dirbuf.dotdot_ino, needswap); + return 0; +} + +/* + * Check if LOWER is a descendent of UPPER. If we find UPPER, return + * nonzero in FOUND and return a reference to the immediate descendent + * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we + * reach the volume root and that isn't UPPER), return zero in FOUND + * and null in UPPERCHILD. + * + * Neither UPPER nor LOWER should be locked. + * + * On error (such as a permissions error checking up the directory + * tree) fail entirely. + * + * Note that UPPER and LOWER must be on the same volume, and because + * we inspect only that volume NEEDSWAP can be constant. + */ +int +ufs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred, + int *found_ret, struct vnode **upperchild_ret) +{ + const int needswap = UFS_MPNEEDSWAP(VTOI(lower)->i_ump); + ino_t upper_ino, found_ino; + struct vnode *current, *next; + int error; + + if (upper == lower) { + vref(upper); + *found_ret = 1; + *upperchild_ret = upper; + return 0; + } + if (VTOI(lower)->i_number == UFS_ROOTINO) { + *found_ret = 0; + *upperchild_ret = NULL; + return 0; + } + + upper_ino = VTOI(upper)->i_number; + + current = lower; + vref(current); + vn_lock(current, LK_EXCLUSIVE | LK_RETRY); + + for (;;) { + error = ufs_readdotdot(current, needswap, cred, &found_ino); + if (error) { + vput(current); + return error; + } + if (found_ino == upper_ino) { + VOP_UNLOCK(current); + *found_ret = 1; + *upperchild_ret = current; + return 0; + } + if (found_ino == UFS_ROOTINO) { + vput(current); + *found_ret = 0; + *upperchild_ret = NULL; + return 0; + } + VOP_UNLOCK(current); + error = VFS_VGET(current->v_mount, found_ino, &next); + if (error) { + vrele(current); + return error; + } + KASSERT(VOP_ISLOCKED(next)); + if (next->v_type != VDIR) { + printf("ufs_parentcheck: inode %llu reached via .. of " + "inode %llu is not a directory\n", + (unsigned long long)VTOI(next)->i_number, + (unsigned long long)VTOI(current)->i_number); + vput(next); + vrele(current); + return ENOTDIR; + } + vrele(current); + current = next; + } + + return 0; +} + #define UFS_DIRRABLKS 0 int ufs_dirrablks = UFS_DIRRABLKS; /* * ufs_blkatoff: Return buffer with the contents of block "offset" from - * the beginning of directory "vp". If "res" is non-zero, fill it in with + * the beginning of directory "vp". If "res" is non-NULL, fill it in with * a pointer to the remaining space in the directory. If the caller intends * to modify the buffer returned, "modify" must be true. */ @@ -1294,7 +1523,6 @@ ufs_blkatoff(struct vnode *vp, off_t off error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); if (error != 0) { - brelse(bp, 0); *bpp = NULL; goto out; }