Annotation of src/sys/ufs/ffs/ffs_snapshot.c, Revision 1.46.6.3
1.46.6.3! joerg 1: /* $NetBSD: ffs_snapshot.c,v 1.46.6.2 2007/09/03 16:49:15 jmcneill Exp $ */
1.18 thorpej 2:
1.1 hannken 3: /*
4: * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
5: *
6: * Further information about snapshots can be obtained from:
7: *
8: * Marshall Kirk McKusick http://www.mckusick.com/softdep/
9: * 1614 Oxford Street mckusick@mckusick.com
10: * Berkeley, CA 94709-1608 +1-510-843-9542
11: * USA
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: *
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: *
23: * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
24: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26: * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
27: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
36: *
37: * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp
38: */
39:
40: #include <sys/cdefs.h>
1.46.6.3! joerg 41: __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.46.6.2 2007/09/03 16:49:15 jmcneill Exp $");
1.8 hannken 42:
43: #if defined(_KERNEL_OPT)
44: #include "opt_ffs.h"
45: #endif
1.1 hannken 46:
47: #include <sys/param.h>
48: #include <sys/kernel.h>
49: #include <sys/systm.h>
50: #include <sys/conf.h>
51: #include <sys/buf.h>
52: #include <sys/proc.h>
53: #include <sys/namei.h>
54: #include <sys/sched.h>
55: #include <sys/stat.h>
56: #include <sys/malloc.h>
57: #include <sys/mount.h>
58: #include <sys/resource.h>
59: #include <sys/resourcevar.h>
60: #include <sys/vnode.h>
1.29 elad 61: #include <sys/kauth.h>
1.40 hannken 62: #include <sys/fstrans.h>
1.1 hannken 63:
64: #include <miscfs/specfs/specdev.h>
65:
66: #include <ufs/ufs/quota.h>
67: #include <ufs/ufs/ufsmount.h>
68: #include <ufs/ufs/inode.h>
69: #include <ufs/ufs/ufs_extern.h>
70: #include <ufs/ufs/ufs_bswap.h>
71:
72: #include <ufs/ffs/fs.h>
73: #include <ufs/ffs/ffs_extern.h>
74:
/*
 * FreeBSD -> NetBSD conversion
 *
 * Thin compatibility shims so the code imported from FreeBSD can keep
 * its original spelling for credentials, block-address types and the
 * vnode / mount-list interlocks.
 */
#define	KERNCRED	lwp0.l_cred

/* Block address types. */
#define	ufs1_daddr_t	int32_t
#define	ufs2_daddr_t	int64_t
#define	ufs_lbn_t	daddr_t

/* Per-vnode interlock. */
#define	VI_MTX(v)	(&(v)->v_interlock)
#define	VI_LOCK(v)	simple_lock(&(v)->v_interlock)
#define	VI_UNLOCK(v)	simple_unlock(&(v)->v_interlock)

/* Mount vnode list lock; the argument is ignored, the lock is global. */
#define	MNT_ILOCK(v)	simple_lock(&mntvnode_slock)
#define	MNT_IUNLOCK(v)	simple_unlock(&mntvnode_slock)
85:
1.10 hannken 86: #if !defined(FFS_NO_SNAPSHOT)
1.43 christos 87: static int cgaccount(int, struct vnode *, void *, int);
1.1 hannken 88: static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
89: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
90: ufs_lbn_t, int), int);
91: static int indiracct_ufs1(struct vnode *, struct vnode *, int,
92: ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
93: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
94: ufs_lbn_t, int), int);
95: static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
96: struct fs *, ufs_lbn_t, int);
97: static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
98: struct fs *, ufs_lbn_t, int);
99: static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
100: struct fs *, ufs_lbn_t, int);
101: static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
102: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
103: ufs_lbn_t, int), int);
104: static int indiracct_ufs2(struct vnode *, struct vnode *, int,
105: ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
106: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
107: ufs_lbn_t, int), int);
108: static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
109: struct fs *, ufs_lbn_t, int);
110: static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
111: struct fs *, ufs_lbn_t, int);
112: static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
113: struct fs *, ufs_lbn_t, int);
1.43 christos 114: static int readvnblk(struct vnode *, void *, ufs2_daddr_t);
1.10 hannken 115: #endif /* !defined(FFS_NO_SNAPSHOT) */
116:
1.46.6.1 jmcneill 117: static void si_mount_dtor(void *);
118: static struct snap_info *si_mount_init(struct mount *);
1.1 hannken 119: static int ffs_copyonwrite(void *, struct buf *);
1.43 christos 120: static int readfsblk(struct vnode *, void *, ufs2_daddr_t);
121: static int writevnblk(struct vnode *, void *, ufs2_daddr_t);
1.4 hannken 122: static inline int cow_enter(void);
123: static inline void cow_leave(int);
1.1 hannken 124: static inline ufs2_daddr_t db_get(struct inode *, int);
125: static inline void db_assign(struct inode *, int, ufs2_daddr_t);
1.43 christos 126: static inline ufs2_daddr_t idb_get(struct inode *, void *, int);
127: static inline void idb_assign(struct inode *, void *, int, ufs2_daddr_t);
1.1 hannken 128:
1.46.6.1 jmcneill 129: struct snap_info {
1.46.6.2 jmcneill 130: kmutex_t si_lock; /* Lock this snapinfo */
131: struct lock si_vnlock; /* Snapshot vnode common lock */
1.46.6.1 jmcneill 132: TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */
133: daddr_t *si_snapblklist; /* Snapshot block hints list */
1.46.6.2 jmcneill 134: uint32_t si_gen; /* Incremented on change */
1.46.6.1 jmcneill 135: };
136:
1.1 hannken 137: #ifdef DEBUG
138: static int snapdebug = 0;
139: #endif
1.46.6.1 jmcneill 140: static kmutex_t si_mount_init_lock;
141: static specificdata_key_t si_mount_data_key;
142:
143: void
144: ffs_snapshot_init(void)
145: {
146: int error;
147:
148: error = mount_specific_key_create(&si_mount_data_key, si_mount_dtor);
149: KASSERT(error == 0);
150: mutex_init(&si_mount_init_lock, MUTEX_DEFAULT, IPL_NONE);
151: }
152:
153: void
154: ffs_snapshot_fini(void)
155: {
156: mount_specific_key_delete(si_mount_data_key);
157: mutex_destroy(&si_mount_init_lock);
158: }
159:
160: static void
161: si_mount_dtor(void *arg)
162: {
163: struct snap_info *si = arg;
164:
165: KASSERT(TAILQ_EMPTY(&si->si_snapshots));
1.46.6.2 jmcneill 166: mutex_destroy(&si->si_lock);
1.46.6.1 jmcneill 167: KASSERT(si->si_snapblklist == NULL);
168: free(si, M_MOUNT);
169: }
170:
171: static struct snap_info *
172: si_mount_init(struct mount *mp)
173: {
174: struct snap_info *new;
175:
176: mutex_enter(&si_mount_init_lock);
177:
178: if ((new = mount_getspecific(mp, si_mount_data_key)) != NULL) {
179: mutex_exit(&si_mount_init_lock);
180: return new;
181: }
182:
183: new = malloc(sizeof(*new), M_MOUNT, M_WAITOK);
184: TAILQ_INIT(&new->si_snapshots);
1.46.6.2 jmcneill 185: mutex_init(&new->si_lock, MUTEX_DEFAULT, IPL_NONE);
186: new->si_gen = 0;
1.46.6.1 jmcneill 187: new->si_snapblklist = NULL;
188: mount_setspecific(mp, si_mount_data_key, new);
189: mutex_exit(&si_mount_init_lock);
190: return new;
191: }
1.1 hannken 192:
193: /*
194: * Create a snapshot file and initialize it for the filesystem.
1.4 hannken 195: * Vnode is locked on entry and return.
1.1 hannken 196: */
197: int
1.36 christos 198: ffs_snapshot(struct mount *mp, struct vnode *vp,
199: struct timespec *ctime)
1.1 hannken 200: {
1.10 hannken 201: #if defined(FFS_NO_SNAPSHOT)
202: return EOPNOTSUPP;
203: }
204: #else /* defined(FFS_NO_SNAPSHOT) */
1.1 hannken 205: ufs2_daddr_t numblks, blkno, *blkp, snaplistsize = 0, *snapblklist;
206: int error, ns, cg, snaploc;
1.15 hannken 207: int i, s, size, len, loc;
1.1 hannken 208: int flag = mp->mnt_flag;
209: struct timeval starttime;
210: #ifdef DEBUG
211: struct timeval endtime;
212: #endif
213: struct timespec ts;
214: long redo = 0;
215: int32_t *lp;
216: void *space;
1.43 christos 217: void *sbbuf = NULL;
1.46.6.1 jmcneill 218: struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
1.23 christos 219: struct lwp *l = curlwp;
1.1 hannken 220: struct inode *ip, *xp;
1.15 hannken 221: struct buf *bp, *ibp, *nbp;
1.1 hannken 222: struct vattr vat;
1.35 reinoud 223: struct vnode *xvp, *nvp, *devvp;
1.46.6.1 jmcneill 224: struct snap_info *si;
1.1 hannken 225:
226: ns = UFS_FSNEEDSWAP(fs);
1.46.6.1 jmcneill 227: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
228: si = si_mount_init(mp);
1.1 hannken 229: /*
230: * Need to serialize access to snapshot code per filesystem.
231: */
232: /*
233: * If the vnode already is a snapshot, return.
234: */
235: if (VTOI(vp)->i_flags & SF_SNAPSHOT) {
236: if (ctime) {
237: ctime->tv_sec = DIP(VTOI(vp), mtime);
238: ctime->tv_nsec = DIP(VTOI(vp), mtimensec);
239: }
240: return 0;
241: }
242: /*
1.9 hannken 243: * Check mount, exclusive reference and owner.
1.1 hannken 244: */
1.4 hannken 245: if (vp->v_mount != mp)
1.1 hannken 246: return EXDEV;
1.4 hannken 247: if (vp->v_usecount != 1 || vp->v_writecount != 0)
1.1 hannken 248: return EBUSY;
1.31 ad 249: if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1.39 elad 250: NULL) != 0 &&
1.31 ad 251: VTOI(vp)->i_uid != kauth_cred_geteuid(l->l_cred))
1.9 hannken 252: return EACCES;
253:
1.1 hannken 254: if (vp->v_size != 0) {
1.23 christos 255: error = ffs_truncate(vp, 0, 0, NOCRED, l);
1.4 hannken 256: if (error)
1.1 hannken 257: return error;
258: }
259: /*
260: * Assign a snapshot slot in the superblock.
261: */
262: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
263: if (fs->fs_snapinum[snaploc] == 0)
264: break;
265: if (snaploc == FSMAXSNAP)
266: return (ENOSPC);
267: ip = VTOI(vp);
268: devvp = ip->i_devvp;
269: /*
1.16 hannken 270: * Write an empty list of preallocated blocks to the end of
271: * the snapshot to set size to at least that of the filesystem.
1.1 hannken 272: */
273: numblks = howmany(fs->fs_size, fs->fs_frag);
1.16 hannken 274: blkno = 1;
275: blkno = ufs_rw64(blkno, ns);
1.1 hannken 276: error = vn_rdwr(UIO_WRITE, vp,
1.43 christos 277: (void *)&blkno, sizeof(blkno), lblktosize(fs, (off_t)numblks),
1.31 ad 278: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, l->l_cred, NULL, NULL);
1.1 hannken 279: if (error)
280: goto out;
281: /*
282: * Preallocate critical data structures so that we can copy
283: * them in without further allocation after we suspend all
284: * operations on the filesystem. We would like to just release
285: * the allocated buffers without writing them since they will
286: * be filled in below once we are ready to go, but this upsets
287: * the soft update code, so we go ahead and write the new buffers.
288: *
289: * Allocate all indirect blocks and mark all of them as not
290: * needing to be copied.
291: */
292: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
1.22 yamt 293: error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
1.31 ad 294: fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
1.1 hannken 295: if (error)
296: goto out;
1.15 hannken 297: bawrite(ibp);
1.1 hannken 298: }
299: /*
300: * Allocate copies for the superblock and its summary information.
301: */
1.22 yamt 302: error = ffs_balloc(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
1.15 hannken 303: 0, &nbp);
304: if (error)
305: goto out;
306: bawrite(nbp);
1.1 hannken 307: blkno = fragstoblks(fs, fs->fs_csaddr);
1.15 hannken 308: len = howmany(fs->fs_cssize, fs->fs_bsize);
309: for (loc = 0; loc < len; loc++) {
1.22 yamt 310: error = ffs_balloc(vp, lblktosize(fs, (off_t)(blkno + loc)),
1.15 hannken 311: fs->fs_bsize, KERNCRED, 0, &nbp);
312: if (error)
1.1 hannken 313: goto out;
1.15 hannken 314: bawrite(nbp);
315: }
1.1 hannken 316: /*
317: * Copy all the cylinder group maps. Although the
318: * filesystem is still active, we hope that only a few
319: * cylinder groups will change between now and when we
320: * suspend operations. Thus, we will be able to quickly
321: * touch up the few cylinder groups that changed during
322: * the suspension period.
323: */
324: len = howmany(fs->fs_ncg, NBBY);
1.25 christos 325: fs->fs_active = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO);
1.1 hannken 326: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.22 yamt 327: if ((error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
1.15 hannken 328: fs->fs_bsize, KERNCRED, 0, &nbp)) != 0)
1.1 hannken 329: goto out;
1.15 hannken 330: error = cgaccount(cg, vp, nbp->b_data, 1);
331: bawrite(nbp);
332: if (error)
1.1 hannken 333: goto out;
334: }
335: /*
336: * Change inode to snapshot type file.
337: */
338: ip->i_flags |= SF_SNAPSHOT;
339: DIP_ASSIGN(ip, flags, ip->i_flags);
340: ip->i_flag |= IN_CHANGE | IN_UPDATE;
341: /*
342: * Ensure that the snapshot is completely on disk.
343: * Since we have marked it as a snapshot it is safe to
344: * unlock it as no process will be allowed to write to it.
345: */
1.23 christos 346: if ((error = VOP_FSYNC(vp, KERNCRED, FSYNC_WAIT, 0, 0, l)) != 0)
1.1 hannken 347: goto out;
348: VOP_UNLOCK(vp, 0);
349: /*
350: * All allocations are done, so we can now snapshot the system.
351: *
352: * Suspend operation on filesystem.
353: */
1.40 hannken 354: if ((error = vfs_suspend(vp->v_mount, 0)) != 0) {
1.1 hannken 355: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
356: goto out;
357: }
358: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.30 kardel 359: getmicrotime(&starttime);
1.1 hannken 360: /*
361: * First, copy all the cylinder group maps that have changed.
362: */
363: for (cg = 0; cg < fs->fs_ncg; cg++) {
364: if (ACTIVECG_ISSET(fs, cg))
365: continue;
366: redo++;
1.22 yamt 367: if ((error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
1.15 hannken 368: fs->fs_bsize, KERNCRED, 0, &nbp)) != 0)
1.1 hannken 369: goto out1;
1.15 hannken 370: error = cgaccount(cg, vp, nbp->b_data, 2);
371: bawrite(nbp);
372: if (error)
1.1 hannken 373: goto out1;
374: }
375: /*
376: * Grab a copy of the superblock and its summary information.
377: * We delay writing it until the suspension is released below.
378: */
1.16 hannken 379: sbbuf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 380: loc = blkoff(fs, fs->fs_sblockloc);
381: if (loc > 0)
1.43 christos 382: memset(sbbuf, 0, loc);
383: copy_fs = (struct fs *)((char *)sbbuf + loc);
1.1 hannken 384: bcopy(fs, copy_fs, fs->fs_sbsize);
385: size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
386: if (fs->fs_sbsize < size)
1.43 christos 387: memset((char *)sbbuf + loc + fs->fs_sbsize, 0,
388: size - fs->fs_sbsize);
1.1 hannken 389: size = blkroundup(fs, fs->fs_cssize);
390: if (fs->fs_contigsumsize > 0)
391: size += fs->fs_ncg * sizeof(int32_t);
392: space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
393: copy_fs->fs_csp = space;
394: bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
1.12 yamt 395: space = (char *)space + fs->fs_cssize;
1.1 hannken 396: loc = howmany(fs->fs_cssize, fs->fs_fsize);
397: i = fs->fs_frag - loc % fs->fs_frag;
398: len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
399: if (len > 0) {
400: if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
401: len, KERNCRED, &bp)) != 0) {
1.46.6.3! joerg 402: brelse(bp, 0);
1.1 hannken 403: free(copy_fs->fs_csp, M_UFSMNT);
404: goto out1;
405: }
406: bcopy(bp->b_data, space, (u_int)len);
1.12 yamt 407: space = (char *)space + len;
1.46.6.3! joerg 408: brelse(bp, BC_INVAL | BC_NOCACHE);
1.1 hannken 409: }
410: if (fs->fs_contigsumsize > 0) {
411: copy_fs->fs_maxcluster = lp = space;
412: for (i = 0; i < fs->fs_ncg; i++)
413: *lp++ = fs->fs_contigsumsize;
414: }
415: /*
416: * We must check for active files that have been unlinked
417: * (e.g., with a zero link count). We have to expunge all
418: * trace of these files from the snapshot so that they are
419: * not reclaimed prematurely by fsck or unnecessarily dumped.
420: * We turn off the MNTK_SUSPENDED flag to avoid a panic from
421: * spec_strategy about writing on a suspended filesystem.
422: * Note that we skip unlinked snapshot files as they will
423: * be handled separately below.
424: *
425: * We also calculate the needed size for the snapshot list.
426: */
427: snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
428: FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
429: MNT_ILOCK(mp);
430: loop:
1.35 reinoud 431: /*
432: * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
433: * and vclean() can be called indirectly
434: */
435: for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = nvp) {
1.1 hannken 436: /*
437: * Make sure this vnode wasn't reclaimed in getnewvnode().
438: * Start over if it has (it won't be on the list anymore).
439: */
440: if (xvp->v_mount != mp)
441: goto loop;
442: VI_LOCK(xvp);
1.35 reinoud 443: nvp = TAILQ_NEXT(xvp, v_mntvnodes);
1.1 hannken 444: MNT_IUNLOCK(mp);
1.46.6.3! joerg 445: if ((xvp->v_iflag & VI_XLOCK) ||
1.1 hannken 446: xvp->v_usecount == 0 || xvp->v_type == VNON ||
447: (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
448: VI_UNLOCK(xvp);
449: MNT_ILOCK(mp);
450: continue;
451: }
1.40 hannken 452: VI_UNLOCK(xvp);
1.1 hannken 453: #ifdef DEBUG
454: if (snapdebug)
455: vprint("ffs_snapshot: busy vnode", xvp);
456: #endif
1.31 ad 457: if (VOP_GETATTR(xvp, &vat, l->l_cred, l) == 0 &&
1.1 hannken 458: vat.va_nlink > 0) {
459: MNT_ILOCK(mp);
460: continue;
461: }
462: xp = VTOI(xvp);
463: if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
464: MNT_ILOCK(mp);
465: continue;
466: }
467: /*
468: * If there is a fragment, clear it here.
469: */
470: blkno = 0;
471: loc = howmany(xp->i_size, fs->fs_bsize) - 1;
472: if (loc < NDADDR) {
473: len = fragroundup(fs, blkoff(fs, xp->i_size));
1.5 hannken 474: if (len > 0 && len < fs->fs_bsize) {
1.1 hannken 475: ffs_blkfree(copy_fs, vp, db_get(xp, loc),
476: len, xp->i_number);
477: blkno = db_get(xp, loc);
478: db_assign(xp, loc, 0);
479: }
480: }
481: snaplistsize += 1;
482: if (xp->i_ump->um_fstype == UFS1)
483: error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
484: BLK_NOCOPY);
485: else
486: error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
487: BLK_NOCOPY);
488: if (blkno)
489: db_assign(xp, loc, blkno);
1.6 hannken 490: if (!error)
491: error = ffs_freefile(copy_fs, vp, xp->i_number,
492: xp->i_mode);
1.1 hannken 493: if (error) {
494: free(copy_fs->fs_csp, M_UFSMNT);
495: goto out1;
496: }
497: MNT_ILOCK(mp);
498: }
499: MNT_IUNLOCK(mp);
500: /*
501: * If there already exist snapshots on this filesystem, grab a
502: * reference to their shared lock. If this is the first snapshot
503: * on this filesystem, we need to allocate a lock for the snapshots
504: * to share. In either case, acquire the snapshot lock and give
505: * up our original private lock.
506: */
1.46.6.2 jmcneill 507: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 508: if ((xp = TAILQ_FIRST(&si->si_snapshots)) != NULL) {
1.1 hannken 509: VI_LOCK(vp);
1.46.6.2 jmcneill 510: vp->v_vnlock = ITOV(xp)->v_vnlock;
1.1 hannken 511: } else {
1.46.6.2 jmcneill 512: lockinit(&si->si_vnlock, PVFS, "snaplk", 0, LK_CANRECURSE);
1.1 hannken 513: VI_LOCK(vp);
1.46.6.2 jmcneill 514: vp->v_vnlock = &si->si_vnlock;
1.1 hannken 515: }
1.46.6.2 jmcneill 516: mutex_exit(&si->si_lock);
1.1 hannken 517: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
518: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
519: /*
520: * If this is the first snapshot on this filesystem, then we need
521: * to allocate the space for the list of preallocated snapshot blocks.
522: * This list will be refined below, but this preliminary one will
523: * keep us out of deadlock until the full one is ready.
524: */
525: if (xp == NULL) {
1.25 christos 526: snapblklist = malloc(
1.1 hannken 527: snaplistsize * sizeof(ufs2_daddr_t), M_UFSMNT, M_WAITOK);
528: blkp = &snapblklist[1];
1.15 hannken 529: *blkp++ = lblkno(fs, fs->fs_sblockloc);
1.1 hannken 530: blkno = fragstoblks(fs, fs->fs_csaddr);
531: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.15 hannken 532: if (fragstoblks(fs, cgtod(fs, cg)) > blkno)
1.1 hannken 533: break;
1.15 hannken 534: *blkp++ = fragstoblks(fs, cgtod(fs, cg));
1.1 hannken 535: }
536: len = howmany(fs->fs_cssize, fs->fs_bsize);
537: for (loc = 0; loc < len; loc++)
1.15 hannken 538: *blkp++ = blkno + loc;
1.1 hannken 539: for (; cg < fs->fs_ncg; cg++)
1.15 hannken 540: *blkp++ = fragstoblks(fs, cgtod(fs, cg));
541: snapblklist[0] = blkp - snapblklist;
1.46.6.2 jmcneill 542: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 543: if (si->si_snapblklist != NULL)
1.1 hannken 544: panic("ffs_snapshot: non-empty list");
1.46.6.1 jmcneill 545: si->si_snapblklist = snapblklist;
1.46.6.2 jmcneill 546: } else
547: mutex_enter(&si->si_lock);
1.1 hannken 548: /*
549: * Record snapshot inode. Since this is the newest snapshot,
550: * it must be placed at the end of the list.
551: */
552: fs->fs_snapinum[snaploc] = ip->i_number;
553: if (ip->i_nextsnap.tqe_prev != 0)
1.19 christos 554: panic("ffs_snapshot: %llu already on list",
555: (unsigned long long)ip->i_number);
1.46.6.1 jmcneill 556: TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
1.1 hannken 557: if (xp == NULL)
1.46.6.3! joerg 558: fscow_establish(mp, ffs_copyonwrite, devvp);
1.46.6.2 jmcneill 559: si->si_gen++;
560: mutex_exit(&si->si_lock);
1.46.6.3! joerg 561: vp->v_vflag |= VV_SYSTEM;
1.1 hannken 562: out1:
563: /*
564: * Resume operation on filesystem.
565: */
1.40 hannken 566: vfs_resume(vp->v_mount);
1.1 hannken 567: /*
568: * Set the mtime to the time the snapshot has been taken.
569: */
1.21 yamt 570: TIMEVAL_TO_TIMESPEC(&starttime, &ts);
1.1 hannken 571: if (ctime)
572: *ctime = ts;
573: DIP_ASSIGN(ip, mtime, ts.tv_sec);
574: DIP_ASSIGN(ip, mtimensec, ts.tv_nsec);
575: ip->i_flag |= IN_CHANGE | IN_UPDATE;
576:
577: #ifdef DEBUG
578: if (starttime.tv_sec > 0) {
1.30 kardel 579: getmicrotime(&endtime);
1.1 hannken 580: timersub(&endtime, &starttime, &endtime);
581: printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
582: vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
583: endtime.tv_usec / 1000, redo, fs->fs_ncg);
584: }
585: #endif
586: if (error)
587: goto out;
588: /*
589: * Copy allocation information from all the snapshots in
590: * this snapshot and then expunge them from its view.
591: */
1.46.6.1 jmcneill 592: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
1.1 hannken 593: if (xp == ip)
594: break;
595: if (xp->i_ump->um_fstype == UFS1)
596: error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
597: BLK_SNAP);
598: else
599: error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
600: BLK_SNAP);
1.46.6.2 jmcneill 601: if (error == 0 && xp->i_ffs_effnlink == 0)
602: error = ffs_freefile(copy_fs, vp,
603: xp->i_number, xp->i_mode);
1.1 hannken 604: if (error) {
605: fs->fs_snapinum[snaploc] = 0;
606: goto done;
607: }
608: }
609: /*
610: * Allocate space for the full list of preallocated snapshot blocks.
611: */
1.25 christos 612: snapblklist = malloc(snaplistsize * sizeof(ufs2_daddr_t),
1.1 hannken 613: M_UFSMNT, M_WAITOK);
614: ip->i_snapblklist = &snapblklist[1];
615: /*
616: * Expunge the blocks used by the snapshots from the set of
617: * blocks marked as used in the snapshot bitmaps. Also, collect
618: * the list of allocated blocks in i_snapblklist.
619: */
620: if (ip->i_ump->um_fstype == UFS1)
621: error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
622: else
623: error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
624: if (error) {
625: fs->fs_snapinum[snaploc] = 0;
626: FREE(snapblklist, M_UFSMNT);
627: goto done;
628: }
629: if (snaplistsize < ip->i_snapblklist - snapblklist)
630: panic("ffs_snapshot: list too small");
631: snaplistsize = ip->i_snapblklist - snapblklist;
1.15 hannken 632: snapblklist[0] = snaplistsize;
633: ip->i_snapblklist = &snapblklist[0];
1.1 hannken 634: /*
635: * Write out the list of allocated blocks to the end of the snapshot.
636: */
1.15 hannken 637: for (i = 0; i < snaplistsize; i++)
638: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1.43 christos 639: error = vn_rdwr(UIO_WRITE, vp, (void *)snapblklist,
1.16 hannken 640: snaplistsize*sizeof(ufs2_daddr_t), lblktosize(fs, (off_t)numblks),
1.31 ad 641: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, l->l_cred, NULL, NULL);
1.15 hannken 642: for (i = 0; i < snaplistsize; i++)
643: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1.1 hannken 644: if (error) {
645: fs->fs_snapinum[snaploc] = 0;
646: FREE(snapblklist, M_UFSMNT);
647: goto done;
648: }
649: /*
650: * Write the superblock and its summary information
651: * to the snapshot.
652: */
653: blkno = fragstoblks(fs, fs->fs_csaddr);
654: len = howmany(fs->fs_cssize, fs->fs_bsize);
655: space = copy_fs->fs_csp;
1.8 hannken 656: #ifdef FFS_EI
1.1 hannken 657: if (ns) {
658: ffs_sb_swap(copy_fs, copy_fs);
659: ffs_csum_swap(space, space, fs->fs_cssize);
660: }
1.8 hannken 661: #endif
1.1 hannken 662: for (loc = 0; loc < len; loc++) {
1.15 hannken 663: error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
664: if (error) {
1.46.6.3! joerg 665: brelse(nbp, 0);
1.1 hannken 666: fs->fs_snapinum[snaploc] = 0;
667: FREE(snapblklist, M_UFSMNT);
668: goto done;
669: }
1.15 hannken 670: bcopy(space, nbp->b_data, fs->fs_bsize);
1.1 hannken 671: space = (char *)space + fs->fs_bsize;
1.15 hannken 672: bawrite(nbp);
1.1 hannken 673: }
674: /*
675: * As this is the newest list, it is the most inclusive, so
1.15 hannken 676: * should replace the previous list. If this is the first snapshot
677: * free the preliminary list.
1.1 hannken 678: */
1.46.6.2 jmcneill 679: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 680: space = si->si_snapblklist;
681: si->si_snapblklist = snapblklist;
682: if (TAILQ_FIRST(&si->si_snapshots) == ip)
1.1 hannken 683: FREE(space, M_UFSMNT);
1.46.6.2 jmcneill 684: si->si_gen++;
685: mutex_exit(&si->si_lock);
1.1 hannken 686: done:
687: free(copy_fs->fs_csp, M_UFSMNT);
1.15 hannken 688: if (!error) {
689: error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
690: KERNCRED, &nbp);
691: if (error) {
1.46.6.3! joerg 692: brelse(nbp, 0);
1.15 hannken 693: fs->fs_snapinum[snaploc] = 0;
694: }
1.16 hannken 695: bcopy(sbbuf, nbp->b_data, fs->fs_bsize);
1.15 hannken 696: bawrite(nbp);
697: }
1.1 hannken 698: out:
1.4 hannken 699: /*
1.15 hannken 700: * Invalidate and free all pages on the snapshot vnode.
701: * All metadata has been written through the buffer cache.
702: * Clean all dirty buffers now to avoid UBC inconsistencies.
1.4 hannken 703: */
704: if (!error) {
705: simple_lock(&vp->v_interlock);
706: error = VOP_PUTPAGES(vp, 0, 0,
707: PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
708: }
1.15 hannken 709: if (!error) {
710: s = splbio();
711: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
712: nbp = LIST_NEXT(bp, b_vnbufs);
713: simple_lock(&bp->b_interlock);
714: splx(s);
715: if ((bp->b_flags & (B_DELWRI|B_BUSY)) != B_DELWRI)
716: panic("ffs_snapshot: not dirty or busy, bp %p",
717: bp);
718: bp->b_flags |= B_BUSY|B_VFLUSH;
719: if (LIST_FIRST(&bp->b_dep) == NULL)
720: bp->b_flags |= B_NOCACHE;
721: simple_unlock(&bp->b_interlock);
722: bwrite(bp);
723: s = splbio();
724: }
725: simple_lock(&global_v_numoutput_slock);
726: while (vp->v_numoutput) {
1.46.6.3! joerg 727: vp->v_iflag |= VI_BWAIT;
1.43 christos 728: ltsleep((void *)&vp->v_numoutput, PRIBIO+1,
1.15 hannken 729: "snapflushbuf", 0, &global_v_numoutput_slock);
730: }
731: simple_unlock(&global_v_numoutput_slock);
732: splx(s);
733: }
1.16 hannken 734: if (sbbuf)
735: free(sbbuf, M_UFSMNT);
1.1 hannken 736: if (fs->fs_active != 0) {
737: FREE(fs->fs_active, M_DEVBUF);
738: fs->fs_active = 0;
739: }
740: mp->mnt_flag = flag;
741: if (error)
1.23 christos 742: (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, l);
1.1 hannken 743: else
744: vref(vp);
745: return (error);
746: }
747:
748: /*
749: * Copy a cylinder group map. All the unallocated blocks are marked
750: * BLK_NOCOPY so that the snapshot knows that it need not copy them
751: * if they are later written. If passno is one, then this is a first
752: * pass, so only setting needs to be done. If passno is 2, then this
753: * is a revision to a previous pass which must be undone as the
754: * replacement pass is done.
755: */
756: static int
1.43 christos 757: cgaccount(int cg, struct vnode *vp, void *data, int passno)
1.1 hannken 758: {
759: struct buf *bp, *ibp;
760: struct inode *ip;
761: struct cg *cgp;
762: struct fs *fs;
763: ufs2_daddr_t base, numblks;
764: int error, len, loc, ns, indiroff;
765:
766: ip = VTOI(vp);
767: fs = ip->i_fs;
768: ns = UFS_FSNEEDSWAP(fs);
769: error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
770: (int)fs->fs_cgsize, KERNCRED, &bp);
771: if (error) {
1.46.6.3! joerg 772: brelse(bp, 0);
1.1 hannken 773: return (error);
774: }
775: cgp = (struct cg *)bp->b_data;
776: if (!cg_chkmagic(cgp, ns)) {
1.46.6.3! joerg 777: brelse(bp, 0);
1.1 hannken 778: return (EIO);
779: }
780: ACTIVECG_SET(fs, cg);
781:
782: bcopy(bp->b_data, data, fs->fs_cgsize);
1.46.6.3! joerg 783: brelse(bp, 0);
1.1 hannken 784: if (fs->fs_cgsize < fs->fs_bsize)
1.43 christos 785: memset((char *)data + fs->fs_cgsize, 0,
1.1 hannken 786: fs->fs_bsize - fs->fs_cgsize);
787: numblks = howmany(fs->fs_size, fs->fs_frag);
788: len = howmany(fs->fs_fpg, fs->fs_frag);
789: base = cg * fs->fs_fpg / fs->fs_frag;
790: if (base + len >= numblks)
791: len = numblks - base - 1;
792: loc = 0;
793: if (base < NDADDR) {
794: for ( ; loc < NDADDR; loc++) {
795: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
796: db_assign(ip, loc, BLK_NOCOPY);
797: else if (db_get(ip, loc) == BLK_NOCOPY) {
798: if (passno == 2)
799: db_assign(ip, loc, 0);
800: else if (passno == 1)
801: panic("ffs_snapshot: lost direct block");
802: }
803: }
804: }
1.22 yamt 805: if ((error = ffs_balloc(vp, lblktosize(fs, (off_t)(base + loc)),
1.11 perry 806: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
1.1 hannken 807: return (error);
808: indiroff = (base + loc - NDADDR) % NINDIR(fs);
809: for ( ; loc < len; loc++, indiroff++) {
810: if (indiroff >= NINDIR(fs)) {
1.15 hannken 811: bawrite(ibp);
1.22 yamt 812: if ((error = ffs_balloc(vp,
1.1 hannken 813: lblktosize(fs, (off_t)(base + loc)),
814: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
815: return (error);
816: indiroff = 0;
817: }
818: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
819: idb_assign(ip, ibp->b_data, indiroff, BLK_NOCOPY);
820: else if (idb_get(ip, ibp->b_data, indiroff) == BLK_NOCOPY) {
821: if (passno == 2)
822: idb_assign(ip, ibp->b_data, indiroff, 0);
823: else if (passno == 1)
824: panic("ffs_snapshot: lost indirect block");
825: }
826: }
1.15 hannken 827: bdwrite(ibp);
1.1 hannken 828: return (0);
829: }
830:
831: /*
832: * Before expunging a snapshot inode, note all the
833: * blocks that it claims with BLK_SNAP so that fsck will
834: * be able to account for those blocks properly and so
835: * that this snapshot knows that it need not copy them
836: * if the other snapshot holding them is freed. This code
837: * is reproduced once each for UFS1 and UFS2.
838: */
839: static int
1.18 thorpej 840: expunge_ufs1(struct vnode *snapvp, struct inode *cancelip, struct fs *fs,
841: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
842: struct fs *, ufs_lbn_t, int),
843: int expungetype)
1.1 hannken 844: {
1.4 hannken 845: int i, s, error, ns, indiroff;
1.1 hannken 846: ufs_lbn_t lbn, rlbn;
847: ufs2_daddr_t len, blkno, numblks, blksperindir;
848: struct ufs1_dinode *dip;
849: struct buf *bp;
1.43 christos 850: void *bf;
1.1 hannken 851:
852: ns = UFS_FSNEEDSWAP(fs);
853: /*
854: * Prepare to expunge the inode. If its inode block has not
855: * yet been copied, then allocate and fill the copy.
856: */
857: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
858: blkno = 0;
859: if (lbn < NDADDR) {
860: blkno = db_get(VTOI(snapvp), lbn);
861: } else {
1.4 hannken 862: s = cow_enter();
1.22 yamt 863: error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
1.1 hannken 864: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 865: cow_leave(s);
1.1 hannken 866: if (error)
867: return (error);
868: indiroff = (lbn - NDADDR) % NINDIR(fs);
869: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
1.46.6.3! joerg 870: brelse(bp, 0);
1.1 hannken 871: }
1.17 christos 872: bf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 873: if (blkno != 0)
1.17 christos 874: error = readvnblk(snapvp, bf, lbn);
1.1 hannken 875: else
1.17 christos 876: error = readfsblk(snapvp, bf, lbn);
1.1 hannken 877: if (error) {
1.17 christos 878: free(bf, M_UFSMNT);
1.1 hannken 879: return error;
880: }
881: /*
882: * Set a snapshot inode to be a zero length file, regular files
1.46.6.2 jmcneill 883: * or unlinked snapshots to be completely unallocated.
1.1 hannken 884: */
1.17 christos 885: dip = (struct ufs1_dinode *)bf + ino_to_fsbo(fs, cancelip->i_number);
1.46.6.2 jmcneill 886: if (expungetype == BLK_NOCOPY || cancelip->i_ffs_effnlink == 0)
1.1 hannken 887: dip->di_mode = 0;
888: dip->di_size = 0;
889: dip->di_blocks = 0;
890: dip->di_flags =
891: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
892: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
1.17 christos 893: error = writevnblk(snapvp, bf, lbn);
894: free(bf, M_UFSMNT);
1.1 hannken 895: if (error)
896: return error;
897: /*
898: * Now go through and expunge all the blocks in the file
899: * using the function requested.
900: */
901: numblks = howmany(cancelip->i_size, fs->fs_bsize);
902: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_db[0],
903: &cancelip->i_ffs1_db[NDADDR], fs, 0, expungetype)))
904: return (error);
905: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_ib[0],
906: &cancelip->i_ffs1_ib[NIADDR], fs, -1, expungetype)))
907: return (error);
908: blksperindir = 1;
909: lbn = -NDADDR;
910: len = numblks - NDADDR;
911: rlbn = NDADDR;
912: for (i = 0; len > 0 && i < NIADDR; i++) {
913: error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
914: ufs_rw32(cancelip->i_ffs1_ib[i], ns), lbn, rlbn, len,
915: blksperindir, fs, acctfunc, expungetype);
916: if (error)
917: return (error);
918: blksperindir *= NINDIR(fs);
919: lbn -= blksperindir + 1;
920: len -= blksperindir;
921: rlbn += blksperindir;
922: }
923: return (0);
924: }
925:
926: /*
927: * Descend an indirect block chain for vnode cancelvp accounting for all
928: * its indirect blocks in snapvp.
1.11 perry 929: */
1.1 hannken 930: static int
1.18 thorpej 931: indiracct_ufs1(struct vnode *snapvp, struct vnode *cancelvp, int level,
932: ufs1_daddr_t blkno, ufs_lbn_t lbn, ufs_lbn_t rlbn, ufs_lbn_t remblks,
933: ufs_lbn_t blksperindir, struct fs *fs,
934: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
935: struct fs *, ufs_lbn_t, int),
936: int expungetype)
1.1 hannken 937: {
938: int error, ns, num, i;
939: ufs_lbn_t subblksperindir;
940: struct indir indirs[NIADDR + 2];
941: ufs1_daddr_t last, *bap;
942: struct buf *bp;
943:
944: ns = UFS_FSNEEDSWAP(fs);
945:
946: if (blkno == 0) {
947: if (expungetype == BLK_NOCOPY)
948: return (0);
949: panic("indiracct_ufs1: missing indir");
950: }
951: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
952: return (error);
953: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
954: panic("indiracct_ufs1: botched params");
955: /*
956: * We have to expand bread here since it will deadlock looking
957: * up the block number for any blocks that are not in the cache.
958: */
959: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
960: bp->b_blkno = fsbtodb(fs, blkno);
961: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
962: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
1.46.6.3! joerg 963: brelse(bp, 0);
1.1 hannken 964: return (error);
965: }
966: /*
967: * Account for the block pointers in this indirect block.
968: */
969: last = howmany(remblks, blksperindir);
970: if (last > NINDIR(fs))
971: last = NINDIR(fs);
1.25 christos 972: bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
1.43 christos 973: bcopy(bp->b_data, (void *)bap, fs->fs_bsize);
1.46.6.3! joerg 974: brelse(bp, 0);
1.1 hannken 975: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
976: level == 0 ? rlbn : -1, expungetype);
977: if (error || level == 0)
978: goto out;
979: /*
980: * Account for the block pointers in each of the indirect blocks
981: * in the levels below us.
982: */
983: subblksperindir = blksperindir / NINDIR(fs);
984: for (lbn++, level--, i = 0; i < last; i++) {
985: error = indiracct_ufs1(snapvp, cancelvp, level,
986: ufs_rw32(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
987: fs, acctfunc, expungetype);
988: if (error)
989: goto out;
990: rlbn += blksperindir;
991: lbn -= blksperindir;
992: remblks -= blksperindir;
993: }
994: out:
995: FREE(bap, M_DEVBUF);
996: return (error);
997: }
998:
999: /*
1000: * Do both snap accounting and map accounting.
1001: */
1002: static int
1.18 thorpej 1003: fullacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1004: struct fs *fs, ufs_lbn_t lblkno,
1005: int exptype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1006: {
1007: int error;
1008:
1009: if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
1010: return (error);
1011: return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
1012: }
1013:
1014: /*
1015: * Identify a set of blocks allocated in a snapshot inode.
1016: */
1017: static int
1.18 thorpej 1018: snapacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1.36 christos 1019: struct fs *fs, ufs_lbn_t lblkno,
1.18 thorpej 1020: int expungetype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1021: {
1022: struct inode *ip = VTOI(vp);
1023: ufs1_daddr_t blkno, *blkp;
1024: ufs_lbn_t lbn;
1025: struct buf *ibp;
1026: int error, ns;
1027:
1028: ns = UFS_FSNEEDSWAP(fs);
1029:
1030: for ( ; oldblkp < lastblkp; oldblkp++) {
1031: blkno = ufs_rw32(*oldblkp, ns);
1032: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
1033: continue;
1034: lbn = fragstoblks(fs, blkno);
1035: if (lbn < NDADDR) {
1036: blkp = &ip->i_ffs1_db[lbn];
1037: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1038: } else {
1.22 yamt 1039: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1040: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1041: if (error)
1042: return (error);
1043: blkp = &((ufs1_daddr_t *)(ibp->b_data))
1044: [(lbn - NDADDR) % NINDIR(fs)];
1045: }
1046: /*
1047: * If we are expunging a snapshot vnode and we
1048: * find a block marked BLK_NOCOPY, then it is
1049: * one that has been allocated to this snapshot after
1050: * we took our current snapshot and can be ignored.
1051: */
1052: blkno = ufs_rw32(*blkp, ns);
1053: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1054: if (lbn >= NDADDR)
1.46.6.3! joerg 1055: brelse(ibp, 0);
1.1 hannken 1056: } else {
1057: if (blkno != 0)
1058: panic("snapacct_ufs1: bad block");
1059: *blkp = ufs_rw32(expungetype, ns);
1060: if (lbn >= NDADDR)
1.15 hannken 1061: bdwrite(ibp);
1.1 hannken 1062: }
1063: }
1064: return (0);
1065: }
1066:
1067: /*
1068: * Account for a set of blocks allocated in a snapshot inode.
1069: */
1070: static int
1.18 thorpej 1071: mapacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1072: struct fs *fs, ufs_lbn_t lblkno, int expungetype)
1.1 hannken 1073: {
1074: ufs1_daddr_t blkno;
1075: struct inode *ip;
1076: ino_t inum;
1077: int acctit, ns;
1078:
1079: ns = UFS_FSNEEDSWAP(fs);
1080: ip = VTOI(vp);
1081: inum = ip->i_number;
1082: if (lblkno == -1)
1083: acctit = 0;
1084: else
1085: acctit = 1;
1086: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
1087: blkno = ufs_rw32(*oldblkp, ns);
1088: if (blkno == 0 || blkno == BLK_NOCOPY)
1089: continue;
1090: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1.15 hannken 1091: *ip->i_snapblklist++ = lblkno;
1.1 hannken 1092: if (blkno == BLK_SNAP)
1093: blkno = blkstofrags(fs, lblkno);
1094: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1095: }
1096: return (0);
1097: }
1098:
1099: /*
1100: * Before expunging a snapshot inode, note all the
1101: * blocks that it claims with BLK_SNAP so that fsck will
1102: * be able to account for those blocks properly and so
1103: * that this snapshot knows that it need not copy them
1104: * if the other snapshot holding them is freed. This code
1105: * is reproduced once each for UFS1 and UFS2.
1106: */
1107: static int
1.18 thorpej 1108: expunge_ufs2(struct vnode *snapvp, struct inode *cancelip, struct fs *fs,
1109: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1110: struct fs *, ufs_lbn_t, int),
1111: int expungetype)
1.1 hannken 1112: {
1.4 hannken 1113: int i, s, error, ns, indiroff;
1.1 hannken 1114: ufs_lbn_t lbn, rlbn;
1115: ufs2_daddr_t len, blkno, numblks, blksperindir;
1116: struct ufs2_dinode *dip;
1117: struct buf *bp;
1.43 christos 1118: void *bf;
1.1 hannken 1119:
1120: ns = UFS_FSNEEDSWAP(fs);
1121: /*
1122: * Prepare to expunge the inode. If its inode block has not
1123: * yet been copied, then allocate and fill the copy.
1124: */
1125: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
1126: blkno = 0;
1127: if (lbn < NDADDR) {
1128: blkno = db_get(VTOI(snapvp), lbn);
1129: } else {
1.4 hannken 1130: s = cow_enter();
1.22 yamt 1131: error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1132: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 1133: cow_leave(s);
1.1 hannken 1134: if (error)
1135: return (error);
1136: indiroff = (lbn - NDADDR) % NINDIR(fs);
1137: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
1.46.6.3! joerg 1138: brelse(bp, 0);
1.1 hannken 1139: }
1.17 christos 1140: bf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 1141: if (blkno != 0)
1.17 christos 1142: error = readvnblk(snapvp, bf, lbn);
1.1 hannken 1143: else
1.17 christos 1144: error = readfsblk(snapvp, bf, lbn);
1.1 hannken 1145: if (error) {
1.17 christos 1146: free(bf, M_UFSMNT);
1.1 hannken 1147: return error;
1148: }
1149: /*
1150: * Set a snapshot inode to be a zero length file, regular files
1.46.6.2 jmcneill 1151: * or unlinked snapshots to be completely unallocated.
1.1 hannken 1152: */
1.17 christos 1153: dip = (struct ufs2_dinode *)bf + ino_to_fsbo(fs, cancelip->i_number);
1.46.6.2 jmcneill 1154: if (expungetype == BLK_NOCOPY || cancelip->i_ffs_effnlink == 0)
1.1 hannken 1155: dip->di_mode = 0;
1156: dip->di_size = 0;
1157: dip->di_blocks = 0;
1158: dip->di_flags =
1159: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
1160: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
1.17 christos 1161: error = writevnblk(snapvp, bf, lbn);
1162: free(bf, M_UFSMNT);
1.1 hannken 1163: if (error)
1164: return error;
1165: /*
1166: * Now go through and expunge all the blocks in the file
1167: * using the function requested.
1168: */
1169: numblks = howmany(cancelip->i_size, fs->fs_bsize);
1170: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_db[0],
1171: &cancelip->i_ffs2_db[NDADDR], fs, 0, expungetype)))
1172: return (error);
1173: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_ib[0],
1174: &cancelip->i_ffs2_ib[NIADDR], fs, -1, expungetype)))
1175: return (error);
1176: blksperindir = 1;
1177: lbn = -NDADDR;
1178: len = numblks - NDADDR;
1179: rlbn = NDADDR;
1180: for (i = 0; len > 0 && i < NIADDR; i++) {
1181: error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
1182: ufs_rw64(cancelip->i_ffs2_ib[i], ns), lbn, rlbn, len,
1183: blksperindir, fs, acctfunc, expungetype);
1184: if (error)
1185: return (error);
1186: blksperindir *= NINDIR(fs);
1187: lbn -= blksperindir + 1;
1188: len -= blksperindir;
1189: rlbn += blksperindir;
1190: }
1191: return (0);
1192: }
1193:
1194: /*
1195: * Descend an indirect block chain for vnode cancelvp accounting for all
1196: * its indirect blocks in snapvp.
1.11 perry 1197: */
1.1 hannken 1198: static int
1.18 thorpej 1199: indiracct_ufs2(struct vnode *snapvp, struct vnode *cancelvp, int level,
1200: ufs2_daddr_t blkno, ufs_lbn_t lbn, ufs_lbn_t rlbn, ufs_lbn_t remblks,
1201: ufs_lbn_t blksperindir, struct fs *fs,
1202: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1203: struct fs *, ufs_lbn_t, int),
1204: int expungetype)
1.1 hannken 1205: {
1206: int error, ns, num, i;
1207: ufs_lbn_t subblksperindir;
1208: struct indir indirs[NIADDR + 2];
1209: ufs2_daddr_t last, *bap;
1210: struct buf *bp;
1211:
1212: ns = UFS_FSNEEDSWAP(fs);
1213:
1214: if (blkno == 0) {
1215: if (expungetype == BLK_NOCOPY)
1216: return (0);
1217: panic("indiracct_ufs2: missing indir");
1218: }
1219: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
1220: return (error);
1221: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
1222: panic("indiracct_ufs2: botched params");
1223: /*
1224: * We have to expand bread here since it will deadlock looking
1225: * up the block number for any blocks that are not in the cache.
1226: */
1227: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
1228: bp->b_blkno = fsbtodb(fs, blkno);
1229: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
1230: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
1.46.6.3! joerg 1231: brelse(bp, 0);
1.1 hannken 1232: return (error);
1233: }
1234: /*
1235: * Account for the block pointers in this indirect block.
1236: */
1237: last = howmany(remblks, blksperindir);
1238: if (last > NINDIR(fs))
1239: last = NINDIR(fs);
1.25 christos 1240: bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
1.43 christos 1241: bcopy(bp->b_data, (void *)bap, fs->fs_bsize);
1.46.6.3! joerg 1242: brelse(bp, 0);
1.1 hannken 1243: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
1244: level == 0 ? rlbn : -1, expungetype);
1245: if (error || level == 0)
1246: goto out;
1247: /*
1248: * Account for the block pointers in each of the indirect blocks
1249: * in the levels below us.
1250: */
1251: subblksperindir = blksperindir / NINDIR(fs);
1252: for (lbn++, level--, i = 0; i < last; i++) {
1253: error = indiracct_ufs2(snapvp, cancelvp, level,
1254: ufs_rw64(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
1255: fs, acctfunc, expungetype);
1256: if (error)
1257: goto out;
1258: rlbn += blksperindir;
1259: lbn -= blksperindir;
1260: remblks -= blksperindir;
1261: }
1262: out:
1263: FREE(bap, M_DEVBUF);
1264: return (error);
1265: }
1266:
1267: /*
1268: * Do both snap accounting and map accounting.
1269: */
1270: static int
1.18 thorpej 1271: fullacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1272: struct fs *fs, ufs_lbn_t lblkno,
1273: int exptype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1274: {
1275: int error;
1276:
1277: if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
1278: return (error);
1279: return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
1280: }
1281:
1282: /*
1283: * Identify a set of blocks allocated in a snapshot inode.
1284: */
1285: static int
1.18 thorpej 1286: snapacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1.36 christos 1287: struct fs *fs, ufs_lbn_t lblkno,
1.18 thorpej 1288: int expungetype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1289: {
1290: struct inode *ip = VTOI(vp);
1291: ufs2_daddr_t blkno, *blkp;
1292: ufs_lbn_t lbn;
1293: struct buf *ibp;
1294: int error, ns;
1295:
1296: ns = UFS_FSNEEDSWAP(fs);
1297:
1298: for ( ; oldblkp < lastblkp; oldblkp++) {
1299: blkno = ufs_rw64(*oldblkp, ns);
1300: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
1301: continue;
1302: lbn = fragstoblks(fs, blkno);
1303: if (lbn < NDADDR) {
1304: blkp = &ip->i_ffs2_db[lbn];
1305: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1306: } else {
1.22 yamt 1307: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1308: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1309: if (error)
1310: return (error);
1311: blkp = &((ufs2_daddr_t *)(ibp->b_data))
1312: [(lbn - NDADDR) % NINDIR(fs)];
1313: }
1314: /*
1315: * If we are expunging a snapshot vnode and we
1316: * find a block marked BLK_NOCOPY, then it is
1317: * one that has been allocated to this snapshot after
1318: * we took our current snapshot and can be ignored.
1319: */
1320: blkno = ufs_rw64(*blkp, ns);
1321: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1322: if (lbn >= NDADDR)
1.46.6.3! joerg 1323: brelse(ibp, 0);
1.1 hannken 1324: } else {
1325: if (blkno != 0)
1326: panic("snapacct_ufs2: bad block");
1327: *blkp = ufs_rw64(expungetype, ns);
1328: if (lbn >= NDADDR)
1.15 hannken 1329: bdwrite(ibp);
1.1 hannken 1330: }
1331: }
1332: return (0);
1333: }
1334:
1335: /*
1336: * Account for a set of blocks allocated in a snapshot inode.
1337: */
1338: static int
1.18 thorpej 1339: mapacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1340: struct fs *fs, ufs_lbn_t lblkno, int expungetype)
1.1 hannken 1341: {
1342: ufs2_daddr_t blkno;
1343: struct inode *ip;
1344: ino_t inum;
1345: int acctit, ns;
1346:
1347: ns = UFS_FSNEEDSWAP(fs);
1348: ip = VTOI(vp);
1349: inum = ip->i_number;
1350: if (lblkno == -1)
1351: acctit = 0;
1352: else
1353: acctit = 1;
1354: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
1355: blkno = ufs_rw64(*oldblkp, ns);
1356: if (blkno == 0 || blkno == BLK_NOCOPY)
1357: continue;
1358: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1.15 hannken 1359: *ip->i_snapblklist++ = lblkno;
1.1 hannken 1360: if (blkno == BLK_SNAP)
1361: blkno = blkstofrags(fs, lblkno);
1362: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1363: }
1364: return (0);
1365: }
1.10 hannken 1366: #endif /* defined(FFS_NO_SNAPSHOT) */
1.1 hannken 1367:
1368: /*
1369: * Decrement extra reference on snapshot when last name is removed.
1370: * It will not be freed until the last open reference goes away.
1371: */
1372: void
1.18 thorpej 1373: ffs_snapgone(struct inode *ip)
1.1 hannken 1374: {
1.46.6.1 jmcneill 1375: struct mount *mp = ip->i_devvp->v_specmountpoint;
1.1 hannken 1376: struct inode *xp;
1377: struct fs *fs;
1.46.6.1 jmcneill 1378: struct snap_info *si;
1.1 hannken 1379: int snaploc;
1380:
1.46.6.1 jmcneill 1381: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1382: return;
1.1 hannken 1383: /*
1384: * Find snapshot in incore list.
1385: */
1.46.6.2 jmcneill 1386: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 1387: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap)
1.1 hannken 1388: if (xp == ip)
1389: break;
1.46.6.2 jmcneill 1390: mutex_exit(&si->si_lock);
1.1 hannken 1391: if (xp != NULL)
1392: vrele(ITOV(ip));
1393: #ifdef DEBUG
1394: else if (snapdebug)
1.19 christos 1395: printf("ffs_snapgone: lost snapshot vnode %llu\n",
1396: (unsigned long long)ip->i_number);
1.1 hannken 1397: #endif
1398: /*
1399: * Delete snapshot inode from superblock. Keep list dense.
1400: */
1.46.6.2 jmcneill 1401: mutex_enter(&si->si_lock);
1.1 hannken 1402: fs = ip->i_fs;
1403: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
1404: if (fs->fs_snapinum[snaploc] == ip->i_number)
1405: break;
1406: if (snaploc < FSMAXSNAP) {
1407: for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
1408: if (fs->fs_snapinum[snaploc] == 0)
1409: break;
1410: fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
1411: }
1412: fs->fs_snapinum[snaploc - 1] = 0;
1413: }
1.46.6.2 jmcneill 1414: si->si_gen++;
1415: mutex_exit(&si->si_lock);
1.1 hannken 1416: }
1417:
1418: /*
1419: * Prepare a snapshot file for being removed.
1420: */
/*
 * vp is the vnode of the snapshot file being removed.  The function
 * returns without doing anything when the mount has no snapshot info
 * attached.  Otherwise it unhooks the snapshot from the in-core list
 * (if it is on it), clears or hands over its block claims and clears
 * the SF_SNAPSHOT flag on the inode.
 */
1421: void
1.18 thorpej 1422: ffs_snapremove(struct vnode *vp)
1.1 hannken 1423: {
1.15 hannken 1424: struct inode *ip = VTOI(vp), *xp;
1.1 hannken 1425: struct vnode *devvp = ip->i_devvp;
1426: struct fs *fs = ip->i_fs;
1.46.6.1 jmcneill 1427: struct mount *mp = devvp->v_specmountpoint;
1.1 hannken 1428: struct lock *lkp;
1429: struct buf *ibp;
1.46.6.1 jmcneill 1430: struct snap_info *si;
1.15 hannken 1431: ufs2_daddr_t numblks, blkno, dblk;
1.1 hannken 1432: int error, ns, loc, last;
1433:
1.46.6.1 jmcneill 1434: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1435: return;
1.1 hannken 1436: ns = UFS_FSNEEDSWAP(fs);
1437: /*
1438: * If active, delete from incore list (this snapshot may
1439: * already have been in the process of being deleted, so
1440: * would not have been active).
1441: *
1442: * Clear copy-on-write flag if last snapshot.
1443: */
1444: if (ip->i_nextsnap.tqe_prev != 0) {
1.46.6.2 jmcneill 1445: mutex_enter(&si->si_lock);
1446: lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
1.46.6.1 jmcneill 1447: TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap);
1.1 hannken 1448: ip->i_nextsnap.tqe_prev = 0;
/*
 * Point the vnode back at its own v_lock and release the lock it
 * was using until now (remembered in lkp).
 */
1.46.6.2 jmcneill 1449: VI_LOCK(vp);
1.1 hannken 1450: lkp = vp->v_vnlock;
1451: vp->v_vnlock = &vp->v_lock;
1.46.6.2 jmcneill 1452: lockmgr(lkp, LK_RELEASE | LK_INTERLOCK, VI_MTX(vp));
1.46.6.1 jmcneill 1453: if (TAILQ_FIRST(&si->si_snapshots) != 0) {
1.15 hannken 1454: /* Roll back the list of preallocated blocks. */
1.46.6.1 jmcneill 1455: xp = TAILQ_LAST(&si->si_snapshots, inodelst);
1456: si->si_snapblklist = xp->i_snapblklist;
1.1 hannken 1457: } else {
/*
 * No snapshots remain.  si_lock is dropped while the old lock is
 * drained and the copy-on-write handler is unregistered, then taken
 * again so the common exit path below can release it.
 */
1.46.6.1 jmcneill 1458: si->si_snapblklist = 0;
1.46.6.2 jmcneill 1459: si->si_gen++;
1460: mutex_exit(&si->si_lock);
1461: lockmgr(lkp, LK_DRAIN, NULL);
1.1 hannken 1462: lockmgr(lkp, LK_RELEASE, NULL);
1.46.6.3! joerg 1463: fscow_disestablish(mp, ffs_copyonwrite, devvp);
1.46.6.2 jmcneill 1464: mutex_enter(&si->si_lock);
1.1 hannken 1465: }
/* Bump the generation so concurrent list walkers notice the change. */
1.46.6.2 jmcneill 1466: si->si_gen++;
1467: mutex_exit(&si->si_lock);
1.15 hannken 1468: FREE(ip->i_snapblklist, M_UFSMNT);
1469: ip->i_snapblklist = NULL;
1.1 hannken 1470: }
1471: /*
1472: * Clear all BLK_NOCOPY fields. Pass any block claims to other
1473: * snapshots that want them (see ffs_snapblkfree below).
1474: */
/* Direct blocks; note the scan starts at 1, so entry 0 is not touched. */
1475: for (blkno = 1; blkno < NDADDR; blkno++) {
1476: dblk = db_get(ip, blkno);
1477: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1478: db_assign(ip, blkno, 0);
1479: else if ((dblk == blkstofrags(fs, blkno) &&
1480: ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
1481: ip->i_number))) {
1482: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1483: db_assign(ip, blkno, 0);
1484: }
1485: }
/*
 * Indirect blocks, one indirect block (NINDIR(fs) entries) per pass.
 * An ffs_balloc failure is not reported; that indirect block is
 * simply skipped.
 */
1486: numblks = howmany(ip->i_size, fs->fs_bsize);
1487: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
1.22 yamt 1488: error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
1.1 hannken 1489: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1490: if (error)
1491: continue;
1492: if (fs->fs_size - blkno > NINDIR(fs))
1493: last = NINDIR(fs);
1494: else
1495: last = fs->fs_size - blkno;
1496: for (loc = 0; loc < last; loc++) {
1497: dblk = idb_get(ip, ibp->b_data, loc);
1498: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1499: idb_assign(ip, ibp->b_data, loc, 0);
/*
 * NOTE(review): the claimed-block test below compares against
 * blkstofrags(fs, blkno), i.e. the first block covered by this
 * indirect block, not blkno + loc -- confirm this is intended.
 */
1500: else if (dblk == blkstofrags(fs, blkno) &&
1501: ffs_snapblkfree(fs, ip->i_devvp, dblk,
1502: fs->fs_bsize, ip->i_number)) {
1503: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1504: idb_assign(ip, ibp->b_data, loc, 0);
1505: }
1506: }
1.15 hannken 1507: bawrite(ibp);
1.1 hannken 1508: }
1509: /*
1510: * Clear snapshot flag and drop reference.
1511: */
1512: ip->i_flags &= ~SF_SNAPSHOT;
1513: DIP_ASSIGN(ip, flags, ip->i_flags);
1514: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1515: }
1516:
1517: /*
1518: * Notification that a block is being freed. Return zero if the free
1519: * should be allowed to proceed. Return non-zero if the snapshot file
1520: * wants to claim the block. The block will be claimed if it is an
1521: * uncopied part of one of the snapshots. It will be freed if it is
1522: * either a BLK_NOCOPY or has already been copied in all of the snapshots.
1523: * If a fragment is being freed, then all snapshots that care about
1524: * it must make a copy since a snapshot file can only claim full sized
1525: * blocks. Note that if more than one snapshot file maps the block,
1526: * we can pick one at random to claim it. Since none of the snapshots
1527: * can change, we are assured that they will all see the same unmodified
1528: * image. When deleting a snapshot file (see ffs_snapremove above), we
1529: * must push any of these claimed blocks to one of the other snapshots
1530: * that maps it. These claimed blocks are easily identified as they will
1531: * have a block number equal to their logical block number within the
1532: * snapshot. A copied block can never have this property because they
1533: * must always have been allocated from a BLK_NOCOPY location.
1534: */
/*
 * fs/devvp identify the file system, bno is the fragment address of
 * the block being freed, size its size in bytes and inum the inode it
 * is freed from (used only in debug output here).
 *
 * Returns 0 when the mount has no snapshot info or when the free may
 * proceed, 1 when a snapshot claimed a full-size block, and otherwise
 * the error from ffs_balloc, readfsblk or writevnblk.
 */
1535: int
1.18 thorpej 1536: ffs_snapblkfree(struct fs *fs, struct vnode *devvp, ufs2_daddr_t bno,
1.36 christos 1537: long size, ino_t inum)
1.1 hannken 1538: {
1.46.6.1 jmcneill 1539: struct mount *mp = devvp->v_specmountpoint;
1.1 hannken 1540: struct buf *ibp;
1541: struct inode *ip;
1.46.6.2 jmcneill 1542: struct vnode *vp = NULL;
1.46.6.1 jmcneill 1543: struct snap_info *si;
1.43 christos 1544: void *saved_data = NULL;
1.1 hannken 1545: ufs_lbn_t lbn;
1546: ufs2_daddr_t blkno;
1.46.6.2 jmcneill 1547: uint32_t gen;
1.4 hannken 1548: int s, indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
1549:
1.46.6.1 jmcneill 1550: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1551: return 0;
1.1 hannken 1552: lbn = fragstoblks(fs, bno);
1.46.6.2 jmcneill 1553: mutex_enter(&si->si_lock);
1.1 hannken 1554: retry:
/*
 * Remember the generation; whenever si_lock has been dropped and
 * re-taken it is compared again and the scan restarts on a change.
 */
1.46.6.2 jmcneill 1555: gen = si->si_gen;
1.46.6.1 jmcneill 1556: TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
1.1 hannken 1557: vp = ITOV(ip);
/*
 * The vnode lock is taken only once (snapshot_locked stays set
 * across retries); si_lock is released while waiting for it.
 */
1.46.6.2 jmcneill 1558: if (snapshot_locked == 0) {
1559: mutex_exit(&si->si_lock);
1560: if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_SLEEPFAIL) != 0) {
1561: mutex_enter(&si->si_lock);
1562: goto retry;
1563: }
1564: mutex_enter(&si->si_lock);
1565: snapshot_locked = 1;
1566: if (gen != si->si_gen)
1567: goto retry;
1568: }
1.1 hannken 1569: /*
1570: * Lookup block being written.
1571: */
1572: if (lbn < NDADDR) {
1573: blkno = db_get(ip, lbn);
1574: } else {
/*
 * Indirect case: fetch the indirect block with si_lock dropped.
 * From here on ibp is held and every path must release or write it.
 */
1.46.6.2 jmcneill 1575: mutex_exit(&si->si_lock);
1.4 hannken 1576: s = cow_enter();
1.22 yamt 1577: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1578: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1579: cow_leave(s);
1.46.6.2 jmcneill 1580: if (error) {
1581: mutex_enter(&si->si_lock);
1.1 hannken 1582: break;
1.46.6.2 jmcneill 1583: }
1.1 hannken 1584: indiroff = (lbn - NDADDR) % NINDIR(fs);
1585: blkno = idb_get(ip, ibp->b_data, indiroff);
1.46.6.2 jmcneill 1586: mutex_enter(&si->si_lock);
1587: if (gen != si->si_gen) {
1.46.6.3! joerg 1588: brelse(ibp, 0);
1.46.6.2 jmcneill 1589: goto retry;
1590: }
1.1 hannken 1591: }
1592: /*
1593: * Check to see if block needs to be copied.
1594: */
1595: if (blkno == 0) {
1596: /*
1597: * A block that we map is being freed. If it has not
1598: * been claimed yet, we will claim or copy it (below).
1599: */
1600: claimedblk = 1;
1601: } else if (blkno == BLK_SNAP) {
1602: /*
1603: * No previous snapshot claimed the block,
1604: * so it will be freed and become a BLK_NOCOPY
1605: * (don't care) for us.
1606: */
1607: if (claimedblk)
1608: panic("snapblkfree: inconsistent block type");
1609: if (lbn < NDADDR) {
1610: db_assign(ip, lbn, BLK_NOCOPY);
1611: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1612: } else {
1613: idb_assign(ip, ibp->b_data, indiroff,
1614: BLK_NOCOPY);
/* The write is done with si_lock dropped, hence the generation check. */
1.46.6.2 jmcneill 1615: mutex_exit(&si->si_lock);
1.1 hannken 1616: bwrite(ibp);
1.46.6.2 jmcneill 1617: mutex_enter(&si->si_lock);
1618: if (gen != si->si_gen)
1619: goto retry;
1.1 hannken 1620: }
1621: continue;
1622: } else /* BLK_NOCOPY or default */ {
1623: /*
1624: * If the snapshot has already copied the block
1625: * (default), or does not care about the block,
1626: * it is not needed.
1627: */
1.27 bouyer 1628: if (lbn >= NDADDR)
1.46.6.3! joerg 1629: brelse(ibp, 0);
1.1 hannken 1630: continue;
1631: }
1632: /*
1633: * If this is a full size block, we will just grab it
1634: * and assign it to the snapshot inode. Otherwise we
1635: * will proceed to copy it. See explanation for this
1636: * routine as to why only a single snapshot needs to
1637: * claim this block.
1638: */
1639: if (size == fs->fs_bsize) {
1640: #ifdef DEBUG
1641: if (snapdebug)
1.19 christos 1642: printf("%s %llu lbn %" PRId64
1643: "from inum %llu\n",
1644: "Grabonremove: snapino",
1645: (unsigned long long)ip->i_number,
1646: lbn, (unsigned long long)inum);
1.1 hannken 1647: #endif
/*
 * Claim path: record bno in this snapshot, drop both locks and
 * return 1 directly, without visiting the remaining snapshots.
 */
1.46.6.2 jmcneill 1648: mutex_exit(&si->si_lock);
1.1 hannken 1649: if (lbn < NDADDR) {
1650: db_assign(ip, lbn, bno);
1651: } else {
1652: idb_assign(ip, ibp->b_data, indiroff, bno);
1653: bwrite(ibp);
1654: }
1655: DIP_ADD(ip, blocks, btodb(size));
1656: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1657: VOP_UNLOCK(vp, 0);
1658: return (1);
1659: }
1660: if (lbn >= NDADDR)
1.46.6.3! joerg 1661: brelse(ibp, 0);
1.1 hannken 1662: #ifdef DEBUG
1663: if (snapdebug)
1.19 christos 1664: printf("%s%llu lbn %" PRId64 " %s %llu size %ld\n",
1665: "Copyonremove: snapino ",
1666: (unsigned long long)ip->i_number,
1667: lbn, "for inum", (unsigned long long)inum, size);
1.1 hannken 1668: #endif
1669: /*
1670: * If we have already read the old block contents, then
1671: * simply copy them to the new block. Note that we need
1672: * to synchronously write snapshots that have not been
1673: * unlinked, and hence will be visible after a crash,
1674: * to ensure their integrity.
1675: */
/*
 * saved_data is read once and kept across iterations (and retries)
 * so later snapshots reuse it; it is freed after the loop.
 */
1.46.6.2 jmcneill 1676: mutex_exit(&si->si_lock);
1677: if (saved_data == NULL) {
1678: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1679: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
1680: free(saved_data, M_UFSMNT);
1681: saved_data = NULL;
1682: mutex_enter(&si->si_lock);
1.1 hannken 1683: break;
1.46.6.2 jmcneill 1684: }
1.1 hannken 1685: }
1.46.6.2 jmcneill 1686: error = writevnblk(vp, saved_data, lbn);
1687: mutex_enter(&si->si_lock);
1688: if (error)
1.1 hannken 1689: break;
1.46.6.2 jmcneill 1690: if (gen != si->si_gen)
1691: goto retry;
1.1 hannken 1692: }
/* Every exit from the loop (normal end or break) holds si_lock. */
1.46.6.2 jmcneill 1693: mutex_exit(&si->si_lock);
1694: if (saved_data)
1.1 hannken 1695: free(saved_data, M_UFSMNT);
1696: /*
1697: * If we have been unable to allocate a block in which to do
1698: * the copy, then return non-zero so that the fragment will
1699: * not be freed. Although space will be lost, the snapshot
1700: * will stay consistent.
1701: */
1702: if (snapshot_locked)
1703: VOP_UNLOCK(vp, 0);
1704: return (error);
1705: }
1706:
1707: /*
1708: * Associate snapshot files when mounting.
1709: */
1710: void
1.18 thorpej 1711: ffs_snapshot_mount(struct mount *mp)
1.1 hannken 1712: {
1.46.6.1 jmcneill 1713: struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
1714: struct fs *fs = VFSTOUFS(mp)->um_fs;
1.31 ad 1715: struct lwp *l = curlwp;
1.1 hannken 1716: struct vnode *vp;
1717: struct inode *ip, *xp;
1.46.6.1 jmcneill 1718: struct snap_info *si;
1.1 hannken 1719: ufs2_daddr_t snaplistsize, *snapblklist;
1.15 hannken 1720: int i, error, ns, snaploc, loc;
1.1 hannken 1721:
1.46 hannken 1722: /*
1723: * No persistent snapshots on apple ufs file systems.
1724: */
1.46.6.1 jmcneill 1725: if (UFS_MPISAPPLEUFS(VFSTOUFS(mp)))
1.46 hannken 1726: return;
1727:
1.46.6.1 jmcneill 1728: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1729: si = si_mount_init(mp);
1.1 hannken 1730: ns = UFS_FSNEEDSWAP(fs);
1731: /*
1.22 yamt 1732: * XXX The following needs to be set before ffs_truncate or
1.1 hannken 1733: * VOP_READ can be called.
1734: */
1735: mp->mnt_stat.f_iosize = fs->fs_bsize;
1736: /*
1737: * Process each snapshot listed in the superblock.
1738: */
1739: vp = NULL;
1.46.6.2 jmcneill 1740: mutex_enter(&si->si_lock);
1.1 hannken 1741: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
1742: if (fs->fs_snapinum[snaploc] == 0)
1743: break;
1744: if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
1745: &vp)) != 0) {
1746: printf("ffs_snapshot_mount: vget failed %d\n", error);
1747: continue;
1748: }
1749: ip = VTOI(vp);
1750: if ((ip->i_flags & SF_SNAPSHOT) == 0) {
1751: printf("ffs_snapshot_mount: non-snapshot inode %d\n",
1752: fs->fs_snapinum[snaploc]);
1753: vput(vp);
1754: vp = NULL;
1755: for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
1756: if (fs->fs_snapinum[loc] == 0)
1757: break;
1758: fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
1759: }
1760: fs->fs_snapinum[loc - 1] = 0;
1761: snaploc--;
1762: continue;
1763: }
1.15 hannken 1764:
1765: /*
1766: * Read the block hints list. Use an empty list on
1767: * read errors.
1768: */
1769: error = vn_rdwr(UIO_READ, vp,
1.43 christos 1770: (void *)&snaplistsize, sizeof(snaplistsize),
1.15 hannken 1771: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1772: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
1.31 ad 1773: l->l_cred, NULL, NULL);
1.15 hannken 1774: if (error) {
1775: printf("ffs_snapshot_mount: read_1 failed %d\n", error);
1776: snaplistsize = 1;
1777: } else
1778: snaplistsize = ufs_rw64(snaplistsize, ns);
1.25 christos 1779: snapblklist = malloc(
1.15 hannken 1780: snaplistsize * sizeof(ufs2_daddr_t), M_UFSMNT, M_WAITOK);
1781: if (error)
1782: snapblklist[0] = 1;
1783: else {
1.43 christos 1784: error = vn_rdwr(UIO_READ, vp, (void *)snapblklist,
1.15 hannken 1785: snaplistsize * sizeof(ufs2_daddr_t),
1786: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1787: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
1.31 ad 1788: l->l_cred, NULL, NULL);
1.15 hannken 1789: for (i = 0; i < snaplistsize; i++)
1790: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1791: if (error) {
1792: printf("ffs_snapshot_mount: read_2 failed %d\n",
1793: error);
1794: snapblklist[0] = 1;
1795: }
1796: }
1797: ip->i_snapblklist = &snapblklist[0];
1798:
1.1 hannken 1799: /*
1800: * If there already exist snapshots on this filesystem, grab a
1801: * reference to their shared lock. If this is the first snapshot
1802: * on this filesystem, we need to allocate a lock for the
1803: * snapshots to share. In either case, acquire the snapshot
1804: * lock and give up our original private lock.
1805: */
1.46.6.1 jmcneill 1806: if ((xp = TAILQ_FIRST(&si->si_snapshots)) != NULL) {
1.1 hannken 1807: VI_LOCK(vp);
1.46.6.2 jmcneill 1808: vp->v_vnlock = ITOV(xp)->v_vnlock;
1.1 hannken 1809: } else {
1.46.6.2 jmcneill 1810: lockinit(&si->si_vnlock, PVFS, "snaplk",
1811: 0, LK_CANRECURSE);
1.1 hannken 1812: VI_LOCK(vp);
1.46.6.2 jmcneill 1813: vp->v_vnlock = &si->si_vnlock;
1.1 hannken 1814: }
1815: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
1816: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
1817: /*
1818: * Link it onto the active snapshot list.
1819: */
1820: if (ip->i_nextsnap.tqe_prev != 0)
1.19 christos 1821: panic("ffs_snapshot_mount: %llu already on list",
1822: (unsigned long long)ip->i_number);
1.1 hannken 1823: else
1.46.6.1 jmcneill 1824: TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
1.46.6.3! joerg 1825: vp->v_vflag |= VV_SYSTEM;
1.1 hannken 1826: VOP_UNLOCK(vp, 0);
1827: }
1828: /*
1829: * No usable snapshots found.
1830: */
1.46.6.2 jmcneill 1831: if (vp == NULL) {
1832: mutex_exit(&si->si_lock);
1.1 hannken 1833: return;
1.46.6.2 jmcneill 1834: }
1.1 hannken 1835: /*
1.15 hannken 1836: * Attach the block hints list. We always want to
1.1 hannken 1837: * use the list from the newest snapshot.
1.15 hannken 1838: */
1.46.6.1 jmcneill 1839: xp = TAILQ_LAST(&si->si_snapshots, inodelst);
1840: si->si_snapblklist = xp->i_snapblklist;
1.46.6.3! joerg 1841: fscow_establish(mp, ffs_copyonwrite, devvp);
1.46.6.2 jmcneill 1842: si->si_gen++;
1843: mutex_exit(&si->si_lock);
1.1 hannken 1844: }
1845:
1846: /*
1847: * Disassociate snapshot files when unmounting.
1848: */
1849: void
1.18 thorpej 1850: ffs_snapshot_unmount(struct mount *mp)
1.1 hannken 1851: {
1.46.6.1 jmcneill 1852: struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
1.1 hannken 1853: struct inode *xp;
1.46.6.2 jmcneill 1854: struct vnode *vp = NULL;
1.46.6.1 jmcneill 1855: struct snap_info *si;
1.1 hannken 1856:
1.46.6.1 jmcneill 1857: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1858: return;
1.46.6.2 jmcneill 1859: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 1860: while ((xp = TAILQ_FIRST(&si->si_snapshots)) != 0) {
1.1 hannken 1861: vp = ITOV(xp);
1862: vp->v_vnlock = &vp->v_lock;
1.46.6.1 jmcneill 1863: TAILQ_REMOVE(&si->si_snapshots, xp, i_nextsnap);
1.1 hannken 1864: xp->i_nextsnap.tqe_prev = 0;
1.46.6.1 jmcneill 1865: if (xp->i_snapblklist == si->si_snapblklist)
1866: si->si_snapblklist = NULL;
1.15 hannken 1867: FREE(xp->i_snapblklist, M_UFSMNT);
1.46.6.2 jmcneill 1868: if (xp->i_ffs_effnlink > 0) {
1869: si->si_gen++;
1870: mutex_exit(&si->si_lock);
1.1 hannken 1871: vrele(vp);
1.46.6.2 jmcneill 1872: mutex_enter(&si->si_lock);
1873: }
1.1 hannken 1874: }
1.46.6.2 jmcneill 1875: if (vp)
1.46.6.3! joerg 1876: fscow_disestablish(mp, ffs_copyonwrite, devvp);
1.46.6.2 jmcneill 1877: si->si_gen++;
1878: mutex_exit(&si->si_lock);
1.1 hannken 1879: }
1880:
1881: /*
1882: * Check for need to copy block that is about to be written,
1883: * copying the block if necessary.
 *
 * v is the device vnode (the cookie handed to fscow_establish()), bp
 * is the buffer about to be written.  The mount and its snap_info are
 * looked up through the device vnode's v_specmountpoint.
 *
 * Returns 0 when the mount has no snap_info, when there are no active
 * snapshots, or when the block is found in the preallocated list.
 * Otherwise returns the error of the first failing ffs_balloc(),
 * readfsblk() or writevnblk() call, or 0 if none failed.
1884: */
1885: static int
1.18 thorpej 1886: ffs_copyonwrite(void *v, struct buf *bp)
1.1 hannken 1887: {
1888: struct buf *ibp;
1889: struct fs *fs;
1890: struct inode *ip;
1.46.6.2 jmcneill 1891: struct vnode *devvp = v, *vp = NULL;
1.46.6.1 jmcneill 1892: struct mount *mp = devvp->v_specmountpoint;
1893: struct snap_info *si;
1.43 christos 1894: void *saved_data = NULL;
1.1 hannken 1895: ufs2_daddr_t lbn, blkno, *snapblklist;
1.46.6.2 jmcneill 1896: uint32_t gen;
1.4 hannken 1897: int lower, upper, mid, s, ns, indiroff, snapshot_locked = 0, error = 0;
1.1 hannken 1898:
1899: /*
1900: * Check for valid snapshots.
1901: */
1.46.6.1 jmcneill 1902: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1903: return 0;
1.46.6.2 jmcneill 1904: mutex_enter(&si->si_lock);
1.46.6.1 jmcneill 1905: ip = TAILQ_FIRST(&si->si_snapshots);
1.1 hannken 1906: if (ip == NULL) {
1.46.6.2 jmcneill 1907: mutex_exit(&si->si_lock);
1.11 perry 1908: return 0;
1.1 hannken 1909: }
1910: /*
1911: * First check to see if it is in the preallocated list.
1912: * By doing this check we avoid several potential deadlocks.
 *
 * Binary search over entries 1 .. snapblklist[0] - 1; element 0
 * supplies the upper bound.  (Assumes the entries are sorted in
 * ascending order -- TODO confirm against the code that builds
 * the list.)
1913: */
1914: fs = ip->i_fs;
1915: ns = UFS_FSNEEDSWAP(fs);
1916: lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
1.46.6.1 jmcneill 1917: snapblklist = si->si_snapblklist;
1918: upper = si->si_snapblklist[0] - 1;
1.1 hannken 1919: lower = 1;
1920: while (lower <= upper) {
1921: mid = (lower + upper) / 2;
1.15 hannken 1922: if (snapblklist[mid] == lbn)
1.1 hannken 1923: break;
1.15 hannken 1924: if (snapblklist[mid] < lbn)
1.1 hannken 1925: lower = mid + 1;
1926: else
1927: upper = mid - 1;
1928: }
/* lower <= upper here means the loop left via the "found" break. */
1929: if (lower <= upper) {
1.46.6.2 jmcneill 1930: mutex_exit(&si->si_lock);
1.1 hannken 1931: return 0;
1932: }
1933: /*
1934: * Not in the precomputed list, so check the snapshots.
 *
 * si_lock is held at the top of every iteration.  It is dropped
 * around each call that follows a mutex_exit() below; the value
 * of si_gen is remembered in gen and the whole scan is restarted
 * from "retry" when it has changed once the lock is held again.
1935: */
1936: retry:
1.46.6.2 jmcneill 1937: gen = si->si_gen;
1.46.6.1 jmcneill 1938: TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
1.1 hannken 1939: vp = ITOV(ip);
1940: /*
1941: * We ensure that everything of our own that needs to be
1942: * copied will be done at the time that ffs_snapshot is
1943: * called. Thus we can skip the check here which can
1.22 yamt 1944: * deadlock in doing the lookup in ffs_balloc.
1.1 hannken 1945: */
1946: if (bp->b_vp == vp)
1947: continue;
/*
 * Take the snapshot vnode lock once; snapshot_locked remembers
 * that it is held across retries and later iterations.  With
 * LK_SLEEPFAIL a non-zero return restarts the scan.
 */
1.46.6.2 jmcneill 1948: if (snapshot_locked == 0) {
1949: mutex_exit(&si->si_lock);
1950: if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_SLEEPFAIL) != 0) {
1951: mutex_enter(&si->si_lock);
1952: goto retry;
1953: }
1954: mutex_enter(&si->si_lock);
1955: snapshot_locked = 1;
1956: if (gen != si->si_gen)
1957: goto retry;
1958: }
1.1 hannken 1959: /*
1960: * Check to see if block needs to be copied. We do not have
1961: * to hold the snapshot lock while doing this lookup as it
1962: * will never require any additional allocations for the
1963: * snapshot inode.
 *
 * Direct blocks are read from the inode, everything else from
 * the indirect block returned by ffs_balloc(B_METAONLY).
1964: */
1965: if (lbn < NDADDR) {
1966: blkno = db_get(ip, lbn);
1967: } else {
1.46.6.2 jmcneill 1968: mutex_exit(&si->si_lock);
1.4 hannken 1969: s = cow_enter();
1.22 yamt 1970: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1971: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1972: cow_leave(s);
1.46.6.2 jmcneill 1973: if (error) {
1974: mutex_enter(&si->si_lock);
1.1 hannken 1975: break;
1.46.6.2 jmcneill 1976: }
1.1 hannken 1977: indiroff = (lbn - NDADDR) % NINDIR(fs);
1978: blkno = idb_get(ip, ibp->b_data, indiroff);
1.46.6.3! joerg 1979: brelse(ibp, 0);
1.46.6.2 jmcneill 1980: mutex_enter(&si->si_lock);
1981: if (gen != si->si_gen)
1982: goto retry;
1.1 hannken 1983: }
1984: #ifdef DIAGNOSTIC
1985: if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
1986: panic("ffs_copyonwrite: bad copy block");
1987: #endif
/* A non-zero entry: nothing to copy for this snapshot. */
1988: if (blkno != 0)
1989: continue;
1.4 hannken 1990: #ifdef DIAGNOSTIC
1.41 ad 1991: if (curlwp->l_pflag & LP_UFSCOW)
1.4 hannken 1992: printf("ffs_copyonwrite: recursive call\n");
1993: #endif
1.1 hannken 1994: /*
1995: * Allocate the block into which to do the copy. Since
1996: * multiple processes may all try to copy the same block,
1997: * we have to recheck our need to do a copy if we sleep
1998: * waiting for the lock.
1999: *
2000: * Because all snapshots on a filesystem share a single
2001: * lock, we ensure that we will never be in competition
2002: * with another process to allocate a block.
2003: */
2004: #ifdef DEBUG
2005: if (snapdebug) {
1.19 christos 2006: printf("Copyonwrite: snapino %llu lbn %" PRId64 " for ",
2007: (unsigned long long)ip->i_number, lbn);
1.1 hannken 2008: if (bp->b_vp == devvp)
2009: printf("fs metadata");
2010: else
1.19 christos 2011: printf("inum %llu", (unsigned long long)
2012: VTOI(bp->b_vp)->i_number);
1.4 hannken 2013: printf(" lblkno %" PRId64 "\n", bp->b_lblkno);
1.1 hannken 2014: }
2015: #endif
2016: /*
2017: * If we have already read the old block contents, then
2018: * simply copy them to the new block. Note that we need
2019: * to synchronously write snapshots that have not been
2020: * unlinked, and hence will be visible after a crash,
2021: * to ensure their integrity.
 *
 * The old contents are read at most once (saved_data) and are
 * reused for every further snapshot that needs a copy.
2022: */
1.46.6.2 jmcneill 2023: mutex_exit(&si->si_lock);
2024: if (saved_data == NULL) {
2025: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
2026: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
2027: free(saved_data, M_UFSMNT);
2028: saved_data = NULL;
2029: mutex_enter(&si->si_lock);
1.1 hannken 2030: break;
1.46.6.2 jmcneill 2031: }
1.1 hannken 2032: }
1.46.6.2 jmcneill 2033: error = writevnblk(vp, saved_data, lbn);
2034: mutex_enter(&si->si_lock);
2035: if (error)
1.1 hannken 2036: break;
1.46.6.2 jmcneill 2037: if (gen != si->si_gen)
2038: goto retry;
1.1 hannken 2039: }
2040: /*
2041: * Common exit, reached with si_lock held: drop the lock, free
2042: * the saved copy of the block if one was read, and release the
2043: * snapshot vnode lock if it was taken above.
2044: */
1.46.6.2 jmcneill 2045: mutex_exit(&si->si_lock);
2046: if (saved_data)
1.1 hannken 2047: free(saved_data, M_UFSMNT);
2048: if (snapshot_locked)
2049: VOP_UNLOCK(vp, 0);
2050: return error;
2051: }
2052:
2053: /*
2054: * Read the specified block from disk. Vp is usually a snapshot vnode.
2055: */
2056: static int
1.43 christos 2057: readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2058: {
1.24 yamt 2059: int error;
1.1 hannken 2060: struct inode *ip = VTOI(vp);
2061: struct fs *fs = ip->i_fs;
2062: struct buf *nbp;
2063:
1.24 yamt 2064: nbp = getiobuf();
1.1 hannken 2065: nbp->b_flags = B_READ;
2066: nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize;
2067: nbp->b_error = 0;
2068: nbp->b_data = data;
2069: nbp->b_blkno = nbp->b_rawblkno = fsbtodb(fs, blkstofrags(fs, lbn));
2070: nbp->b_proc = NULL;
2071: nbp->b_dev = ip->i_devvp->v_rdev;
2072: nbp->b_vp = NULLVP;
2073:
1.44 ad 2074: bdev_strategy(nbp);
1.1 hannken 2075:
2076: error = biowait(nbp);
2077:
1.24 yamt 2078: putiobuf(nbp);
1.1 hannken 2079:
2080: return error;
2081: }
2082:
1.37 christos 2083: #if !defined(FFS_NO_SNAPSHOT)
1.1 hannken 2084: /*
1.4 hannken 2085: * Read the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2086: */
2087: static int
1.43 christos 2088: readvnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2089: {
1.4 hannken 2090: int error;
2091: daddr_t bn;
2092: off_t offset;
1.1 hannken 2093: struct inode *ip = VTOI(vp);
2094: struct fs *fs = ip->i_fs;
2095:
1.4 hannken 2096: error = VOP_BMAP(vp, lbn, NULL, &bn, NULL);
2097: if (error)
2098: return error;
1.1 hannken 2099:
1.4 hannken 2100: if (bn != (daddr_t)-1) {
2101: offset = dbtob(bn);
2102: simple_lock(&vp->v_interlock);
2103: error = VOP_PUTPAGES(vp, trunc_page(offset),
2104: round_page(offset+fs->fs_bsize),
2105: PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
1.1 hannken 2106: if (error)
1.4 hannken 2107: return error;
2108:
2109: return readfsblk(vp, data, fragstoblks(fs, dbtofsb(fs, bn)));
1.1 hannken 2110: }
2111:
1.4 hannken 2112: bzero(data, fs->fs_bsize);
2113:
2114: return 0;
1.1 hannken 2115: }
1.37 christos 2116: #endif /* !defined(FFS_NO_SNAPSHOT) */
1.1 hannken 2117:
2118: /*
1.4 hannken 2119: * Write the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2120: */
2121: static int
1.43 christos 2122: writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2123: {
1.4 hannken 2124: int s, error;
2125: off_t offset;
2126: struct buf *bp;
1.1 hannken 2127: struct inode *ip = VTOI(vp);
2128: struct fs *fs = ip->i_fs;
2129:
1.4 hannken 2130: offset = lblktosize(fs, (off_t)lbn);
2131: s = cow_enter();
2132: simple_lock(&vp->v_interlock);
2133: error = VOP_PUTPAGES(vp, trunc_page(offset),
2134: round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
2135: if (error == 0)
1.22 yamt 2136: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.4 hannken 2137: fs->fs_bsize, KERNCRED, B_SYNC, &bp);
2138: cow_leave(s);
1.1 hannken 2139: if (error)
2140: return error;
2141:
1.4 hannken 2142: bcopy(data, bp->b_data, fs->fs_bsize);
2143: bp->b_flags |= B_NOCACHE;
2144:
2145: return bwrite(bp);
2146: }
2147:
2148: /*
1.41 ad 2149: * Set/reset lwp's LP_UFSCOW flag.
1.4 hannken 2150: * May be called recursive.
2151: */
2152: static inline int
2153: cow_enter(void)
2154: {
2155: struct lwp *l = curlwp;
2156:
1.41 ad 2157: if (l->l_pflag & LP_UFSCOW) {
1.4 hannken 2158: return 0;
2159: } else {
1.41 ad 2160: l->l_pflag |= LP_UFSCOW;
2161: return LP_UFSCOW;
1.1 hannken 2162: }
1.4 hannken 2163: }
2164:
2165: static inline void
2166: cow_leave(int flag)
2167: {
2168: struct lwp *l = curlwp;
1.1 hannken 2169:
1.41 ad 2170: l->l_pflag &= ~flag;
1.1 hannken 2171: }
2172:
2173: /*
2174: * Get/Put direct block from inode or buffer containing disk addresses. Take
2175: * care for fs type (UFS1/UFS2) and byte swapping. These functions should go
2176: * into a global include.
2177: */
2178: static inline ufs2_daddr_t
2179: db_get(struct inode *ip, int loc)
2180: {
2181: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2182: return ufs_rw32(ip->i_ffs1_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2183: else
1.2 hannken 2184: return ufs_rw64(ip->i_ffs2_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2185: }
2186:
2187: static inline void
2188: db_assign(struct inode *ip, int loc, ufs2_daddr_t val)
2189: {
2190: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2191: ip->i_ffs1_db[loc] = ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2192: else
1.2 hannken 2193: ip->i_ffs2_db[loc] = ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2194: }
2195:
2196: static inline ufs2_daddr_t
1.43 christos 2197: idb_get(struct inode *ip, void *bf, int loc)
1.1 hannken 2198: {
2199: if (ip->i_ump->um_fstype == UFS1)
1.17 christos 2200: return ufs_rw32(((ufs1_daddr_t *)(bf))[loc],
1.2 hannken 2201: UFS_IPNEEDSWAP(ip));
1.1 hannken 2202: else
1.17 christos 2203: return ufs_rw64(((ufs2_daddr_t *)(bf))[loc],
1.2 hannken 2204: UFS_IPNEEDSWAP(ip));
1.1 hannken 2205: }
2206:
2207: static inline void
1.43 christos 2208: idb_assign(struct inode *ip, void *bf, int loc, ufs2_daddr_t val)
1.1 hannken 2209: {
2210: if (ip->i_ump->um_fstype == UFS1)
1.17 christos 2211: ((ufs1_daddr_t *)(bf))[loc] =
1.2 hannken 2212: ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2213: else
1.17 christos 2214: ((ufs2_daddr_t *)(bf))[loc] =
1.2 hannken 2215: ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2216: }
CVSweb <webmaster@jp.NetBSD.org>