Annotation of src/sys/ufs/ffs/ffs_snapshot.c, Revision 1.5
1.1 hannken 1: /*
2: * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
3: *
4: * Further information about snapshots can be obtained from:
5: *
6: * Marshall Kirk McKusick http://www.mckusick.com/softdep/
7: * 1614 Oxford Street mckusick@mckusick.com
8: * Berkeley, CA 94709-1608 +1-510-843-9542
9: * USA
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: *
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: *
21: * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
22: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24: * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
25: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31: * SUCH DAMAGE.
32: *
33: * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
34: *
35: * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp
36: */
37:
38: #include <sys/cdefs.h>
1.5 ! hannken 39: __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.4 2004/06/20 18:55:58 hannken Exp $");
1.1 hannken 40:
41: #include <sys/param.h>
42: #include <sys/kernel.h>
43: #include <sys/systm.h>
44: #include <sys/conf.h>
45: #include <sys/buf.h>
46: #include <sys/proc.h>
47: #include <sys/namei.h>
48: #include <sys/sched.h>
49: #include <sys/stat.h>
50: #include <sys/malloc.h>
51: #include <sys/mount.h>
52: #include <sys/resource.h>
53: #include <sys/resourcevar.h>
54: #include <sys/vnode.h>
55:
56: #include <miscfs/specfs/specdev.h>
57:
58: #include <ufs/ufs/quota.h>
59: #include <ufs/ufs/ufsmount.h>
60: #include <ufs/ufs/inode.h>
61: #include <ufs/ufs/ufs_extern.h>
62: #include <ufs/ufs/ufs_bswap.h>
63:
64: #include <ufs/ffs/fs.h>
65: #include <ufs/ffs/ffs_extern.h>
66:
67: /* FreeBSD -> NetBSD conversion */
68: #define KERNCRED proc0.p_ucred
69: #define ufs1_daddr_t int32_t
70: #define ufs2_daddr_t int64_t
71: #define ufs_lbn_t daddr_t
72: #define VI_MTX(v) (&(v)->v_interlock)
73: #define VI_LOCK(v) simple_lock(&(v)->v_interlock)
74: #define VI_UNLOCK(v) simple_unlock(&(v)->v_interlock)
75: #define MNT_ILOCK(v) simple_lock(&mntvnode_slock)
76: #define MNT_IUNLOCK(v) simple_unlock(&mntvnode_slock)
77:
78: static int cgaccount(int, struct vnode *, caddr_t, int);
79: static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
80: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
81: ufs_lbn_t, int), int);
82: static int indiracct_ufs1(struct vnode *, struct vnode *, int,
83: ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
84: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
85: ufs_lbn_t, int), int);
86: static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
87: struct fs *, ufs_lbn_t, int);
88: static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
89: struct fs *, ufs_lbn_t, int);
90: static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
91: struct fs *, ufs_lbn_t, int);
92: static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
93: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
94: ufs_lbn_t, int), int);
95: static int indiracct_ufs2(struct vnode *, struct vnode *, int,
96: ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
97: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
98: ufs_lbn_t, int), int);
99: static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
100: struct fs *, ufs_lbn_t, int);
101: static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
102: struct fs *, ufs_lbn_t, int);
103: static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
104: struct fs *, ufs_lbn_t, int);
105: static int ffs_copyonwrite(void *, struct buf *);
106: static int readfsblk(struct vnode *, caddr_t, ufs2_daddr_t);
107: static int readvnblk(struct vnode *, caddr_t, ufs2_daddr_t);
108: static int writevnblk(struct vnode *, caddr_t, ufs2_daddr_t);
1.4 hannken 109: static inline int cow_enter(void);
110: static inline void cow_leave(int);
1.1 hannken 111: static inline ufs2_daddr_t db_get(struct inode *, int);
112: static inline void db_assign(struct inode *, int, ufs2_daddr_t);
113: static inline ufs2_daddr_t idb_get(struct inode *, caddr_t, int);
114: static inline void idb_assign(struct inode *, caddr_t, int, ufs2_daddr_t);
115:
116: #ifdef DEBUG
117: static int snapdebug = 0;
118: #endif
119:
120: /*
121: * Create a snapshot file and initialize it for the filesystem.
1.4 hannken 122: * Vnode is locked on entry and return.
1.1 hannken 123: */
124: int
125: ffs_snapshot(mp, vp, ctime)
126: struct mount *mp;
127: struct vnode *vp;
128: struct timespec *ctime;
129: {
130: ufs2_daddr_t numblks, blkno, *blkp, snaplistsize = 0, *snapblklist;
131: int error, ns, cg, snaploc;
132: int i, size, len, loc;
133: int flag = mp->mnt_flag;
134: struct timeval starttime;
135: #ifdef DEBUG
136: struct timeval endtime;
137: #endif
138: struct timespec ts;
139: long redo = 0;
140: int32_t *lp;
141: void *space;
142: caddr_t cgbuf;
143: struct ufsmount *ump = VFSTOUFS(mp);
144: struct fs *copy_fs = NULL, *fs = ump->um_fs;
145: struct proc *p = curproc;
146: struct inode *ip, *xp;
147: struct buf *bp, *ibp;
148: struct vattr vat;
149: struct vnode *xvp, *nvp, *devvp;
150: struct vop_vfree_args args;
151:
152: ns = UFS_FSNEEDSWAP(fs);
153: /*
154: * Need to serialize access to snapshot code per filesystem.
155: */
156: /*
157: * If the vnode already is a snapshot, return.
158: */
159: if (VTOI(vp)->i_flags & SF_SNAPSHOT) {
160: if (ctime) {
161: ctime->tv_sec = DIP(VTOI(vp), mtime);
162: ctime->tv_nsec = DIP(VTOI(vp), mtimensec);
163: }
164: return 0;
165: }
166: /*
167: * Check mount and check for exclusive reference.
168: */
1.4 hannken 169: if (vp->v_mount != mp)
1.1 hannken 170: return EXDEV;
1.4 hannken 171: if (vp->v_usecount != 1 || vp->v_writecount != 0)
1.1 hannken 172: return EBUSY;
173: if (vp->v_size != 0) {
174: error = VOP_TRUNCATE(vp, 0, 0, NOCRED, p);
1.4 hannken 175: if (error)
1.1 hannken 176: return error;
177: }
178: /*
179: * Assign a snapshot slot in the superblock.
180: */
181: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
182: if (fs->fs_snapinum[snaploc] == 0)
183: break;
184: if (snaploc == FSMAXSNAP)
185: return (ENOSPC);
186: ip = VTOI(vp);
187: devvp = ip->i_devvp;
188: /*
189: * Allocate and copy the last block contents so as to be able
190: * to set size to that of the filesystem.
191: */
192: numblks = howmany(fs->fs_size, fs->fs_frag);
193: cgbuf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
194: if ((error = readfsblk(vp, cgbuf, numblks - 1)) != 0)
195: goto out;
196: error = vn_rdwr(UIO_WRITE, vp,
197: cgbuf, fs->fs_bsize, lblktosize(fs, (off_t)(numblks - 1)),
198: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, p->p_ucred, NULL, p);
199: if (error)
200: goto out;
201: /*
202: * Preallocate critical data structures so that we can copy
203: * them in without further allocation after we suspend all
204: * operations on the filesystem. We would like to just release
205: * the allocated buffers without writing them since they will
206: * be filled in below once we are ready to go, but this upsets
207: * the soft update code, so we go ahead and write the new buffers.
208: *
209: * Allocate all indirect blocks and mark all of them as not
210: * needing to be copied.
211: */
212: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
213: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
214: fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
215: if (error)
216: goto out;
217: bwrite(ibp);
218: }
219: /*
220: * Allocate copies for the superblock and its summary information.
221: */
222: bzero(cgbuf, fs->fs_bsize);
223: blkno = lblkno(fs, fs->fs_sblockloc);
224: for (loc = 0; loc < howmany(fs->fs_sbsize, fs->fs_bsize); loc++)
225: if ((error = writevnblk(vp, cgbuf, blkno + loc)) != 0)
226: goto out;
227: blkno = fragstoblks(fs, fs->fs_csaddr);
228: for (loc = 0; loc < howmany(fs->fs_cssize, fs->fs_bsize); loc++)
229: if ((error = writevnblk(vp, cgbuf, blkno + loc)) != 0)
230: goto out;
231: /*
232: * Allocate all cylinder group blocks.
233: */
234: for (cg = 0; cg < fs->fs_ncg; cg++)
235: if ((error = writevnblk(vp, cgbuf,
236: fragstoblks(fs, cgtod(fs, cg)))) != 0)
237: goto out;
238: /*
239: * Copy all the cylinder group maps. Although the
240: * filesystem is still active, we hope that only a few
241: * cylinder groups will change between now and when we
242: * suspend operations. Thus, we will be able to quickly
243: * touch up the few cylinder groups that changed during
244: * the suspension period.
245: */
246: len = howmany(fs->fs_ncg, NBBY);
247: MALLOC(fs->fs_active, u_char *, len, M_DEVBUF, M_WAITOK | M_ZERO);
248: for (cg = 0; cg < fs->fs_ncg; cg++) {
249: if ((error = cgaccount(cg, vp, cgbuf, 1)) != 0)
250: goto out;
251: if ((error = writevnblk(vp, cgbuf,
252: fragstoblks(fs, cgtod(fs, cg)))) != 0)
253: goto out;
254: }
255: /*
256: * Change inode to snapshot type file.
257: */
258: ip->i_flags |= SF_SNAPSHOT;
259: DIP_ASSIGN(ip, flags, ip->i_flags);
260: ip->i_flag |= IN_CHANGE | IN_UPDATE;
261: /*
262: * Ensure that the snapshot is completely on disk.
263: * Since we have marked it as a snapshot it is safe to
264: * unlock it as no process will be allowed to write to it.
265: */
266: if ((error = VOP_FSYNC(vp, KERNCRED, FSYNC_WAIT, 0, 0, p)) != 0)
267: goto out;
268: VOP_UNLOCK(vp, 0);
269: /*
270: * All allocations are done, so we can now snapshot the system.
271: *
272: * Suspend operation on filesystem.
273: */
274: if ((error = vfs_write_suspend(vp->v_mount, PUSER|PCATCH, 0)) != 0) {
275: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
276: goto out;
277: }
278: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
279: microtime(&starttime);
280: /*
281: * First, copy all the cylinder group maps that have changed.
282: */
283: for (cg = 0; cg < fs->fs_ncg; cg++) {
284: if (ACTIVECG_ISSET(fs, cg))
285: continue;
286: redo++;
287: if ((error = cgaccount(cg, vp, cgbuf, 2)) != 0)
288: goto out1;
289: if ((error = writevnblk(vp, cgbuf,
290: fragstoblks(fs, cgtod(fs, cg)))) != 0)
291: goto out1;
292: }
293: /*
294: * Grab a copy of the superblock and its summary information.
295: * We delay writing it until the suspension is released below.
296: */
297: loc = blkoff(fs, fs->fs_sblockloc);
298: if (loc > 0)
299: bzero(&cgbuf[0], loc);
300: copy_fs = (struct fs *)(cgbuf + loc);
301: bcopy(fs, copy_fs, fs->fs_sbsize);
302: size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
303: if (fs->fs_sbsize < size)
304: bzero(&cgbuf[loc + fs->fs_sbsize], size - fs->fs_sbsize);
305: size = blkroundup(fs, fs->fs_cssize);
306: if (fs->fs_contigsumsize > 0)
307: size += fs->fs_ncg * sizeof(int32_t);
308: space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
309: copy_fs->fs_csp = space;
310: bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
311: (char *)space += fs->fs_cssize;
312: loc = howmany(fs->fs_cssize, fs->fs_fsize);
313: i = fs->fs_frag - loc % fs->fs_frag;
314: len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
315: if (len > 0) {
316: if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
317: len, KERNCRED, &bp)) != 0) {
318: brelse(bp);
319: free(copy_fs->fs_csp, M_UFSMNT);
320: goto out1;
321: }
322: bcopy(bp->b_data, space, (u_int)len);
323: (char *)space += len;
324: bp->b_flags |= B_INVAL | B_NOCACHE;
325: brelse(bp);
326: }
327: if (fs->fs_contigsumsize > 0) {
328: copy_fs->fs_maxcluster = lp = space;
329: for (i = 0; i < fs->fs_ncg; i++)
330: *lp++ = fs->fs_contigsumsize;
331: }
332: /*
333: * We must check for active files that have been unlinked
334: * (e.g., with a zero link count). We have to expunge all
335: * trace of these files from the snapshot so that they are
336: * not reclaimed prematurely by fsck or unnecessarily dumped.
337: * We turn off the MNTK_SUSPENDED flag to avoid a panic from
338: * spec_strategy about writing on a suspended filesystem.
339: * Note that we skip unlinked snapshot files as they will
340: * be handled separately below.
341: *
342: * We also calculate the needed size for the snapshot list.
343: */
344: snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
345: FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
346: MNT_ILOCK(mp);
347: loop:
348: for (xvp = LIST_FIRST(&mp->mnt_vnodelist); xvp; xvp = nvp) {
349: /*
350: * Make sure this vnode wasn't reclaimed in getnewvnode().
351: * Start over if it has (it won't be on the list anymore).
352: */
353: if (xvp->v_mount != mp)
354: goto loop;
355: nvp = LIST_NEXT(xvp, v_mntvnodes);
356: VI_LOCK(xvp);
357: MNT_IUNLOCK(mp);
358: if ((xvp->v_flag & VXLOCK) ||
359: xvp->v_usecount == 0 || xvp->v_type == VNON ||
360: (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
361: VI_UNLOCK(xvp);
362: MNT_ILOCK(mp);
363: continue;
364: }
365: if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) {
366: MNT_ILOCK(mp);
367: goto loop;
368: }
369: #ifdef DEBUG
370: if (snapdebug)
371: vprint("ffs_snapshot: busy vnode", xvp);
372: #endif
373: if (VOP_GETATTR(xvp, &vat, p->p_ucred, p) == 0 &&
374: vat.va_nlink > 0) {
375: VOP_UNLOCK(xvp, 0);
376: MNT_ILOCK(mp);
377: continue;
378: }
379: xp = VTOI(xvp);
380: if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
381: VOP_UNLOCK(xvp, 0);
382: MNT_ILOCK(mp);
383: continue;
384: }
385: /*
386: * If there is a fragment, clear it here.
387: */
388: blkno = 0;
389: loc = howmany(xp->i_size, fs->fs_bsize) - 1;
390: if (loc < NDADDR) {
391: len = fragroundup(fs, blkoff(fs, xp->i_size));
1.5 ! hannken 392: if (len > 0 && len < fs->fs_bsize) {
1.1 hannken 393: ffs_blkfree(copy_fs, vp, db_get(xp, loc),
394: len, xp->i_number);
395: blkno = db_get(xp, loc);
396: db_assign(xp, loc, 0);
397: }
398: }
399: snaplistsize += 1;
400: if (xp->i_ump->um_fstype == UFS1)
401: error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
402: BLK_NOCOPY);
403: else
404: error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
405: BLK_NOCOPY);
406: if (blkno)
407: db_assign(xp, loc, blkno);
408: if (!error) {
409: args.a_pvp = vp;
410: args.a_ino = xp->i_number;
411: args.a_mode = xp->i_mode;
412: error = ffs_freefile(&args);
413: }
414: VOP_UNLOCK(xvp, 0);
415: if (error) {
416: free(copy_fs->fs_csp, M_UFSMNT);
417: goto out1;
418: }
419: MNT_ILOCK(mp);
420: }
421: MNT_IUNLOCK(mp);
422: /*
423: * If there already exist snapshots on this filesystem, grab a
424: * reference to their shared lock. If this is the first snapshot
425: * on this filesystem, we need to allocate a lock for the snapshots
426: * to share. In either case, acquire the snapshot lock and give
427: * up our original private lock.
428: */
429: VI_LOCK(devvp);
430: if ((xp = TAILQ_FIRST(&ump->um_snapshots)) != NULL) {
431: struct lock *lkp;
432:
433: lkp = ITOV(xp)->v_vnlock;
434: VI_UNLOCK(devvp);
435: VI_LOCK(vp);
436: vp->v_vnlock = lkp;
437: } else {
438: struct lock *lkp;
439:
440: VI_UNLOCK(devvp);
441: MALLOC(lkp, struct lock *, sizeof(struct lock), M_UFSMNT,
442: M_WAITOK);
443: lockinit(lkp, PVFS, "snaplk", 0, LK_CANRECURSE);
444: VI_LOCK(vp);
445: vp->v_vnlock = lkp;
446: }
447: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
448: transferlockers(&vp->v_lock, vp->v_vnlock);
449: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
450: /*
451: * If this is the first snapshot on this filesystem, then we need
452: * to allocate the space for the list of preallocated snapshot blocks.
453: * This list will be refined below, but this preliminary one will
454: * keep us out of deadlock until the full one is ready.
455: */
456: if (xp == NULL) {
457: MALLOC(snapblklist, ufs2_daddr_t *,
458: snaplistsize * sizeof(ufs2_daddr_t), M_UFSMNT, M_WAITOK);
459: blkp = &snapblklist[1];
460: *blkp++ = ufs_rw64(lblkno(fs, fs->fs_sblockloc), ns);
461: blkno = fragstoblks(fs, fs->fs_csaddr);
462: for (cg = 0; cg < fs->fs_ncg; cg++) {
463: if (fragstoblks(fs, cgtod(fs, cg) > blkno))
464: break;
465: *blkp++ = ufs_rw64(fragstoblks(fs, cgtod(fs, cg)), ns);
466: }
467: len = howmany(fs->fs_cssize, fs->fs_bsize);
468: for (loc = 0; loc < len; loc++)
469: *blkp++ = ufs_rw64(blkno + loc, ns);
470: for (; cg < fs->fs_ncg; cg++)
471: *blkp++ = ufs_rw64(fragstoblks(fs, cgtod(fs, cg)), ns);
472: snapblklist[0] = ufs_rw64(blkp - snapblklist, ns);
473: VI_LOCK(devvp);
474: if (ump->um_snapblklist != NULL)
475: panic("ffs_snapshot: non-empty list");
476: ump->um_snapblklist = snapblklist;
477: ump->um_snaplistsize = blkp - snapblklist;
478: VI_UNLOCK(devvp);
479: }
480: /*
481: * Record snapshot inode. Since this is the newest snapshot,
482: * it must be placed at the end of the list.
483: */
484: VI_LOCK(devvp);
485: fs->fs_snapinum[snaploc] = ip->i_number;
486: if (ip->i_nextsnap.tqe_prev != 0)
487: panic("ffs_snapshot: %d already on list", ip->i_number);
488: TAILQ_INSERT_TAIL(&ump->um_snapshots, ip, i_nextsnap);
489: VI_UNLOCK(devvp);
490: if (xp == NULL)
491: vn_cow_establish(devvp, ffs_copyonwrite, devvp);
492: vp->v_flag |= VSYSTEM;
493: out1:
494: /*
495: * Resume operation on filesystem.
496: */
497: vfs_write_resume(vp->v_mount);
498: /*
499: * Set the mtime to the time the snapshot has been taken.
500: */
501: TIMEVAL_TO_TIMESPEC(&starttime, &ts);
502: if (ctime)
503: *ctime = ts;
504: DIP_ASSIGN(ip, mtime, ts.tv_sec);
505: DIP_ASSIGN(ip, mtimensec, ts.tv_nsec);
506: ip->i_flag |= IN_CHANGE | IN_UPDATE;
507:
508: #ifdef DEBUG
509: if (starttime.tv_sec > 0) {
510: microtime(&endtime);
511: timersub(&endtime, &starttime, &endtime);
512: printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
513: vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
514: endtime.tv_usec / 1000, redo, fs->fs_ncg);
515: }
516: #endif
517: if (error)
518: goto out;
519: /*
520: * Copy allocation information from all the snapshots in
521: * this snapshot and then expunge them from its view.
522: */
523: TAILQ_FOREACH(xp, &ump->um_snapshots, i_nextsnap) {
524: if (xp == ip)
525: break;
526: if (xp->i_ump->um_fstype == UFS1)
527: error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
528: BLK_SNAP);
529: else
530: error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
531: BLK_SNAP);
532: if (error) {
533: fs->fs_snapinum[snaploc] = 0;
534: goto done;
535: }
536: }
537: /*
538: * Allocate space for the full list of preallocated snapshot blocks.
539: */
540: MALLOC(snapblklist, ufs2_daddr_t *, snaplistsize * sizeof(ufs2_daddr_t),
541: M_UFSMNT, M_WAITOK);
542: ip->i_snapblklist = &snapblklist[1];
543: /*
544: * Expunge the blocks used by the snapshots from the set of
545: * blocks marked as used in the snapshot bitmaps. Also, collect
546: * the list of allocated blocks in i_snapblklist.
547: */
548: if (ip->i_ump->um_fstype == UFS1)
549: error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
550: else
551: error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
552: if (error) {
553: fs->fs_snapinum[snaploc] = 0;
554: FREE(snapblklist, M_UFSMNT);
555: goto done;
556: }
557: if (snaplistsize < ip->i_snapblklist - snapblklist)
558: panic("ffs_snapshot: list too small");
559: snaplistsize = ip->i_snapblklist - snapblklist;
560: snapblklist[0] = ufs_rw64(snaplistsize, ns);
561: ip->i_snapblklist = 0;
562: /*
563: * Write out the list of allocated blocks to the end of the snapshot.
564: */
565: error = vn_rdwr(UIO_WRITE, vp,
566: (caddr_t)snapblklist, snaplistsize*sizeof(ufs2_daddr_t), ip->i_size,
567: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, p->p_ucred, NULL, p);
568: if (error) {
569: fs->fs_snapinum[snaploc] = 0;
570: FREE(snapblklist, M_UFSMNT);
571: goto done;
572: }
573: /*
574: * Write the superblock and its summary information
575: * to the snapshot.
576: */
577: blkno = fragstoblks(fs, fs->fs_csaddr);
578: len = howmany(fs->fs_cssize, fs->fs_bsize);
579: space = copy_fs->fs_csp;
580: if (ns) {
581: ffs_sb_swap(copy_fs, copy_fs);
582: ffs_csum_swap(space, space, fs->fs_cssize);
583: }
584: for (loc = 0; loc < len; loc++) {
585: if ((error = writevnblk(vp, space, blkno + loc)) != 0) {
586: fs->fs_snapinum[snaploc] = 0;
587: FREE(snapblklist, M_UFSMNT);
588: goto done;
589: }
590: space = (char *)space + fs->fs_bsize;
591: }
592: /*
593: * As this is the newest list, it is the most inclusive, so
594: * should replace the previous list.
595: */
596: VI_LOCK(devvp);
597: space = ump->um_snapblklist;
598: ump->um_snapblklist = snapblklist;
599: ump->um_snaplistsize = snaplistsize;
600: VI_UNLOCK(devvp);
601: if (space != NULL)
602: FREE(space, M_UFSMNT);
603: done:
604: free(copy_fs->fs_csp, M_UFSMNT);
605: blkno = lblkno(fs, fs->fs_sblockloc);
606: if (error == 0 && (error = writevnblk(vp, cgbuf, blkno)) != 0)
607: fs->fs_snapinum[snaploc] = 0;
608: out:
1.4 hannken 609: /*
610: * All block address modifications are done. Invalidate and free
611: * all pages on the snapshot vnode. Those coming from read ahead
612: * are no longer valid.
613: */
614: if (!error) {
615: simple_lock(&vp->v_interlock);
616: error = VOP_PUTPAGES(vp, 0, 0,
617: PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
618: }
1.1 hannken 619: if (cgbuf)
620: free(cgbuf, M_UFSMNT);
621: if (fs->fs_active != 0) {
622: FREE(fs->fs_active, M_DEVBUF);
623: fs->fs_active = 0;
624: }
625: mp->mnt_flag = flag;
626: if (error)
627: (void) VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p);
628: else
629: vref(vp);
630: return (error);
631: }
632:
633: /*
634: * Copy a cylinder group map. All the unallocated blocks are marked
635: * BLK_NOCOPY so that the snapshot knows that it need not copy them
636: * if they are later written. If passno is one, then this is a first
637: * pass, so only setting needs to be done. If passno is 2, then this
638: * is a revision to a previous pass which must be undone as the
639: * replacement pass is done.
640: */
641: static int
642: cgaccount(cg, vp, data, passno)
643: int cg;
644: struct vnode *vp;
645: caddr_t data;
646: int passno;
647: {
648: struct buf *bp, *ibp;
649: struct inode *ip;
650: struct cg *cgp;
651: struct fs *fs;
652: ufs2_daddr_t base, numblks;
653: int error, len, loc, ns, indiroff;
654:
655: ip = VTOI(vp);
656: fs = ip->i_fs;
657: ns = UFS_FSNEEDSWAP(fs);
658: error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
659: (int)fs->fs_cgsize, KERNCRED, &bp);
660: if (error) {
661: brelse(bp);
662: return (error);
663: }
664: cgp = (struct cg *)bp->b_data;
665: if (!cg_chkmagic(cgp, ns)) {
666: brelse(bp);
667: return (EIO);
668: }
669: ACTIVECG_SET(fs, cg);
670:
671: bcopy(bp->b_data, data, fs->fs_cgsize);
672: brelse(bp);
673: if (fs->fs_cgsize < fs->fs_bsize)
674: bzero(&data[fs->fs_cgsize],
675: fs->fs_bsize - fs->fs_cgsize);
676: numblks = howmany(fs->fs_size, fs->fs_frag);
677: len = howmany(fs->fs_fpg, fs->fs_frag);
678: base = cg * fs->fs_fpg / fs->fs_frag;
679: if (base + len >= numblks)
680: len = numblks - base - 1;
681: loc = 0;
682: if (base < NDADDR) {
683: for ( ; loc < NDADDR; loc++) {
684: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
685: db_assign(ip, loc, BLK_NOCOPY);
686: else if (db_get(ip, loc) == BLK_NOCOPY) {
687: if (passno == 2)
688: db_assign(ip, loc, 0);
689: else if (passno == 1)
690: panic("ffs_snapshot: lost direct block");
691: }
692: }
693: }
694: if ((error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
695: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
696: return (error);
697: indiroff = (base + loc - NDADDR) % NINDIR(fs);
698: for ( ; loc < len; loc++, indiroff++) {
699: if (indiroff >= NINDIR(fs)) {
700: bwrite(ibp);
701: if ((error = VOP_BALLOC(vp,
702: lblktosize(fs, (off_t)(base + loc)),
703: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
704: return (error);
705: indiroff = 0;
706: }
707: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
708: idb_assign(ip, ibp->b_data, indiroff, BLK_NOCOPY);
709: else if (idb_get(ip, ibp->b_data, indiroff) == BLK_NOCOPY) {
710: if (passno == 2)
711: idb_assign(ip, ibp->b_data, indiroff, 0);
712: else if (passno == 1)
713: panic("ffs_snapshot: lost indirect block");
714: }
715: }
716: bwrite(ibp);
717: return (0);
718: }
719:
720: /*
721: * Before expunging a snapshot inode, note all the
722: * blocks that it claims with BLK_SNAP so that fsck will
723: * be able to account for those blocks properly and so
724: * that this snapshot knows that it need not copy them
725: * if the other snapshot holding them is freed. This code
726: * is reproduced once each for UFS1 and UFS2.
727: */
728: static int
729: expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
730: struct vnode *snapvp;
731: struct inode *cancelip;
732: struct fs *fs;
733: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
734: struct fs *, ufs_lbn_t, int);
735: int expungetype;
736: {
1.4 hannken 737: int i, s, error, ns, indiroff;
1.1 hannken 738: ufs_lbn_t lbn, rlbn;
739: ufs2_daddr_t len, blkno, numblks, blksperindir;
740: struct ufs1_dinode *dip;
741: struct buf *bp;
742: caddr_t buf;
743:
744: ns = UFS_FSNEEDSWAP(fs);
745: /*
746: * Prepare to expunge the inode. If its inode block has not
747: * yet been copied, then allocate and fill the copy.
748: */
749: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
750: blkno = 0;
751: if (lbn < NDADDR) {
752: blkno = db_get(VTOI(snapvp), lbn);
753: } else {
1.4 hannken 754: s = cow_enter();
1.1 hannken 755: error = VOP_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
756: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 757: cow_leave(s);
1.1 hannken 758: if (error)
759: return (error);
760: indiroff = (lbn - NDADDR) % NINDIR(fs);
761: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
762: brelse(bp);
763: }
764: buf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
765: if (blkno != 0)
766: error = readvnblk(snapvp, buf, lbn);
767: else
768: error = readfsblk(snapvp, buf, lbn);
769: if (error) {
770: free(buf, M_UFSMNT);
771: return error;
772: }
773: /*
774: * Set a snapshot inode to be a zero length file, regular files
775: * to be completely unallocated.
776: */
777: dip = (struct ufs1_dinode *)buf + ino_to_fsbo(fs, cancelip->i_number);
778: if (expungetype == BLK_NOCOPY)
779: dip->di_mode = 0;
780: dip->di_size = 0;
781: dip->di_blocks = 0;
782: dip->di_flags =
783: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
784: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
785: error = writevnblk(snapvp, buf, lbn);
786: free(buf, M_UFSMNT);
787: if (error)
788: return error;
789: /*
790: * Now go through and expunge all the blocks in the file
791: * using the function requested.
792: */
793: numblks = howmany(cancelip->i_size, fs->fs_bsize);
794: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_db[0],
795: &cancelip->i_ffs1_db[NDADDR], fs, 0, expungetype)))
796: return (error);
797: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_ib[0],
798: &cancelip->i_ffs1_ib[NIADDR], fs, -1, expungetype)))
799: return (error);
800: blksperindir = 1;
801: lbn = -NDADDR;
802: len = numblks - NDADDR;
803: rlbn = NDADDR;
804: for (i = 0; len > 0 && i < NIADDR; i++) {
805: error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
806: ufs_rw32(cancelip->i_ffs1_ib[i], ns), lbn, rlbn, len,
807: blksperindir, fs, acctfunc, expungetype);
808: if (error)
809: return (error);
810: blksperindir *= NINDIR(fs);
811: lbn -= blksperindir + 1;
812: len -= blksperindir;
813: rlbn += blksperindir;
814: }
815: return (0);
816: }
817:
818: /*
819: * Descend an indirect block chain for vnode cancelvp accounting for all
820: * its indirect blocks in snapvp.
821: */
822: static int
823: indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
824: blksperindir, fs, acctfunc, expungetype)
825: struct vnode *snapvp;
826: struct vnode *cancelvp;
827: int level;
828: ufs1_daddr_t blkno;
829: ufs_lbn_t lbn;
830: ufs_lbn_t rlbn;
831: ufs_lbn_t remblks;
832: ufs_lbn_t blksperindir;
833: struct fs *fs;
834: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
835: struct fs *, ufs_lbn_t, int);
836: int expungetype;
837: {
838: int error, ns, num, i;
839: ufs_lbn_t subblksperindir;
840: struct indir indirs[NIADDR + 2];
841: ufs1_daddr_t last, *bap;
842: struct buf *bp;
843:
844: ns = UFS_FSNEEDSWAP(fs);
845:
846: if (blkno == 0) {
847: if (expungetype == BLK_NOCOPY)
848: return (0);
849: panic("indiracct_ufs1: missing indir");
850: }
851: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
852: return (error);
853: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
854: panic("indiracct_ufs1: botched params");
855: /*
856: * We have to expand bread here since it will deadlock looking
857: * up the block number for any blocks that are not in the cache.
858: */
859: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
860: bp->b_blkno = fsbtodb(fs, blkno);
861: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
862: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
863: brelse(bp);
864: return (error);
865: }
866: /*
867: * Account for the block pointers in this indirect block.
868: */
869: last = howmany(remblks, blksperindir);
870: if (last > NINDIR(fs))
871: last = NINDIR(fs);
872: MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
873: bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
874: brelse(bp);
875: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
876: level == 0 ? rlbn : -1, expungetype);
877: if (error || level == 0)
878: goto out;
879: /*
880: * Account for the block pointers in each of the indirect blocks
881: * in the levels below us.
882: */
883: subblksperindir = blksperindir / NINDIR(fs);
884: for (lbn++, level--, i = 0; i < last; i++) {
885: error = indiracct_ufs1(snapvp, cancelvp, level,
886: ufs_rw32(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
887: fs, acctfunc, expungetype);
888: if (error)
889: goto out;
890: rlbn += blksperindir;
891: lbn -= blksperindir;
892: remblks -= blksperindir;
893: }
894: out:
895: FREE(bap, M_DEVBUF);
896: return (error);
897: }
898:
899: /*
900: * Do both snap accounting and map accounting.
901: */
902: static int
903: fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
904: struct vnode *vp;
905: ufs1_daddr_t *oldblkp, *lastblkp;
906: struct fs *fs;
907: ufs_lbn_t lblkno;
908: int exptype; /* BLK_SNAP or BLK_NOCOPY */
909: {
910: int error;
911:
912: if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
913: return (error);
914: return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
915: }
916:
917: /*
918: * Identify a set of blocks allocated in a snapshot inode.
919: */
920: static int
921: snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
922: struct vnode *vp;
923: ufs1_daddr_t *oldblkp, *lastblkp;
924: struct fs *fs;
925: ufs_lbn_t lblkno;
926: int expungetype; /* BLK_SNAP or BLK_NOCOPY */
927: {
928: struct inode *ip = VTOI(vp);
929: ufs1_daddr_t blkno, *blkp;
930: ufs_lbn_t lbn;
931: struct buf *ibp;
932: int error, ns;
933:
934: ns = UFS_FSNEEDSWAP(fs);
935:
936: for ( ; oldblkp < lastblkp; oldblkp++) {
937: blkno = ufs_rw32(*oldblkp, ns);
938: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
939: continue;
940: lbn = fragstoblks(fs, blkno);
941: if (lbn < NDADDR) {
942: blkp = &ip->i_ffs1_db[lbn];
943: ip->i_flag |= IN_CHANGE | IN_UPDATE;
944: } else {
945: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
946: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
947: if (error)
948: return (error);
949: blkp = &((ufs1_daddr_t *)(ibp->b_data))
950: [(lbn - NDADDR) % NINDIR(fs)];
951: }
952: /*
953: * If we are expunging a snapshot vnode and we
954: * find a block marked BLK_NOCOPY, then it is
955: * one that has been allocated to this snapshot after
956: * we took our current snapshot and can be ignored.
957: */
958: blkno = ufs_rw32(*blkp, ns);
959: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
960: if (lbn >= NDADDR)
961: brelse(ibp);
962: } else {
963: if (blkno != 0)
964: panic("snapacct_ufs1: bad block");
965: *blkp = ufs_rw32(expungetype, ns);
966: if (lbn >= NDADDR)
967: bwrite(ibp);
968: }
969: }
970: return (0);
971: }
972:
973: /*
974: * Account for a set of blocks allocated in a snapshot inode.
975: */
976: static int
977: mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
978: struct vnode *vp;
979: ufs1_daddr_t *oldblkp, *lastblkp;
980: struct fs *fs;
981: ufs_lbn_t lblkno;
982: int expungetype;
983: {
984: ufs1_daddr_t blkno;
985: struct inode *ip;
986: ino_t inum;
987: int acctit, ns;
988:
989: ns = UFS_FSNEEDSWAP(fs);
990: ip = VTOI(vp);
991: inum = ip->i_number;
992: if (lblkno == -1)
993: acctit = 0;
994: else
995: acctit = 1;
996: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
997: blkno = ufs_rw32(*oldblkp, ns);
998: if (blkno == 0 || blkno == BLK_NOCOPY)
999: continue;
1000: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1001: *ip->i_snapblklist++ = ufs_rw64(lblkno, ns);
1002: if (blkno == BLK_SNAP)
1003: blkno = blkstofrags(fs, lblkno);
1004: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1005: }
1006: return (0);
1007: }
1008:
1009: /*
1010: * Before expunging a snapshot inode, note all the
1011: * blocks that it claims with BLK_SNAP so that fsck will
1012: * be able to account for those blocks properly and so
1013: * that this snapshot knows that it need not copy them
1014: * if the other snapshot holding them is freed. This code
1015: * is reproduced once each for UFS1 and UFS2.
1016: */
1017: static int
1018: expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
1019: struct vnode *snapvp;
1020: struct inode *cancelip;
1021: struct fs *fs;
1022: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1023: struct fs *, ufs_lbn_t, int);
1024: int expungetype;
1025: {
1.4 hannken 1026: int i, s, error, ns, indiroff;
1.1 hannken 1027: ufs_lbn_t lbn, rlbn;
1028: ufs2_daddr_t len, blkno, numblks, blksperindir;
1029: struct ufs2_dinode *dip;
1030: struct buf *bp;
1031: caddr_t buf;
1032:
1033: ns = UFS_FSNEEDSWAP(fs);
1034: /*
1035: * Prepare to expunge the inode. If its inode block has not
1036: * yet been copied, then allocate and fill the copy.
1037: */
1038: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
1039: blkno = 0;
1040: if (lbn < NDADDR) {
1041: blkno = db_get(VTOI(snapvp), lbn);
1042: } else {
1.4 hannken 1043: s = cow_enter();
1.1 hannken 1044: error = VOP_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
1045: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 1046: cow_leave(s);
1.1 hannken 1047: if (error)
1048: return (error);
1049: indiroff = (lbn - NDADDR) % NINDIR(fs);
1050: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
1051: brelse(bp);
1052: }
1053: buf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1054: if (blkno != 0)
1055: error = readvnblk(snapvp, buf, lbn);
1056: else
1057: error = readfsblk(snapvp, buf, lbn);
1058: if (error) {
1059: free(buf, M_UFSMNT);
1060: return error;
1061: }
1062: /*
1063: * Set a snapshot inode to be a zero length file, regular files
1064: * to be completely unallocated.
1065: */
1066: dip = (struct ufs2_dinode *)buf + ino_to_fsbo(fs, cancelip->i_number);
1067: if (expungetype == BLK_NOCOPY)
1068: dip->di_mode = 0;
1069: dip->di_size = 0;
1070: dip->di_blocks = 0;
1071: dip->di_flags =
1072: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
1073: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
1074: error = writevnblk(snapvp, buf, lbn);
1075: free(buf, M_UFSMNT);
1076: if (error)
1077: return error;
1078: /*
1079: * Now go through and expunge all the blocks in the file
1080: * using the function requested.
1081: */
1082: numblks = howmany(cancelip->i_size, fs->fs_bsize);
1083: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_db[0],
1084: &cancelip->i_ffs2_db[NDADDR], fs, 0, expungetype)))
1085: return (error);
1086: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_ib[0],
1087: &cancelip->i_ffs2_ib[NIADDR], fs, -1, expungetype)))
1088: return (error);
1089: blksperindir = 1;
1090: lbn = -NDADDR;
1091: len = numblks - NDADDR;
1092: rlbn = NDADDR;
1093: for (i = 0; len > 0 && i < NIADDR; i++) {
1094: error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
1095: ufs_rw64(cancelip->i_ffs2_ib[i], ns), lbn, rlbn, len,
1096: blksperindir, fs, acctfunc, expungetype);
1097: if (error)
1098: return (error);
1099: blksperindir *= NINDIR(fs);
1100: lbn -= blksperindir + 1;
1101: len -= blksperindir;
1102: rlbn += blksperindir;
1103: }
1104: return (0);
1105: }
1106:
1107: /*
1108: * Descend an indirect block chain for vnode cancelvp accounting for all
1109: * its indirect blocks in snapvp.
1110: */
1111: static int
1112: indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
1113: blksperindir, fs, acctfunc, expungetype)
1114: struct vnode *snapvp;
1115: struct vnode *cancelvp;
1116: int level;
1117: ufs2_daddr_t blkno;
1118: ufs_lbn_t lbn;
1119: ufs_lbn_t rlbn;
1120: ufs_lbn_t remblks;
1121: ufs_lbn_t blksperindir;
1122: struct fs *fs;
1123: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1124: struct fs *, ufs_lbn_t, int);
1125: int expungetype;
1126: {
1127: int error, ns, num, i;
1128: ufs_lbn_t subblksperindir;
1129: struct indir indirs[NIADDR + 2];
1130: ufs2_daddr_t last, *bap;
1131: struct buf *bp;
1132:
1133: ns = UFS_FSNEEDSWAP(fs);
1134:
1135: if (blkno == 0) {
1136: if (expungetype == BLK_NOCOPY)
1137: return (0);
1138: panic("indiracct_ufs2: missing indir");
1139: }
1140: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
1141: return (error);
1142: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
1143: panic("indiracct_ufs2: botched params");
1144: /*
1145: * We have to expand bread here since it will deadlock looking
1146: * up the block number for any blocks that are not in the cache.
1147: */
1148: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
1149: bp->b_blkno = fsbtodb(fs, blkno);
1150: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
1151: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
1152: brelse(bp);
1153: return (error);
1154: }
1155: /*
1156: * Account for the block pointers in this indirect block.
1157: */
1158: last = howmany(remblks, blksperindir);
1159: if (last > NINDIR(fs))
1160: last = NINDIR(fs);
1161: MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
1162: bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
1163: brelse(bp);
1164: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
1165: level == 0 ? rlbn : -1, expungetype);
1166: if (error || level == 0)
1167: goto out;
1168: /*
1169: * Account for the block pointers in each of the indirect blocks
1170: * in the levels below us.
1171: */
1172: subblksperindir = blksperindir / NINDIR(fs);
1173: for (lbn++, level--, i = 0; i < last; i++) {
1174: error = indiracct_ufs2(snapvp, cancelvp, level,
1175: ufs_rw64(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
1176: fs, acctfunc, expungetype);
1177: if (error)
1178: goto out;
1179: rlbn += blksperindir;
1180: lbn -= blksperindir;
1181: remblks -= blksperindir;
1182: }
1183: out:
1184: FREE(bap, M_DEVBUF);
1185: return (error);
1186: }
1187:
1188: /*
1189: * Do both snap accounting and map accounting.
1190: */
1191: static int
1192: fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
1193: struct vnode *vp;
1194: ufs2_daddr_t *oldblkp, *lastblkp;
1195: struct fs *fs;
1196: ufs_lbn_t lblkno;
1197: int exptype; /* BLK_SNAP or BLK_NOCOPY */
1198: {
1199: int error;
1200:
1201: if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
1202: return (error);
1203: return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
1204: }
1205:
1206: /*
1207: * Identify a set of blocks allocated in a snapshot inode.
1208: */
1209: static int
1210: snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
1211: struct vnode *vp;
1212: ufs2_daddr_t *oldblkp, *lastblkp;
1213: struct fs *fs;
1214: ufs_lbn_t lblkno;
1215: int expungetype; /* BLK_SNAP or BLK_NOCOPY */
1216: {
1217: struct inode *ip = VTOI(vp);
1218: ufs2_daddr_t blkno, *blkp;
1219: ufs_lbn_t lbn;
1220: struct buf *ibp;
1221: int error, ns;
1222:
1223: ns = UFS_FSNEEDSWAP(fs);
1224:
1225: for ( ; oldblkp < lastblkp; oldblkp++) {
1226: blkno = ufs_rw64(*oldblkp, ns);
1227: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
1228: continue;
1229: lbn = fragstoblks(fs, blkno);
1230: if (lbn < NDADDR) {
1231: blkp = &ip->i_ffs2_db[lbn];
1232: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1233: } else {
1234: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
1235: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1236: if (error)
1237: return (error);
1238: blkp = &((ufs2_daddr_t *)(ibp->b_data))
1239: [(lbn - NDADDR) % NINDIR(fs)];
1240: }
1241: /*
1242: * If we are expunging a snapshot vnode and we
1243: * find a block marked BLK_NOCOPY, then it is
1244: * one that has been allocated to this snapshot after
1245: * we took our current snapshot and can be ignored.
1246: */
1247: blkno = ufs_rw64(*blkp, ns);
1248: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1249: if (lbn >= NDADDR)
1250: brelse(ibp);
1251: } else {
1252: if (blkno != 0)
1253: panic("snapacct_ufs2: bad block");
1254: *blkp = ufs_rw64(expungetype, ns);
1255: if (lbn >= NDADDR)
1256: bwrite(ibp);
1257: }
1258: }
1259: return (0);
1260: }
1261:
1262: /*
1263: * Account for a set of blocks allocated in a snapshot inode.
1264: */
1265: static int
1266: mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
1267: struct vnode *vp;
1268: ufs2_daddr_t *oldblkp, *lastblkp;
1269: struct fs *fs;
1270: ufs_lbn_t lblkno;
1271: int expungetype;
1272: {
1273: ufs2_daddr_t blkno;
1274: struct inode *ip;
1275: ino_t inum;
1276: int acctit, ns;
1277:
1278: ns = UFS_FSNEEDSWAP(fs);
1279: ip = VTOI(vp);
1280: inum = ip->i_number;
1281: if (lblkno == -1)
1282: acctit = 0;
1283: else
1284: acctit = 1;
1285: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
1286: blkno = ufs_rw64(*oldblkp, ns);
1287: if (blkno == 0 || blkno == BLK_NOCOPY)
1288: continue;
1289: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1290: *ip->i_snapblklist++ = ufs_rw64(lblkno, ns);
1291: if (blkno == BLK_SNAP)
1292: blkno = blkstofrags(fs, lblkno);
1293: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1294: }
1295: return (0);
1296: }
1297:
1298: /*
1299: * Decrement extra reference on snapshot when last name is removed.
1300: * It will not be freed until the last open reference goes away.
1301: */
1302: void
1303: ffs_snapgone(ip)
1304: struct inode *ip;
1305: {
1306: struct ufsmount *ump = VFSTOUFS(ip->i_devvp->v_specmountpoint);
1307: struct inode *xp;
1308: struct fs *fs;
1309: int snaploc;
1310:
1311: /*
1312: * Find snapshot in incore list.
1313: */
1314: TAILQ_FOREACH(xp, &ump->um_snapshots, i_nextsnap)
1315: if (xp == ip)
1316: break;
1317: if (xp != NULL)
1318: vrele(ITOV(ip));
1319: #ifdef DEBUG
1320: else if (snapdebug)
1321: printf("ffs_snapgone: lost snapshot vnode %d\n",
1322: ip->i_number);
1323: #endif
1324: /*
1325: * Delete snapshot inode from superblock. Keep list dense.
1326: */
1327: fs = ip->i_fs;
1328: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
1329: if (fs->fs_snapinum[snaploc] == ip->i_number)
1330: break;
1331: if (snaploc < FSMAXSNAP) {
1332: for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
1333: if (fs->fs_snapinum[snaploc] == 0)
1334: break;
1335: fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
1336: }
1337: fs->fs_snapinum[snaploc - 1] = 0;
1338: }
1339: }
1340:
1341: /*
1342: * Prepare a snapshot file for being removed.
1343: */
1344: void
1345: ffs_snapremove(vp)
1346: struct vnode *vp;
1347: {
1348: struct inode *ip = VTOI(vp);
1349: struct vnode *devvp = ip->i_devvp;
1350: struct fs *fs = ip->i_fs;
1351: struct ufsmount *ump = VFSTOUFS(devvp->v_specmountpoint);
1352: struct lock *lkp;
1353: struct buf *ibp;
1354: ufs2_daddr_t numblks, blkno, dblk, *snapblklist;
1355: int error, ns, loc, last;
1356:
1357: ns = UFS_FSNEEDSWAP(fs);
1358: /*
1359: * If active, delete from incore list (this snapshot may
1360: * already have been in the process of being deleted, so
1361: * would not have been active).
1362: *
1363: * Clear copy-on-write flag if last snapshot.
1364: */
1365: if (ip->i_nextsnap.tqe_prev != 0) {
1366: VI_LOCK(devvp);
1367: lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE,
1368: VI_MTX(devvp));
1369: VI_LOCK(devvp);
1370: TAILQ_REMOVE(&ump->um_snapshots, ip, i_nextsnap);
1371: ip->i_nextsnap.tqe_prev = 0;
1372: lkp = vp->v_vnlock;
1373: vp->v_vnlock = &vp->v_lock;
1374: lockmgr(lkp, LK_RELEASE, NULL);
1375: if (TAILQ_FIRST(&ump->um_snapshots) != 0) {
1376: VI_UNLOCK(devvp);
1377: } else {
1378: snapblklist = ump->um_snapblklist;
1379: ump->um_snapblklist = 0;
1380: ump->um_snaplistsize = 0;
1381: lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp));
1382: lockmgr(lkp, LK_RELEASE, NULL);
1383: vn_cow_disestablish(devvp, ffs_copyonwrite, devvp);
1384: FREE(lkp, M_UFSMNT);
1385: FREE(snapblklist, M_UFSMNT);
1386: }
1387: }
1388: /*
1389: * Clear all BLK_NOCOPY fields. Pass any block claims to other
1390: * snapshots that want them (see ffs_snapblkfree below).
1391: */
1392: for (blkno = 1; blkno < NDADDR; blkno++) {
1393: dblk = db_get(ip, blkno);
1394: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1395: db_assign(ip, blkno, 0);
1396: else if ((dblk == blkstofrags(fs, blkno) &&
1397: ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
1398: ip->i_number))) {
1399: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1400: db_assign(ip, blkno, 0);
1401: }
1402: }
1403: numblks = howmany(ip->i_size, fs->fs_bsize);
1404: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
1405: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
1406: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1407: if (error)
1408: continue;
1409: if (fs->fs_size - blkno > NINDIR(fs))
1410: last = NINDIR(fs);
1411: else
1412: last = fs->fs_size - blkno;
1413: for (loc = 0; loc < last; loc++) {
1414: dblk = idb_get(ip, ibp->b_data, loc);
1415: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1416: idb_assign(ip, ibp->b_data, loc, 0);
1417: else if (dblk == blkstofrags(fs, blkno) &&
1418: ffs_snapblkfree(fs, ip->i_devvp, dblk,
1419: fs->fs_bsize, ip->i_number)) {
1420: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1421: idb_assign(ip, ibp->b_data, loc, 0);
1422: }
1423: }
1424: bwrite(ibp);
1425: }
1426: /*
1427: * Clear snapshot flag and drop reference.
1428: */
1429: ip->i_flags &= ~SF_SNAPSHOT;
1430: DIP_ASSIGN(ip, flags, ip->i_flags);
1431: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1432: }
1433:
1434: /*
1435: * Notification that a block is being freed. Return zero if the free
1436: * should be allowed to proceed. Return non-zero if the snapshot file
1437: * wants to claim the block. The block will be claimed if it is an
1438: * uncopied part of one of the snapshots. It will be freed if it is
1439: * either a BLK_NOCOPY or has already been copied in all of the snapshots.
1440: * If a fragment is being freed, then all snapshots that care about
1441: * it must make a copy since a snapshot file can only claim full sized
1442: * blocks. Note that if more than one snapshot file maps the block,
1443: * we can pick one at random to claim it. Since none of the snapshots
1444: * can change, we are assurred that they will all see the same unmodified
1445: * image. When deleting a snapshot file (see ffs_snapremove above), we
1446: * must push any of these claimed blocks to one of the other snapshots
1447: * that maps it. These claimed blocks are easily identified as they will
1448: * have a block number equal to their logical block number within the
1449: * snapshot. A copied block can never have this property because they
1450: * must always have been allocated from a BLK_NOCOPY location.
1451: */
1452: int
1453: ffs_snapblkfree(fs, devvp, bno, size, inum)
1454: struct fs *fs;
1455: struct vnode *devvp;
1456: ufs2_daddr_t bno;
1457: long size;
1458: ino_t inum;
1459: {
1460: struct ufsmount *ump = VFSTOUFS(devvp->v_specmountpoint);
1461: struct buf *ibp;
1462: struct inode *ip;
1463: struct vnode *vp = NULL, *saved_vp = NULL;
1464: caddr_t saved_data = NULL;
1465: ufs_lbn_t lbn;
1466: ufs2_daddr_t blkno;
1.4 hannken 1467: int s, indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
1.1 hannken 1468:
1469: lbn = fragstoblks(fs, bno);
1470: retry:
1471: VI_LOCK(devvp);
1472: TAILQ_FOREACH(ip, &ump->um_snapshots, i_nextsnap) {
1473: vp = ITOV(ip);
1474: /*
1475: * Lookup block being written.
1476: */
1477: if (lbn < NDADDR) {
1478: blkno = db_get(ip, lbn);
1479: } else {
1480: if (snapshot_locked == 0 &&
1481: lockmgr(vp->v_vnlock,
1482: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
1483: VI_MTX(devvp)) != 0)
1484: goto retry;
1485: snapshot_locked = 1;
1.4 hannken 1486: s = cow_enter();
1.1 hannken 1487: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
1488: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1489: cow_leave(s);
1.1 hannken 1490: if (error)
1491: break;
1492: indiroff = (lbn - NDADDR) % NINDIR(fs);
1493: blkno = idb_get(ip, ibp->b_data, indiroff);
1494: }
1495: /*
1496: * Check to see if block needs to be copied.
1497: */
1498: if (blkno == 0) {
1499: /*
1500: * A block that we map is being freed. If it has not
1501: * been claimed yet, we will claim or copy it (below).
1502: */
1503: claimedblk = 1;
1504: } else if (blkno == BLK_SNAP) {
1505: /*
1506: * No previous snapshot claimed the block,
1507: * so it will be freed and become a BLK_NOCOPY
1508: * (don't care) for us.
1509: */
1510: if (claimedblk)
1511: panic("snapblkfree: inconsistent block type");
1512: if (snapshot_locked == 0 &&
1513: lockmgr(vp->v_vnlock,
1514: LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
1515: VI_MTX(devvp)) != 0) {
1516: if (lbn >= NDADDR)
1517: brelse(ibp);
1518: vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL);
1519: goto retry;
1520: }
1521: snapshot_locked = 1;
1522: if (lbn < NDADDR) {
1523: db_assign(ip, lbn, BLK_NOCOPY);
1524: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1525: } else {
1526: idb_assign(ip, ibp->b_data, indiroff,
1527: BLK_NOCOPY);
1528: bwrite(ibp);
1529: }
1530: continue;
1531: } else /* BLK_NOCOPY or default */ {
1532: /*
1533: * If the snapshot has already copied the block
1534: * (default), or does not care about the block,
1535: * it is not needed.
1536: */
1537: if (lbn >= NDADDR)
1538: brelse(ibp);
1539: continue;
1540: }
1541: /*
1542: * If this is a full size block, we will just grab it
1543: * and assign it to the snapshot inode. Otherwise we
1544: * will proceed to copy it. See explanation for this
1545: * routine as to why only a single snapshot needs to
1546: * claim this block.
1547: */
1548: if (snapshot_locked == 0 &&
1549: lockmgr(vp->v_vnlock,
1550: LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
1551: VI_MTX(devvp)) != 0) {
1552: if (lbn >= NDADDR)
1553: brelse(ibp);
1554: vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL);
1555: goto retry;
1556: }
1557: snapshot_locked = 1;
1558: if (size == fs->fs_bsize) {
1559: #ifdef DEBUG
1560: if (snapdebug)
1.4 hannken 1561: printf("%s %d lbn %" PRId64 " from inum %d\n",
1.1 hannken 1562: "Grabonremove: snapino", ip->i_number,
1.4 hannken 1563: lbn, inum);
1.1 hannken 1564: #endif
1565: if (lbn < NDADDR) {
1566: db_assign(ip, lbn, bno);
1567: } else {
1568: idb_assign(ip, ibp->b_data, indiroff, bno);
1569: bwrite(ibp);
1570: }
1571: DIP_ADD(ip, blocks, btodb(size));
1572: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1573: VOP_UNLOCK(vp, 0);
1574: return (1);
1575: }
1576: if (lbn >= NDADDR)
1577: brelse(ibp);
1578: #ifdef DEBUG
1579: if (snapdebug)
1.4 hannken 1580: printf("%s%d lbn %" PRId64 " %s %d size %ld\n",
1.1 hannken 1581: "Copyonremove: snapino ", ip->i_number,
1.4 hannken 1582: lbn, "for inum", inum, size);
1.1 hannken 1583: #endif
1584: /*
1585: * If we have already read the old block contents, then
1586: * simply copy them to the new block. Note that we need
1587: * to synchronously write snapshots that have not been
1588: * unlinked, and hence will be visible after a crash,
1589: * to ensure their integrity.
1590: */
1591: if (saved_data) {
1592: error = writevnblk(vp, saved_data, lbn);
1593: if (error)
1594: break;
1595: continue;
1596: }
1597: /*
1598: * Otherwise, read the old block contents into the buffer.
1599: */
1600: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1601: saved_vp = vp;
1602: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
1603: free(saved_data, M_UFSMNT);
1604: saved_data = NULL;
1605: break;
1606: }
1607: }
1608: /*
1609: * Note that we need to synchronously write snapshots that
1610: * have not been unlinked, and hence will be visible after
1611: * a crash, to ensure their integrity.
1612: */
1613: if (saved_data) {
1614: error = writevnblk(saved_vp, saved_data, lbn);
1615: free(saved_data, M_UFSMNT);
1616: }
1617: /*
1618: * If we have been unable to allocate a block in which to do
1619: * the copy, then return non-zero so that the fragment will
1620: * not be freed. Although space will be lost, the snapshot
1621: * will stay consistent.
1622: */
1623: if (snapshot_locked)
1624: VOP_UNLOCK(vp, 0);
1625: else
1626: VI_UNLOCK(devvp);
1627: return (error);
1628: }
1629:
1630: /*
1631: * Associate snapshot files when mounting.
1632: */
1633: void
1634: ffs_snapshot_mount(mp)
1635: struct mount *mp;
1636: {
1637: struct ufsmount *ump = VFSTOUFS(mp);
1638: struct vnode *devvp = ump->um_devvp;
1639: struct fs *fs = ump->um_fs;
1640: struct proc *p = curproc;
1641: struct vnode *vp;
1642: struct inode *ip, *xp;
1643: ufs2_daddr_t snaplistsize, *snapblklist;
1644: int error, ns, snaploc, loc;
1645:
1646: ns = UFS_FSNEEDSWAP(fs);
1647: /*
1648: * XXX The following needs to be set before VOP_TRUNCATE or
1649: * VOP_READ can be called.
1650: */
1651: mp->mnt_stat.f_iosize = fs->fs_bsize;
1652: /*
1653: * Process each snapshot listed in the superblock.
1654: */
1655: vp = NULL;
1656: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
1657: if (fs->fs_snapinum[snaploc] == 0)
1658: break;
1659: if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
1660: &vp)) != 0) {
1661: printf("ffs_snapshot_mount: vget failed %d\n", error);
1662: continue;
1663: }
1664: ip = VTOI(vp);
1665: if ((ip->i_flags & SF_SNAPSHOT) == 0) {
1666: printf("ffs_snapshot_mount: non-snapshot inode %d\n",
1667: fs->fs_snapinum[snaploc]);
1668: vput(vp);
1669: vp = NULL;
1670: for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
1671: if (fs->fs_snapinum[loc] == 0)
1672: break;
1673: fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
1674: }
1675: fs->fs_snapinum[loc - 1] = 0;
1676: snaploc--;
1677: continue;
1678: }
1679: /*
1680: * If there already exist snapshots on this filesystem, grab a
1681: * reference to their shared lock. If this is the first snapshot
1682: * on this filesystem, we need to allocate a lock for the
1683: * snapshots to share. In either case, acquire the snapshot
1684: * lock and give up our original private lock.
1685: */
1686: VI_LOCK(devvp);
1687: if ((xp = TAILQ_FIRST(&ump->um_snapshots)) != NULL) {
1688: struct lock *lkp;
1689:
1690: lkp = ITOV(xp)->v_vnlock;
1691: VI_UNLOCK(devvp);
1692: VI_LOCK(vp);
1693: vp->v_vnlock = lkp;
1694: } else {
1695: struct lock *lkp;
1696:
1697: VI_UNLOCK(devvp);
1698: MALLOC(lkp, struct lock *, sizeof(struct lock),
1699: M_UFSMNT, M_WAITOK);
1700: lockinit(lkp, PVFS, "snaplk", 0, LK_CANRECURSE);
1701: VI_LOCK(vp);
1702: vp->v_vnlock = lkp;
1703: }
1704: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
1705: transferlockers(&vp->v_lock, vp->v_vnlock);
1706: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
1707: /*
1708: * Link it onto the active snapshot list.
1709: */
1710: VI_LOCK(devvp);
1711: if (ip->i_nextsnap.tqe_prev != 0)
1712: panic("ffs_snapshot_mount: %d already on list",
1713: ip->i_number);
1714: else
1715: TAILQ_INSERT_TAIL(&ump->um_snapshots, ip, i_nextsnap);
1716: vp->v_flag |= VSYSTEM;
1717: VI_UNLOCK(devvp);
1718: VOP_UNLOCK(vp, 0);
1719: }
1720: /*
1721: * No usable snapshots found.
1722: */
1723: if (vp == NULL)
1724: return;
1725: /*
1726: * Allocate the space for the block hints list. We always want to
1727: * use the list from the newest snapshot.
1728: */
1729: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1730: error = vn_rdwr(UIO_READ, vp,
1731: (caddr_t)&snaplistsize, sizeof(snaplistsize),
1732: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1733: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, p->p_ucred, NULL, p);
1734: if (error) {
1735: printf("ffs_snapshot_mount: read_1 failed %d\n", error);
1736: VOP_UNLOCK(vp, 0);
1737: return;
1738: }
1739: snaplistsize = ufs_rw64(snaplistsize, ns);
1740: MALLOC(snapblklist, ufs2_daddr_t *, snaplistsize * sizeof(ufs2_daddr_t),
1741: M_UFSMNT, M_WAITOK);
1742: error = vn_rdwr(UIO_READ, vp,
1743: (caddr_t)snapblklist, snaplistsize * sizeof(ufs2_daddr_t),
1744: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1745: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, p->p_ucred, NULL, p);
1746: if (error) {
1747: printf("ffs_snapshot_mount: read_2 failed %d\n", error);
1748: VOP_UNLOCK(vp, 0);
1749: FREE(snapblklist, M_UFSMNT);
1750: return;
1751: }
1752: VOP_UNLOCK(vp, 0);
1753: VI_LOCK(devvp);
1754: ump->um_snaplistsize = snaplistsize;
1755: ump->um_snapblklist = snapblklist;
1756: VI_UNLOCK(devvp);
1757: vn_cow_establish(devvp, ffs_copyonwrite, devvp);
1758: }
1759:
1760: /*
1761: * Disassociate snapshot files when unmounting.
1762: */
1763: void
1764: ffs_snapshot_unmount(mp)
1765: struct mount *mp;
1766: {
1767: struct ufsmount *ump = VFSTOUFS(mp);
1768: struct vnode *devvp = ump->um_devvp;
1769: struct lock *lkp = NULL;
1770: struct inode *xp;
1771: struct vnode *vp;
1772:
1773: VI_LOCK(devvp);
1774: while ((xp = TAILQ_FIRST(&ump->um_snapshots)) != 0) {
1775: vp = ITOV(xp);
1776: lkp = vp->v_vnlock;
1777: vp->v_vnlock = &vp->v_lock;
1778: TAILQ_REMOVE(&ump->um_snapshots, xp, i_nextsnap);
1779: xp->i_nextsnap.tqe_prev = 0;
1780: if (xp->i_ffs_effnlink > 0) {
1781: VI_UNLOCK(devvp);
1782: vrele(vp);
1783: VI_LOCK(devvp);
1784: }
1785: }
1786: if (ump->um_snapblklist != NULL) {
1787: FREE(ump->um_snapblklist, M_UFSMNT);
1788: ump->um_snapblklist = NULL;
1789: ump->um_snaplistsize = 0;
1790: }
1791: VI_UNLOCK(devvp);
1792: if (lkp != NULL) {
1793: vn_cow_disestablish(devvp, ffs_copyonwrite, devvp);
1794: FREE(lkp, M_UFSMNT);
1795: }
1796: }
1797:
1798: /*
1799: * Check for need to copy block that is about to be written,
1800: * copying the block if necessary.
1801: */
1802: static int
1803: ffs_copyonwrite(v, bp)
1804: void *v;
1805: struct buf *bp;
1806: {
1807: struct buf *ibp;
1808: struct fs *fs;
1809: struct inode *ip;
1810: struct vnode *devvp = v, *vp = 0, *saved_vp = NULL;
1811: struct ufsmount *ump = VFSTOUFS(devvp->v_specmountpoint);
1812: caddr_t saved_data = NULL;
1813: ufs2_daddr_t lbn, blkno, *snapblklist;
1.4 hannken 1814: int lower, upper, mid, s, ns, indiroff, snapshot_locked = 0, error = 0;
1.1 hannken 1815:
1816: /*
1817: * Check for valid snapshots.
1818: */
1819: VI_LOCK(devvp);
1820: ip = TAILQ_FIRST(&ump->um_snapshots);
1821: if (ip == NULL) {
1822: VI_UNLOCK(devvp);
1823: return 0;
1824: }
1825: /*
1826: * First check to see if it is in the preallocated list.
1827: * By doing this check we avoid several potential deadlocks.
1828: */
1829: fs = ip->i_fs;
1830: ns = UFS_FSNEEDSWAP(fs);
1831: lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
1832: snapblklist = ump->um_snapblklist;
1833: upper = ump->um_snaplistsize - 1;
1834: lower = 1;
1835: while (lower <= upper) {
1836: mid = (lower + upper) / 2;
1837: if (ufs_rw64(snapblklist[mid], ns) == lbn)
1838: break;
1839: if (ufs_rw64(snapblklist[mid], ns) < lbn)
1840: lower = mid + 1;
1841: else
1842: upper = mid - 1;
1843: }
1844: if (lower <= upper) {
1845: VI_UNLOCK(devvp);
1846: return 0;
1847: }
1848: /*
1849: * Not in the precomputed list, so check the snapshots.
1850: */
1851: retry:
1852: TAILQ_FOREACH(ip, &ump->um_snapshots, i_nextsnap) {
1853: vp = ITOV(ip);
1854: /*
1855: * We ensure that everything of our own that needs to be
1856: * copied will be done at the time that ffs_snapshot is
1857: * called. Thus we can skip the check here which can
1858: * deadlock in doing the lookup in VOP_BALLOC.
1859: */
1860: if (bp->b_vp == vp)
1861: continue;
1862: /*
1863: * Check to see if block needs to be copied. We do not have
1864: * to hold the snapshot lock while doing this lookup as it
1865: * will never require any additional allocations for the
1866: * snapshot inode.
1867: */
1868: if (lbn < NDADDR) {
1869: blkno = db_get(ip, lbn);
1870: } else {
1871: if (snapshot_locked == 0 &&
1872: lockmgr(vp->v_vnlock,
1873: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
1874: VI_MTX(devvp)) != 0) {
1875: VI_LOCK(devvp);
1876: goto retry;
1877: }
1878: snapshot_locked = 1;
1.4 hannken 1879: s = cow_enter();
1.1 hannken 1880: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
1881: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1882: cow_leave(s);
1.1 hannken 1883: if (error)
1884: break;
1885: indiroff = (lbn - NDADDR) % NINDIR(fs);
1886: blkno = idb_get(ip, ibp->b_data, indiroff);
1887: brelse(ibp);
1888: }
1889: #ifdef DIAGNOSTIC
1890: if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
1891: panic("ffs_copyonwrite: bad copy block");
1892: #endif
1893: if (blkno != 0)
1894: continue;
1.4 hannken 1895: #ifdef DIAGNOSTIC
1896: if (curlwp->l_flag & L_COWINPROGRESS)
1897: printf("ffs_copyonwrite: recursive call\n");
1898: #endif
1.1 hannken 1899: /*
1900: * Allocate the block into which to do the copy. Since
1901: * multiple processes may all try to copy the same block,
1902: * we have to recheck our need to do a copy if we sleep
1903: * waiting for the lock.
1904: *
1905: * Because all snapshots on a filesystem share a single
1906: * lock, we ensure that we will never be in competition
1907: * with another process to allocate a block.
1908: */
1909: if (snapshot_locked == 0 &&
1910: lockmgr(vp->v_vnlock,
1911: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
1912: VI_MTX(devvp)) != 0) {
1913: VI_LOCK(devvp);
1914: goto retry;
1915: }
1916: snapshot_locked = 1;
1917: #ifdef DEBUG
1918: if (snapdebug) {
1.4 hannken 1919: printf("Copyonwrite: snapino %d lbn %" PRId64 " for ",
1920: ip->i_number, lbn);
1.1 hannken 1921: if (bp->b_vp == devvp)
1922: printf("fs metadata");
1923: else
1924: printf("inum %d", VTOI(bp->b_vp)->i_number);
1.4 hannken 1925: printf(" lblkno %" PRId64 "\n", bp->b_lblkno);
1.1 hannken 1926: }
1927: #endif
1928: /*
1929: * If we have already read the old block contents, then
1930: * simply copy them to the new block. Note that we need
1931: * to synchronously write snapshots that have not been
1932: * unlinked, and hence will be visible after a crash,
1933: * to ensure their integrity.
1934: */
1935: if (saved_data) {
1936: error = writevnblk(vp, saved_data, lbn);
1937: if (error)
1938: break;
1939: continue;
1940: }
1941: /*
1942: * Otherwise, read the old block contents into the buffer.
1943: */
1944: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1945: saved_vp = vp;
1946: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
1947: free(saved_data, M_UFSMNT);
1948: saved_data = NULL;
1949: break;
1950: }
1951: }
1952: /*
1953: * Note that we need to synchronously write snapshots that
1954: * have not been unlinked, and hence will be visible after
1955: * a crash, to ensure their integrity.
1956: */
1957: if (saved_data) {
1958: error = writevnblk(saved_vp, saved_data, lbn);
1959: free(saved_data, M_UFSMNT);
1960: }
1961: if (snapshot_locked)
1962: VOP_UNLOCK(vp, 0);
1963: else
1964: VI_UNLOCK(devvp);
1965: return error;
1966: }
1967:
1968: /*
1969: * Read the specified block from disk. Vp is usually a snapshot vnode.
1970: */
1971: static int
1972: readfsblk(vp, data, lbn)
1973: struct vnode *vp;
1974: caddr_t data;
1975: ufs2_daddr_t lbn;
1976: {
1977: int s, error;
1978: struct inode *ip = VTOI(vp);
1979: struct fs *fs = ip->i_fs;
1980: struct buf *nbp;
1981:
1982: s = splbio();
1983: nbp = pool_get(&bufpool, PR_WAITOK);
1984: splx(s);
1985:
1986: BUF_INIT(nbp);
1987: nbp->b_flags = B_READ;
1988: nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize;
1989: nbp->b_error = 0;
1990: nbp->b_data = data;
1991: nbp->b_blkno = nbp->b_rawblkno = fsbtodb(fs, blkstofrags(fs, lbn));
1992: nbp->b_proc = NULL;
1993: nbp->b_dev = ip->i_devvp->v_rdev;
1994: nbp->b_vp = NULLVP;
1995:
1996: DEV_STRATEGY(nbp);
1997:
1998: error = biowait(nbp);
1999:
2000: s = splbio();
2001: pool_put(&bufpool, nbp);
2002: splx(s);
2003:
2004: return error;
2005: }
2006:
2007: /*
1.4 hannken 2008: * Read the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2009: */
2010: static int
2011: readvnblk(vp, data, lbn)
2012: struct vnode *vp;
2013: caddr_t data;
2014: ufs2_daddr_t lbn;
2015: {
1.4 hannken 2016: int error;
2017: daddr_t bn;
2018: off_t offset;
1.1 hannken 2019: struct inode *ip = VTOI(vp);
2020: struct fs *fs = ip->i_fs;
2021:
1.4 hannken 2022: error = VOP_BMAP(vp, lbn, NULL, &bn, NULL);
2023: if (error)
2024: return error;
1.1 hannken 2025:
1.4 hannken 2026: if (bn != (daddr_t)-1) {
2027: offset = dbtob(bn);
2028: simple_lock(&vp->v_interlock);
2029: error = VOP_PUTPAGES(vp, trunc_page(offset),
2030: round_page(offset+fs->fs_bsize),
2031: PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
1.1 hannken 2032: if (error)
1.4 hannken 2033: return error;
2034:
2035: return readfsblk(vp, data, fragstoblks(fs, dbtofsb(fs, bn)));
1.1 hannken 2036: }
2037:
1.4 hannken 2038: bzero(data, fs->fs_bsize);
2039:
2040: return 0;
1.1 hannken 2041: }
2042:
2043: /*
1.4 hannken 2044: * Write the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2045: */
2046: static int
2047: writevnblk(vp, data, lbn)
2048: struct vnode *vp;
2049: caddr_t data;
2050: ufs2_daddr_t lbn;
2051: {
1.4 hannken 2052: int s, error;
2053: off_t offset;
2054: struct buf *bp;
1.1 hannken 2055: struct inode *ip = VTOI(vp);
2056: struct fs *fs = ip->i_fs;
2057:
1.4 hannken 2058: offset = lblktosize(fs, (off_t)lbn);
2059: s = cow_enter();
2060: simple_lock(&vp->v_interlock);
2061: error = VOP_PUTPAGES(vp, trunc_page(offset),
2062: round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
2063: if (error == 0)
2064: error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
2065: fs->fs_bsize, KERNCRED, B_SYNC, &bp);
2066: cow_leave(s);
1.1 hannken 2067: if (error)
2068: return error;
2069:
1.4 hannken 2070: bcopy(data, bp->b_data, fs->fs_bsize);
2071: bp->b_flags |= B_NOCACHE;
2072:
2073: return bwrite(bp);
2074: }
2075:
2076: /*
2077: * Set/reset lwp's L_COWINPROGRESS flag.
2078: * May be called recursive.
2079: */
2080: static inline int
2081: cow_enter(void)
2082: {
2083: struct lwp *l = curlwp;
2084:
2085: if (l->l_flag & L_COWINPROGRESS) {
2086: return 0;
2087: } else {
2088: l->l_flag |= L_COWINPROGRESS;
2089: return L_COWINPROGRESS;
1.1 hannken 2090: }
1.4 hannken 2091: }
2092:
2093: static inline void
2094: cow_leave(int flag)
2095: {
2096: struct lwp *l = curlwp;
1.1 hannken 2097:
1.4 hannken 2098: l->l_flag &= ~flag;
1.1 hannken 2099: }
2100:
2101: /*
2102: * Get/Put direct block from inode or buffer containing disk addresses. Take
2103: * care for fs type (UFS1/UFS2) and byte swapping. These functions should go
2104: * into a global include.
2105: */
2106: static inline ufs2_daddr_t
2107: db_get(struct inode *ip, int loc)
2108: {
2109: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2110: return ufs_rw32(ip->i_ffs1_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2111: else
1.2 hannken 2112: return ufs_rw64(ip->i_ffs2_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2113: }
2114:
2115: static inline void
2116: db_assign(struct inode *ip, int loc, ufs2_daddr_t val)
2117: {
2118: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2119: ip->i_ffs1_db[loc] = ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2120: else
1.2 hannken 2121: ip->i_ffs2_db[loc] = ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2122: }
2123:
2124: static inline ufs2_daddr_t
2125: idb_get(struct inode *ip, caddr_t buf, int loc)
2126: {
2127: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2128: return ufs_rw32(((ufs1_daddr_t *)(buf))[loc],
2129: UFS_IPNEEDSWAP(ip));
1.1 hannken 2130: else
1.2 hannken 2131: return ufs_rw64(((ufs2_daddr_t *)(buf))[loc],
2132: UFS_IPNEEDSWAP(ip));
1.1 hannken 2133: }
2134:
2135: static inline void
2136: idb_assign(struct inode *ip, caddr_t buf, int loc, ufs2_daddr_t val)
2137: {
2138: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2139: ((ufs1_daddr_t *)(buf))[loc] =
2140: ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2141: else
1.2 hannken 2142: ((ufs2_daddr_t *)(buf))[loc] =
2143: ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2144: }
CVSweb <webmaster@jp.NetBSD.org>