Annotation of src/sys/ufs/ffs/ffs_snapshot.c, Revision 1.48
1.48 ! hannken 1: /* $NetBSD: ffs_snapshot.c,v 1.47 2007/08/09 07:34:28 hannken Exp $ */
1.18 thorpej 2:
1.1 hannken 3: /*
4: * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
5: *
6: * Further information about snapshots can be obtained from:
7: *
8: * Marshall Kirk McKusick http://www.mckusick.com/softdep/
9: * 1614 Oxford Street mckusick@mckusick.com
10: * Berkeley, CA 94709-1608 +1-510-843-9542
11: * USA
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: *
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: *
23: * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
24: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26: * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
27: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
36: *
37: * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp
38: */
39:
40: #include <sys/cdefs.h>
1.48 ! hannken 41: __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.47 2007/08/09 07:34:28 hannken Exp $");
1.8 hannken 42:
43: #if defined(_KERNEL_OPT)
44: #include "opt_ffs.h"
45: #endif
1.1 hannken 46:
47: #include <sys/param.h>
48: #include <sys/kernel.h>
49: #include <sys/systm.h>
50: #include <sys/conf.h>
51: #include <sys/buf.h>
52: #include <sys/proc.h>
53: #include <sys/namei.h>
54: #include <sys/sched.h>
55: #include <sys/stat.h>
56: #include <sys/malloc.h>
57: #include <sys/mount.h>
58: #include <sys/resource.h>
59: #include <sys/resourcevar.h>
60: #include <sys/vnode.h>
1.29 elad 61: #include <sys/kauth.h>
1.40 hannken 62: #include <sys/fstrans.h>
1.1 hannken 63:
64: #include <miscfs/specfs/specdev.h>
65:
66: #include <ufs/ufs/quota.h>
67: #include <ufs/ufs/ufsmount.h>
68: #include <ufs/ufs/inode.h>
69: #include <ufs/ufs/ufs_extern.h>
70: #include <ufs/ufs/ufs_bswap.h>
71:
72: #include <ufs/ffs/fs.h>
73: #include <ufs/ffs/ffs_extern.h>
74:
75: /* FreeBSD -> NetBSD conversion */
/*
 * Compatibility shims: the FreeBSD spellings used throughout this file
 * are mapped onto the NetBSD credential, block-address types and
 * simple-lock primitives.  Note that MNT_ILOCK()/MNT_IUNLOCK() ignore
 * their argument and always operate on the global mntvnode_slock.
 */
1.31 ad 76: #define KERNCRED lwp0.l_cred
1.1 hannken 77: #define ufs1_daddr_t int32_t
78: #define ufs2_daddr_t int64_t
79: #define ufs_lbn_t daddr_t
80: #define VI_MTX(v) (&(v)->v_interlock)
81: #define VI_LOCK(v) simple_lock(&(v)->v_interlock)
82: #define VI_UNLOCK(v) simple_unlock(&(v)->v_interlock)
83: #define MNT_ILOCK(v) simple_lock(&mntvnode_slock)
84: #define MNT_IUNLOCK(v) simple_unlock(&mntvnode_slock)
85:
1.10 hannken 86: #if !defined(FFS_NO_SNAPSHOT)
/*
 * Helpers that are only compiled when snapshot support is enabled.
 * The expunge/indiracct/fullacct/snapacct/mapacct routines exist once
 * for UFS1 (32-bit block addresses) and once for UFS2 (64-bit block
 * addresses).
 */
1.43 christos 87: static int cgaccount(int, struct vnode *, void *, int);
1.1 hannken 88: static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
89: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
90: ufs_lbn_t, int), int);
91: static int indiracct_ufs1(struct vnode *, struct vnode *, int,
92: ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
93: int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
94: ufs_lbn_t, int), int);
95: static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
96: struct fs *, ufs_lbn_t, int);
97: static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
98: struct fs *, ufs_lbn_t, int);
99: static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
100: struct fs *, ufs_lbn_t, int);
101: static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
102: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
103: ufs_lbn_t, int), int);
104: static int indiracct_ufs2(struct vnode *, struct vnode *, int,
105: ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
106: int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
107: ufs_lbn_t, int), int);
108: static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
109: struct fs *, ufs_lbn_t, int);
110: static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
111: struct fs *, ufs_lbn_t, int);
112: static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
113: struct fs *, ufs_lbn_t, int);
1.43 christos 114: static int readvnblk(struct vnode *, void *, ufs2_daddr_t);
1.10 hannken 115: #endif /* !defined(FFS_NO_SNAPSHOT) */
116:
/*
 * Helpers declared outside the FFS_NO_SNAPSHOT conditional above, so
 * they are available in both configurations.
 */
1.47 hannken 117: static void si_mount_dtor(void *);
118: static struct snap_info *si_mount_init(struct mount *);
1.1 hannken 119: static int ffs_copyonwrite(void *, struct buf *);
1.43 christos 120: static int readfsblk(struct vnode *, void *, ufs2_daddr_t);
121: static int writevnblk(struct vnode *, void *, ufs2_daddr_t);
1.4 hannken 122: static inline int cow_enter(void);
123: static inline void cow_leave(int);
1.1 hannken 124: static inline ufs2_daddr_t db_get(struct inode *, int);
125: static inline void db_assign(struct inode *, int, ufs2_daddr_t);
1.43 christos 126: static inline ufs2_daddr_t idb_get(struct inode *, void *, int);
127: static inline void idb_assign(struct inode *, void *, int, ufs2_daddr_t);
1.1 hannken 128:
/*
 * Per-mount snapshot state.  An instance is allocated lazily by
 * si_mount_init() and attached to the mount as mount-specific data
 * under si_mount_data_key; si_mount_dtor() frees it.
 */
1.47 hannken 129: struct snap_info {
130: TAILQ_HEAD(inodelst, inode) si_snapshots; /* List of active snapshots */
131: daddr_t *si_snapblklist; /* Snapshot block hints list */
132: };
133:
1.1 hannken 134: #ifdef DEBUG
/* When non-zero, ffs_snapshot() prints busy vnodes it encounters. */
135: static int snapdebug = 0;
136: #endif
/* Serializes creation of the per-mount snap_info in si_mount_init(). */
1.47 hannken 137: static kmutex_t si_mount_init_lock;
/* Mount-specific data key under which the snap_info is stored. */
138: static specificdata_key_t si_mount_data_key;
139:
140: void
141: ffs_snapshot_init(void)
142: {
143: int error;
144:
145: error = mount_specific_key_create(&si_mount_data_key, si_mount_dtor);
146: KASSERT(error == 0);
147: mutex_init(&si_mount_init_lock, MUTEX_DEFAULT, IPL_NONE);
148: }
149:
150: void
151: ffs_snapshot_fini(void)
152: {
153: mount_specific_key_delete(si_mount_data_key);
154: mutex_destroy(&si_mount_init_lock);
155: }
156:
157: static void
158: si_mount_dtor(void *arg)
159: {
160: struct snap_info *si = arg;
161:
162: KASSERT(TAILQ_EMPTY(&si->si_snapshots));
163: KASSERT(si->si_snapblklist == NULL);
164: free(si, M_MOUNT);
165: }
166:
167: static struct snap_info *
168: si_mount_init(struct mount *mp)
169: {
170: struct snap_info *new;
171:
172: mutex_enter(&si_mount_init_lock);
173:
174: if ((new = mount_getspecific(mp, si_mount_data_key)) != NULL) {
175: mutex_exit(&si_mount_init_lock);
176: return new;
177: }
178:
179: new = malloc(sizeof(*new), M_MOUNT, M_WAITOK);
180: TAILQ_INIT(&new->si_snapshots);
181: new->si_snapblklist = NULL;
182: mount_setspecific(mp, si_mount_data_key, new);
183: mutex_exit(&si_mount_init_lock);
184: return new;
185: }
1.1 hannken 186:
187: /*
188: * Create a snapshot file and initialize it for the filesystem.
1.4 hannken 189: * Vnode is locked on entry and return.
1.1 hannken 190: */
191: int
1.36 christos 192: ffs_snapshot(struct mount *mp, struct vnode *vp,
193: struct timespec *ctime)
1.1 hannken 194: {
1.10 hannken 195: #if defined(FFS_NO_SNAPSHOT)
196: return EOPNOTSUPP;
197: }
198: #else /* defined(FFS_NO_SNAPSHOT) */
1.1 hannken 199: ufs2_daddr_t numblks, blkno, *blkp, snaplistsize = 0, *snapblklist;
200: int error, ns, cg, snaploc;
1.15 hannken 201: int i, s, size, len, loc;
1.1 hannken 202: int flag = mp->mnt_flag;
203: struct timeval starttime;
204: #ifdef DEBUG
205: struct timeval endtime;
206: #endif
207: struct timespec ts;
208: long redo = 0;
209: int32_t *lp;
210: void *space;
1.43 christos 211: void *sbbuf = NULL;
1.47 hannken 212: struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
1.23 christos 213: struct lwp *l = curlwp;
1.1 hannken 214: struct inode *ip, *xp;
1.15 hannken 215: struct buf *bp, *ibp, *nbp;
1.1 hannken 216: struct vattr vat;
1.35 reinoud 217: struct vnode *xvp, *nvp, *devvp;
1.47 hannken 218: struct snap_info *si;
1.1 hannken 219:
220: ns = UFS_FSNEEDSWAP(fs);
1.47 hannken 221: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
222: si = si_mount_init(mp);
1.1 hannken 223: /*
224: * Need to serialize access to snapshot code per filesystem.
225: */
226: /*
227: * If the vnode already is a snapshot, return.
228: */
229: if (VTOI(vp)->i_flags & SF_SNAPSHOT) {
230: if (ctime) {
231: ctime->tv_sec = DIP(VTOI(vp), mtime);
232: ctime->tv_nsec = DIP(VTOI(vp), mtimensec);
233: }
234: return 0;
235: }
236: /*
1.9 hannken 237: * Check mount, exclusive reference and owner.
1.1 hannken 238: */
1.4 hannken 239: if (vp->v_mount != mp)
1.1 hannken 240: return EXDEV;
1.4 hannken 241: if (vp->v_usecount != 1 || vp->v_writecount != 0)
1.1 hannken 242: return EBUSY;
1.31 ad 243: if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1.39 elad 244: NULL) != 0 &&
1.31 ad 245: VTOI(vp)->i_uid != kauth_cred_geteuid(l->l_cred))
1.9 hannken 246: return EACCES;
247:
1.1 hannken 248: if (vp->v_size != 0) {
1.23 christos 249: error = ffs_truncate(vp, 0, 0, NOCRED, l);
1.4 hannken 250: if (error)
1.1 hannken 251: return error;
252: }
253: /*
254: * Assign a snapshot slot in the superblock.
255: */
256: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
257: if (fs->fs_snapinum[snaploc] == 0)
258: break;
259: if (snaploc == FSMAXSNAP)
260: return (ENOSPC);
261: ip = VTOI(vp);
262: devvp = ip->i_devvp;
263: /*
1.16 hannken 264: * Write an empty list of preallocated blocks to the end of
265: * the snapshot to set size to at least that of the filesystem.
1.1 hannken 266: */
267: numblks = howmany(fs->fs_size, fs->fs_frag);
1.16 hannken 268: blkno = 1;
269: blkno = ufs_rw64(blkno, ns);
1.1 hannken 270: error = vn_rdwr(UIO_WRITE, vp,
1.43 christos 271: (void *)&blkno, sizeof(blkno), lblktosize(fs, (off_t)numblks),
1.31 ad 272: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, l->l_cred, NULL, NULL);
1.1 hannken 273: if (error)
274: goto out;
275: /*
276: * Preallocate critical data structures so that we can copy
277: * them in without further allocation after we suspend all
278: * operations on the filesystem. We would like to just release
279: * the allocated buffers without writing them since they will
280: * be filled in below once we are ready to go, but this upsets
281: * the soft update code, so we go ahead and write the new buffers.
282: *
283: * Allocate all indirect blocks and mark all of them as not
284: * needing to be copied.
285: */
286: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
1.22 yamt 287: error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
1.31 ad 288: fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
1.1 hannken 289: if (error)
290: goto out;
1.15 hannken 291: bawrite(ibp);
1.1 hannken 292: }
293: /*
294: * Allocate copies for the superblock and its summary information.
295: */
1.22 yamt 296: error = ffs_balloc(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
1.15 hannken 297: 0, &nbp);
298: if (error)
299: goto out;
300: bawrite(nbp);
1.1 hannken 301: blkno = fragstoblks(fs, fs->fs_csaddr);
1.15 hannken 302: len = howmany(fs->fs_cssize, fs->fs_bsize);
303: for (loc = 0; loc < len; loc++) {
1.22 yamt 304: error = ffs_balloc(vp, lblktosize(fs, (off_t)(blkno + loc)),
1.15 hannken 305: fs->fs_bsize, KERNCRED, 0, &nbp);
306: if (error)
1.1 hannken 307: goto out;
1.15 hannken 308: bawrite(nbp);
309: }
1.1 hannken 310: /*
311: * Copy all the cylinder group maps. Although the
312: * filesystem is still active, we hope that only a few
313: * cylinder groups will change between now and when we
314: * suspend operations. Thus, we will be able to quickly
315: * touch up the few cylinder groups that changed during
316: * the suspension period.
317: */
318: len = howmany(fs->fs_ncg, NBBY);
1.25 christos 319: fs->fs_active = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO);
1.1 hannken 320: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.22 yamt 321: if ((error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
1.15 hannken 322: fs->fs_bsize, KERNCRED, 0, &nbp)) != 0)
1.1 hannken 323: goto out;
1.15 hannken 324: error = cgaccount(cg, vp, nbp->b_data, 1);
325: bawrite(nbp);
326: if (error)
1.1 hannken 327: goto out;
328: }
329: /*
330: * Change inode to snapshot type file.
331: */
332: ip->i_flags |= SF_SNAPSHOT;
333: DIP_ASSIGN(ip, flags, ip->i_flags);
334: ip->i_flag |= IN_CHANGE | IN_UPDATE;
335: /*
336: * Ensure that the snapshot is completely on disk.
337: * Since we have marked it as a snapshot it is safe to
338: * unlock it as no process will be allowed to write to it.
339: */
1.23 christos 340: if ((error = VOP_FSYNC(vp, KERNCRED, FSYNC_WAIT, 0, 0, l)) != 0)
1.1 hannken 341: goto out;
342: VOP_UNLOCK(vp, 0);
343: /*
344: * All allocations are done, so we can now snapshot the system.
345: *
346: * Suspend operation on filesystem.
347: */
1.40 hannken 348: if ((error = vfs_suspend(vp->v_mount, 0)) != 0) {
1.1 hannken 349: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
350: goto out;
351: }
352: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.30 kardel 353: getmicrotime(&starttime);
1.1 hannken 354: /*
355: * First, copy all the cylinder group maps that have changed.
356: */
357: for (cg = 0; cg < fs->fs_ncg; cg++) {
358: if (ACTIVECG_ISSET(fs, cg))
359: continue;
360: redo++;
1.22 yamt 361: if ((error = ffs_balloc(vp, lfragtosize(fs, cgtod(fs, cg)),
1.15 hannken 362: fs->fs_bsize, KERNCRED, 0, &nbp)) != 0)
1.1 hannken 363: goto out1;
1.15 hannken 364: error = cgaccount(cg, vp, nbp->b_data, 2);
365: bawrite(nbp);
366: if (error)
1.1 hannken 367: goto out1;
368: }
369: /*
370: * Grab a copy of the superblock and its summary information.
371: * We delay writing it until the suspension is released below.
372: */
1.16 hannken 373: sbbuf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 374: loc = blkoff(fs, fs->fs_sblockloc);
375: if (loc > 0)
1.43 christos 376: memset(sbbuf, 0, loc);
377: copy_fs = (struct fs *)((char *)sbbuf + loc);
1.1 hannken 378: bcopy(fs, copy_fs, fs->fs_sbsize);
379: size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
380: if (fs->fs_sbsize < size)
1.43 christos 381: memset((char *)sbbuf + loc + fs->fs_sbsize, 0,
382: size - fs->fs_sbsize);
1.1 hannken 383: size = blkroundup(fs, fs->fs_cssize);
384: if (fs->fs_contigsumsize > 0)
385: size += fs->fs_ncg * sizeof(int32_t);
386: space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
387: copy_fs->fs_csp = space;
388: bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
1.12 yamt 389: space = (char *)space + fs->fs_cssize;
1.1 hannken 390: loc = howmany(fs->fs_cssize, fs->fs_fsize);
391: i = fs->fs_frag - loc % fs->fs_frag;
392: len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
393: if (len > 0) {
394: if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
395: len, KERNCRED, &bp)) != 0) {
396: brelse(bp);
397: free(copy_fs->fs_csp, M_UFSMNT);
398: goto out1;
399: }
400: bcopy(bp->b_data, space, (u_int)len);
1.12 yamt 401: space = (char *)space + len;
1.1 hannken 402: bp->b_flags |= B_INVAL | B_NOCACHE;
403: brelse(bp);
404: }
405: if (fs->fs_contigsumsize > 0) {
406: copy_fs->fs_maxcluster = lp = space;
407: for (i = 0; i < fs->fs_ncg; i++)
408: *lp++ = fs->fs_contigsumsize;
409: }
410: /*
411: * We must check for active files that have been unlinked
412: * (e.g., with a zero link count). We have to expunge all
413: * trace of these files from the snapshot so that they are
414: * not reclaimed prematurely by fsck or unnecessarily dumped.
415: * We turn off the MNTK_SUSPENDED flag to avoid a panic from
416: * spec_strategy about writing on a suspended filesystem.
417: * Note that we skip unlinked snapshot files as they will
418: * be handled separately below.
419: *
420: * We also calculate the needed size for the snapshot list.
421: */
422: snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
423: FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
424: MNT_ILOCK(mp);
425: loop:
1.35 reinoud 426: /*
427: * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
428: * and vclean() can be called indirectly
429: */
430: for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = nvp) {
1.1 hannken 431: /*
432: * Make sure this vnode wasn't reclaimed in getnewvnode().
433: * Start over if it has (it won't be on the list anymore).
434: */
435: if (xvp->v_mount != mp)
436: goto loop;
437: VI_LOCK(xvp);
1.35 reinoud 438: nvp = TAILQ_NEXT(xvp, v_mntvnodes);
1.1 hannken 439: MNT_IUNLOCK(mp);
440: if ((xvp->v_flag & VXLOCK) ||
441: xvp->v_usecount == 0 || xvp->v_type == VNON ||
442: (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
443: VI_UNLOCK(xvp);
444: MNT_ILOCK(mp);
445: continue;
446: }
1.40 hannken 447: VI_UNLOCK(xvp);
1.1 hannken 448: #ifdef DEBUG
449: if (snapdebug)
450: vprint("ffs_snapshot: busy vnode", xvp);
451: #endif
1.31 ad 452: if (VOP_GETATTR(xvp, &vat, l->l_cred, l) == 0 &&
1.1 hannken 453: vat.va_nlink > 0) {
454: MNT_ILOCK(mp);
455: continue;
456: }
457: xp = VTOI(xvp);
458: if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
459: MNT_ILOCK(mp);
460: continue;
461: }
462: /*
463: * If there is a fragment, clear it here.
464: */
465: blkno = 0;
466: loc = howmany(xp->i_size, fs->fs_bsize) - 1;
467: if (loc < NDADDR) {
468: len = fragroundup(fs, blkoff(fs, xp->i_size));
1.5 hannken 469: if (len > 0 && len < fs->fs_bsize) {
1.1 hannken 470: ffs_blkfree(copy_fs, vp, db_get(xp, loc),
471: len, xp->i_number);
472: blkno = db_get(xp, loc);
473: db_assign(xp, loc, 0);
474: }
475: }
476: snaplistsize += 1;
477: if (xp->i_ump->um_fstype == UFS1)
478: error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
479: BLK_NOCOPY);
480: else
481: error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
482: BLK_NOCOPY);
483: if (blkno)
484: db_assign(xp, loc, blkno);
1.6 hannken 485: if (!error)
486: error = ffs_freefile(copy_fs, vp, xp->i_number,
487: xp->i_mode);
1.1 hannken 488: if (error) {
489: free(copy_fs->fs_csp, M_UFSMNT);
490: goto out1;
491: }
492: MNT_ILOCK(mp);
493: }
494: MNT_IUNLOCK(mp);
495: /*
496: * If there already exist snapshots on this filesystem, grab a
497: * reference to their shared lock. If this is the first snapshot
498: * on this filesystem, we need to allocate a lock for the snapshots
499: * to share. In either case, acquire the snapshot lock and give
500: * up our original private lock.
501: */
502: VI_LOCK(devvp);
1.47 hannken 503: if ((xp = TAILQ_FIRST(&si->si_snapshots)) != NULL) {
1.1 hannken 504: struct lock *lkp;
505:
506: lkp = ITOV(xp)->v_vnlock;
507: VI_UNLOCK(devvp);
508: VI_LOCK(vp);
509: vp->v_vnlock = lkp;
510: } else {
511: struct lock *lkp;
512:
513: VI_UNLOCK(devvp);
514: MALLOC(lkp, struct lock *, sizeof(struct lock), M_UFSMNT,
515: M_WAITOK);
516: lockinit(lkp, PVFS, "snaplk", 0, LK_CANRECURSE);
517: VI_LOCK(vp);
518: vp->v_vnlock = lkp;
519: }
520: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
521: transferlockers(&vp->v_lock, vp->v_vnlock);
522: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
523: /*
524: * If this is the first snapshot on this filesystem, then we need
525: * to allocate the space for the list of preallocated snapshot blocks.
526: * This list will be refined below, but this preliminary one will
527: * keep us out of deadlock until the full one is ready.
528: */
529: if (xp == NULL) {
1.25 christos 530: snapblklist = malloc(
1.1 hannken 531: snaplistsize * sizeof(ufs2_daddr_t), M_UFSMNT, M_WAITOK);
532: blkp = &snapblklist[1];
1.15 hannken 533: *blkp++ = lblkno(fs, fs->fs_sblockloc);
1.1 hannken 534: blkno = fragstoblks(fs, fs->fs_csaddr);
535: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.15 hannken 536: if (fragstoblks(fs, cgtod(fs, cg)) > blkno)
1.1 hannken 537: break;
1.15 hannken 538: *blkp++ = fragstoblks(fs, cgtod(fs, cg));
1.1 hannken 539: }
540: len = howmany(fs->fs_cssize, fs->fs_bsize);
541: for (loc = 0; loc < len; loc++)
1.15 hannken 542: *blkp++ = blkno + loc;
1.1 hannken 543: for (; cg < fs->fs_ncg; cg++)
1.15 hannken 544: *blkp++ = fragstoblks(fs, cgtod(fs, cg));
545: snapblklist[0] = blkp - snapblklist;
1.1 hannken 546: VI_LOCK(devvp);
1.47 hannken 547: if (si->si_snapblklist != NULL)
1.1 hannken 548: panic("ffs_snapshot: non-empty list");
1.47 hannken 549: si->si_snapblklist = snapblklist;
1.1 hannken 550: VI_UNLOCK(devvp);
551: }
552: /*
553: * Record snapshot inode. Since this is the newest snapshot,
554: * it must be placed at the end of the list.
555: */
556: VI_LOCK(devvp);
557: fs->fs_snapinum[snaploc] = ip->i_number;
558: if (ip->i_nextsnap.tqe_prev != 0)
1.19 christos 559: panic("ffs_snapshot: %llu already on list",
560: (unsigned long long)ip->i_number);
1.47 hannken 561: TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
1.1 hannken 562: VI_UNLOCK(devvp);
563: if (xp == NULL)
564: vn_cow_establish(devvp, ffs_copyonwrite, devvp);
565: vp->v_flag |= VSYSTEM;
566: out1:
567: /*
568: * Resume operation on filesystem.
569: */
1.40 hannken 570: vfs_resume(vp->v_mount);
1.1 hannken 571: /*
572: * Set the mtime to the time the snapshot has been taken.
573: */
1.21 yamt 574: TIMEVAL_TO_TIMESPEC(&starttime, &ts);
1.1 hannken 575: if (ctime)
576: *ctime = ts;
577: DIP_ASSIGN(ip, mtime, ts.tv_sec);
578: DIP_ASSIGN(ip, mtimensec, ts.tv_nsec);
579: ip->i_flag |= IN_CHANGE | IN_UPDATE;
580:
581: #ifdef DEBUG
582: if (starttime.tv_sec > 0) {
1.30 kardel 583: getmicrotime(&endtime);
1.1 hannken 584: timersub(&endtime, &starttime, &endtime);
585: printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
586: vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
587: endtime.tv_usec / 1000, redo, fs->fs_ncg);
588: }
589: #endif
590: if (error)
591: goto out;
592: /*
593: * Copy allocation information from all the snapshots in
594: * this snapshot and then expunge them from its view.
595: */
1.47 hannken 596: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
1.1 hannken 597: if (xp == ip)
598: break;
599: if (xp->i_ump->um_fstype == UFS1)
600: error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
601: BLK_SNAP);
602: else
603: error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
604: BLK_SNAP);
1.48 ! hannken 605: if (error == 0 && xp->i_ffs_effnlink == 0)
! 606: error = ffs_freefile(copy_fs, vp,
! 607: xp->i_number, xp->i_mode);
1.1 hannken 608: if (error) {
609: fs->fs_snapinum[snaploc] = 0;
610: goto done;
611: }
612: }
613: /*
614: * Allocate space for the full list of preallocated snapshot blocks.
615: */
1.25 christos 616: snapblklist = malloc(snaplistsize * sizeof(ufs2_daddr_t),
1.1 hannken 617: M_UFSMNT, M_WAITOK);
618: ip->i_snapblklist = &snapblklist[1];
619: /*
620: * Expunge the blocks used by the snapshots from the set of
621: * blocks marked as used in the snapshot bitmaps. Also, collect
622: * the list of allocated blocks in i_snapblklist.
623: */
624: if (ip->i_ump->um_fstype == UFS1)
625: error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
626: else
627: error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
628: if (error) {
629: fs->fs_snapinum[snaploc] = 0;
630: FREE(snapblklist, M_UFSMNT);
631: goto done;
632: }
633: if (snaplistsize < ip->i_snapblklist - snapblklist)
634: panic("ffs_snapshot: list too small");
635: snaplistsize = ip->i_snapblklist - snapblklist;
1.15 hannken 636: snapblklist[0] = snaplistsize;
637: ip->i_snapblklist = &snapblklist[0];
1.1 hannken 638: /*
639: * Write out the list of allocated blocks to the end of the snapshot.
640: */
1.15 hannken 641: for (i = 0; i < snaplistsize; i++)
642: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1.43 christos 643: error = vn_rdwr(UIO_WRITE, vp, (void *)snapblklist,
1.16 hannken 644: snaplistsize*sizeof(ufs2_daddr_t), lblktosize(fs, (off_t)numblks),
1.31 ad 645: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, l->l_cred, NULL, NULL);
1.15 hannken 646: for (i = 0; i < snaplistsize; i++)
647: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1.1 hannken 648: if (error) {
649: fs->fs_snapinum[snaploc] = 0;
650: FREE(snapblklist, M_UFSMNT);
651: goto done;
652: }
653: /*
654: * Write the superblock and its summary information
655: * to the snapshot.
656: */
657: blkno = fragstoblks(fs, fs->fs_csaddr);
658: len = howmany(fs->fs_cssize, fs->fs_bsize);
659: space = copy_fs->fs_csp;
1.8 hannken 660: #ifdef FFS_EI
1.1 hannken 661: if (ns) {
662: ffs_sb_swap(copy_fs, copy_fs);
663: ffs_csum_swap(space, space, fs->fs_cssize);
664: }
1.8 hannken 665: #endif
1.1 hannken 666: for (loc = 0; loc < len; loc++) {
1.15 hannken 667: error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
668: if (error) {
669: brelse(nbp);
1.1 hannken 670: fs->fs_snapinum[snaploc] = 0;
671: FREE(snapblklist, M_UFSMNT);
672: goto done;
673: }
1.15 hannken 674: bcopy(space, nbp->b_data, fs->fs_bsize);
1.1 hannken 675: space = (char *)space + fs->fs_bsize;
1.15 hannken 676: bawrite(nbp);
1.1 hannken 677: }
678: /*
679: * As this is the newest list, it is the most inclusive, so
1.15 hannken 680: * should replace the previous list. If this is the first snapshot
681: * free the preliminary list.
1.1 hannken 682: */
683: VI_LOCK(devvp);
1.47 hannken 684: space = si->si_snapblklist;
685: si->si_snapblklist = snapblklist;
1.1 hannken 686: VI_UNLOCK(devvp);
1.47 hannken 687: if (TAILQ_FIRST(&si->si_snapshots) == ip)
1.1 hannken 688: FREE(space, M_UFSMNT);
689: done:
690: free(copy_fs->fs_csp, M_UFSMNT);
1.15 hannken 691: if (!error) {
692: error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
693: KERNCRED, &nbp);
694: if (error) {
695: brelse(nbp);
696: fs->fs_snapinum[snaploc] = 0;
697: }
1.16 hannken 698: bcopy(sbbuf, nbp->b_data, fs->fs_bsize);
1.15 hannken 699: bawrite(nbp);
700: }
1.1 hannken 701: out:
1.4 hannken 702: /*
1.15 hannken 703: * Invalidate and free all pages on the snapshot vnode.
704: * All metadata has been written through the buffer cache.
705: * Clean all dirty buffers now to avoid UBC inconsistencies.
1.4 hannken 706: */
707: if (!error) {
708: simple_lock(&vp->v_interlock);
709: error = VOP_PUTPAGES(vp, 0, 0,
710: PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
711: }
1.15 hannken 712: if (!error) {
713: s = splbio();
714: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
715: nbp = LIST_NEXT(bp, b_vnbufs);
716: simple_lock(&bp->b_interlock);
717: splx(s);
718: if ((bp->b_flags & (B_DELWRI|B_BUSY)) != B_DELWRI)
719: panic("ffs_snapshot: not dirty or busy, bp %p",
720: bp);
721: bp->b_flags |= B_BUSY|B_VFLUSH;
722: if (LIST_FIRST(&bp->b_dep) == NULL)
723: bp->b_flags |= B_NOCACHE;
724: simple_unlock(&bp->b_interlock);
725: bwrite(bp);
726: s = splbio();
727: }
728: simple_lock(&global_v_numoutput_slock);
729: while (vp->v_numoutput) {
730: vp->v_flag |= VBWAIT;
1.43 christos 731: ltsleep((void *)&vp->v_numoutput, PRIBIO+1,
1.15 hannken 732: "snapflushbuf", 0, &global_v_numoutput_slock);
733: }
734: simple_unlock(&global_v_numoutput_slock);
735: splx(s);
736: }
1.16 hannken 737: if (sbbuf)
738: free(sbbuf, M_UFSMNT);
1.1 hannken 739: if (fs->fs_active != 0) {
740: FREE(fs->fs_active, M_DEVBUF);
741: fs->fs_active = 0;
742: }
743: mp->mnt_flag = flag;
744: if (error)
1.23 christos 745: (void) ffs_truncate(vp, (off_t)0, 0, NOCRED, l);
1.1 hannken 746: else
747: vref(vp);
748: return (error);
749: }
750:
751: /*
752: * Copy a cylinder group map. All the unallocated blocks are marked
753: * BLK_NOCOPY so that the snapshot knows that it need not copy them
754: * if they are later written. If passno is one, then this is a first
755: * pass, so only setting needs to be done. If passno is 2, then this
756: * is a revision to a previous pass which must be undone as the
757: * replacement pass is done.
758: */
759: static int
1.43 christos 760: cgaccount(int cg, struct vnode *vp, void *data, int passno)
1.1 hannken 761: {
762: struct buf *bp, *ibp;
763: struct inode *ip;
764: struct cg *cgp;
765: struct fs *fs;
766: ufs2_daddr_t base, numblks;
767: int error, len, loc, ns, indiroff;
768:
769: ip = VTOI(vp);
770: fs = ip->i_fs;
771: ns = UFS_FSNEEDSWAP(fs);
772: error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
773: (int)fs->fs_cgsize, KERNCRED, &bp);
774: if (error) {
775: brelse(bp);
776: return (error);
777: }
778: cgp = (struct cg *)bp->b_data;
779: if (!cg_chkmagic(cgp, ns)) {
780: brelse(bp);
781: return (EIO);
782: }
783: ACTIVECG_SET(fs, cg);
784:
785: bcopy(bp->b_data, data, fs->fs_cgsize);
786: brelse(bp);
787: if (fs->fs_cgsize < fs->fs_bsize)
1.43 christos 788: memset((char *)data + fs->fs_cgsize, 0,
1.1 hannken 789: fs->fs_bsize - fs->fs_cgsize);
790: numblks = howmany(fs->fs_size, fs->fs_frag);
791: len = howmany(fs->fs_fpg, fs->fs_frag);
792: base = cg * fs->fs_fpg / fs->fs_frag;
793: if (base + len >= numblks)
794: len = numblks - base - 1;
795: loc = 0;
796: if (base < NDADDR) {
797: for ( ; loc < NDADDR; loc++) {
798: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
799: db_assign(ip, loc, BLK_NOCOPY);
800: else if (db_get(ip, loc) == BLK_NOCOPY) {
801: if (passno == 2)
802: db_assign(ip, loc, 0);
803: else if (passno == 1)
804: panic("ffs_snapshot: lost direct block");
805: }
806: }
807: }
1.22 yamt 808: if ((error = ffs_balloc(vp, lblktosize(fs, (off_t)(base + loc)),
1.11 perry 809: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
1.1 hannken 810: return (error);
811: indiroff = (base + loc - NDADDR) % NINDIR(fs);
812: for ( ; loc < len; loc++, indiroff++) {
813: if (indiroff >= NINDIR(fs)) {
1.15 hannken 814: bawrite(ibp);
1.22 yamt 815: if ((error = ffs_balloc(vp,
1.1 hannken 816: lblktosize(fs, (off_t)(base + loc)),
817: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp)) != 0)
818: return (error);
819: indiroff = 0;
820: }
821: if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
822: idb_assign(ip, ibp->b_data, indiroff, BLK_NOCOPY);
823: else if (idb_get(ip, ibp->b_data, indiroff) == BLK_NOCOPY) {
824: if (passno == 2)
825: idb_assign(ip, ibp->b_data, indiroff, 0);
826: else if (passno == 1)
827: panic("ffs_snapshot: lost indirect block");
828: }
829: }
1.15 hannken 830: bdwrite(ibp);
1.1 hannken 831: return (0);
832: }
833:
834: /*
835: * Before expunging a snapshot inode, note all the
836: * blocks that it claims with BLK_SNAP so that fsck will
837: * be able to account for those blocks properly and so
838: * that this snapshot knows that it need not copy them
839: * if the other snapshot holding them is freed. This code
840: * is reproduced once each for UFS1 and UFS2.
841: */
842: static int
1.18 thorpej 843: expunge_ufs1(struct vnode *snapvp, struct inode *cancelip, struct fs *fs,
844: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
845: struct fs *, ufs_lbn_t, int),
846: int expungetype)
1.1 hannken 847: {
1.4 hannken 848: int i, s, error, ns, indiroff;
1.1 hannken 849: ufs_lbn_t lbn, rlbn;
850: ufs2_daddr_t len, blkno, numblks, blksperindir;
851: struct ufs1_dinode *dip;
852: struct buf *bp;
1.43 christos 853: void *bf;
1.1 hannken 854:
855: ns = UFS_FSNEEDSWAP(fs);
856: /*
857: * Prepare to expunge the inode. If its inode block has not
858: * yet been copied, then allocate and fill the copy.
859: */
860: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
861: blkno = 0;
862: if (lbn < NDADDR) {
863: blkno = db_get(VTOI(snapvp), lbn);
864: } else {
1.4 hannken 865: s = cow_enter();
1.22 yamt 866: error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
1.1 hannken 867: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 868: cow_leave(s);
1.1 hannken 869: if (error)
870: return (error);
871: indiroff = (lbn - NDADDR) % NINDIR(fs);
872: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
873: brelse(bp);
874: }
1.17 christos 875: bf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 876: if (blkno != 0)
1.17 christos 877: error = readvnblk(snapvp, bf, lbn);
1.1 hannken 878: else
1.17 christos 879: error = readfsblk(snapvp, bf, lbn);
1.1 hannken 880: if (error) {
1.17 christos 881: free(bf, M_UFSMNT);
1.1 hannken 882: return error;
883: }
884: /*
885: * Set a snapshot inode to be a zero length file, regular files
1.48 ! hannken 886: * or unlinked snapshots to be completely unallocated.
1.1 hannken 887: */
1.17 christos 888: dip = (struct ufs1_dinode *)bf + ino_to_fsbo(fs, cancelip->i_number);
1.48 ! hannken 889: if (expungetype == BLK_NOCOPY || cancelip->i_ffs_effnlink == 0)
1.1 hannken 890: dip->di_mode = 0;
891: dip->di_size = 0;
892: dip->di_blocks = 0;
893: dip->di_flags =
894: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
895: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
1.17 christos 896: error = writevnblk(snapvp, bf, lbn);
897: free(bf, M_UFSMNT);
1.1 hannken 898: if (error)
899: return error;
900: /*
901: * Now go through and expunge all the blocks in the file
902: * using the function requested.
903: */
904: numblks = howmany(cancelip->i_size, fs->fs_bsize);
905: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_db[0],
906: &cancelip->i_ffs1_db[NDADDR], fs, 0, expungetype)))
907: return (error);
908: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs1_ib[0],
909: &cancelip->i_ffs1_ib[NIADDR], fs, -1, expungetype)))
910: return (error);
911: blksperindir = 1;
912: lbn = -NDADDR;
913: len = numblks - NDADDR;
914: rlbn = NDADDR;
915: for (i = 0; len > 0 && i < NIADDR; i++) {
916: error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
917: ufs_rw32(cancelip->i_ffs1_ib[i], ns), lbn, rlbn, len,
918: blksperindir, fs, acctfunc, expungetype);
919: if (error)
920: return (error);
921: blksperindir *= NINDIR(fs);
922: lbn -= blksperindir + 1;
923: len -= blksperindir;
924: rlbn += blksperindir;
925: }
926: return (0);
927: }
928:
929: /*
930: * Descend an indirect block chain for vnode cancelvp accounting for all
931: * its indirect blocks in snapvp.
1.11 perry 932: */
1.1 hannken 933: static int
1.18 thorpej 934: indiracct_ufs1(struct vnode *snapvp, struct vnode *cancelvp, int level,
935: ufs1_daddr_t blkno, ufs_lbn_t lbn, ufs_lbn_t rlbn, ufs_lbn_t remblks,
936: ufs_lbn_t blksperindir, struct fs *fs,
937: int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
938: struct fs *, ufs_lbn_t, int),
939: int expungetype)
1.1 hannken 940: {
941: int error, ns, num, i;
942: ufs_lbn_t subblksperindir;
943: struct indir indirs[NIADDR + 2];
944: ufs1_daddr_t last, *bap;
945: struct buf *bp;
946:
947: ns = UFS_FSNEEDSWAP(fs);
948:
949: if (blkno == 0) {
950: if (expungetype == BLK_NOCOPY)
951: return (0);
952: panic("indiracct_ufs1: missing indir");
953: }
954: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
955: return (error);
956: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
957: panic("indiracct_ufs1: botched params");
958: /*
959: * We have to expand bread here since it will deadlock looking
960: * up the block number for any blocks that are not in the cache.
961: */
962: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
963: bp->b_blkno = fsbtodb(fs, blkno);
964: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
965: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
966: brelse(bp);
967: return (error);
968: }
969: /*
970: * Account for the block pointers in this indirect block.
971: */
972: last = howmany(remblks, blksperindir);
973: if (last > NINDIR(fs))
974: last = NINDIR(fs);
1.25 christos 975: bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
1.43 christos 976: bcopy(bp->b_data, (void *)bap, fs->fs_bsize);
1.1 hannken 977: brelse(bp);
978: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
979: level == 0 ? rlbn : -1, expungetype);
980: if (error || level == 0)
981: goto out;
982: /*
983: * Account for the block pointers in each of the indirect blocks
984: * in the levels below us.
985: */
986: subblksperindir = blksperindir / NINDIR(fs);
987: for (lbn++, level--, i = 0; i < last; i++) {
988: error = indiracct_ufs1(snapvp, cancelvp, level,
989: ufs_rw32(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
990: fs, acctfunc, expungetype);
991: if (error)
992: goto out;
993: rlbn += blksperindir;
994: lbn -= blksperindir;
995: remblks -= blksperindir;
996: }
997: out:
998: FREE(bap, M_DEVBUF);
999: return (error);
1000: }
1001:
1002: /*
1003: * Do both snap accounting and map accounting.
1004: */
1005: static int
1.18 thorpej 1006: fullacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1007: struct fs *fs, ufs_lbn_t lblkno,
1008: int exptype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1009: {
1010: int error;
1011:
1012: if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
1013: return (error);
1014: return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
1015: }
1016:
1017: /*
1018: * Identify a set of blocks allocated in a snapshot inode.
1019: */
1020: static int
1.18 thorpej 1021: snapacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1.36 christos 1022: struct fs *fs, ufs_lbn_t lblkno,
1.18 thorpej 1023: int expungetype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1024: {
1025: struct inode *ip = VTOI(vp);
1026: ufs1_daddr_t blkno, *blkp;
1027: ufs_lbn_t lbn;
1028: struct buf *ibp;
1029: int error, ns;
1030:
1031: ns = UFS_FSNEEDSWAP(fs);
1032:
1033: for ( ; oldblkp < lastblkp; oldblkp++) {
1034: blkno = ufs_rw32(*oldblkp, ns);
1035: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
1036: continue;
1037: lbn = fragstoblks(fs, blkno);
1038: if (lbn < NDADDR) {
1039: blkp = &ip->i_ffs1_db[lbn];
1040: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1041: } else {
1.22 yamt 1042: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1043: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1044: if (error)
1045: return (error);
1046: blkp = &((ufs1_daddr_t *)(ibp->b_data))
1047: [(lbn - NDADDR) % NINDIR(fs)];
1048: }
1049: /*
1050: * If we are expunging a snapshot vnode and we
1051: * find a block marked BLK_NOCOPY, then it is
1052: * one that has been allocated to this snapshot after
1053: * we took our current snapshot and can be ignored.
1054: */
1055: blkno = ufs_rw32(*blkp, ns);
1056: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1057: if (lbn >= NDADDR)
1058: brelse(ibp);
1059: } else {
1060: if (blkno != 0)
1061: panic("snapacct_ufs1: bad block");
1062: *blkp = ufs_rw32(expungetype, ns);
1063: if (lbn >= NDADDR)
1.15 hannken 1064: bdwrite(ibp);
1.1 hannken 1065: }
1066: }
1067: return (0);
1068: }
1069:
1070: /*
1071: * Account for a set of blocks allocated in a snapshot inode.
1072: */
1073: static int
1.18 thorpej 1074: mapacct_ufs1(struct vnode *vp, ufs1_daddr_t *oldblkp, ufs1_daddr_t *lastblkp,
1075: struct fs *fs, ufs_lbn_t lblkno, int expungetype)
1.1 hannken 1076: {
1077: ufs1_daddr_t blkno;
1078: struct inode *ip;
1079: ino_t inum;
1080: int acctit, ns;
1081:
1082: ns = UFS_FSNEEDSWAP(fs);
1083: ip = VTOI(vp);
1084: inum = ip->i_number;
1085: if (lblkno == -1)
1086: acctit = 0;
1087: else
1088: acctit = 1;
1089: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
1090: blkno = ufs_rw32(*oldblkp, ns);
1091: if (blkno == 0 || blkno == BLK_NOCOPY)
1092: continue;
1093: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1.15 hannken 1094: *ip->i_snapblklist++ = lblkno;
1.1 hannken 1095: if (blkno == BLK_SNAP)
1096: blkno = blkstofrags(fs, lblkno);
1097: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1098: }
1099: return (0);
1100: }
1101:
1102: /*
1103: * Before expunging a snapshot inode, note all the
1104: * blocks that it claims with BLK_SNAP so that fsck will
1105: * be able to account for those blocks properly and so
1106: * that this snapshot knows that it need not copy them
1107: * if the other snapshot holding them is freed. This code
1108: * is reproduced once each for UFS1 and UFS2.
1109: */
1110: static int
1.18 thorpej 1111: expunge_ufs2(struct vnode *snapvp, struct inode *cancelip, struct fs *fs,
1112: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1113: struct fs *, ufs_lbn_t, int),
1114: int expungetype)
1.1 hannken 1115: {
1.4 hannken 1116: int i, s, error, ns, indiroff;
1.1 hannken 1117: ufs_lbn_t lbn, rlbn;
1118: ufs2_daddr_t len, blkno, numblks, blksperindir;
1119: struct ufs2_dinode *dip;
1120: struct buf *bp;
1.43 christos 1121: void *bf;
1.1 hannken 1122:
1123: ns = UFS_FSNEEDSWAP(fs);
1124: /*
1125: * Prepare to expunge the inode. If its inode block has not
1126: * yet been copied, then allocate and fill the copy.
1127: */
1128: lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
1129: blkno = 0;
1130: if (lbn < NDADDR) {
1131: blkno = db_get(VTOI(snapvp), lbn);
1132: } else {
1.4 hannken 1133: s = cow_enter();
1.22 yamt 1134: error = ffs_balloc(snapvp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1135: fs->fs_bsize, KERNCRED, B_METAONLY, &bp);
1.4 hannken 1136: cow_leave(s);
1.1 hannken 1137: if (error)
1138: return (error);
1139: indiroff = (lbn - NDADDR) % NINDIR(fs);
1140: blkno = idb_get(VTOI(snapvp), bp->b_data, indiroff);
1141: brelse(bp);
1142: }
1.17 christos 1143: bf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.1 hannken 1144: if (blkno != 0)
1.17 christos 1145: error = readvnblk(snapvp, bf, lbn);
1.1 hannken 1146: else
1.17 christos 1147: error = readfsblk(snapvp, bf, lbn);
1.1 hannken 1148: if (error) {
1.17 christos 1149: free(bf, M_UFSMNT);
1.1 hannken 1150: return error;
1151: }
1152: /*
1153: * Set a snapshot inode to be a zero length file, regular files
1.48 ! hannken 1154: * or unlinked snapshots to be completely unallocated.
1.1 hannken 1155: */
1.17 christos 1156: dip = (struct ufs2_dinode *)bf + ino_to_fsbo(fs, cancelip->i_number);
1.48 ! hannken 1157: if (expungetype == BLK_NOCOPY || cancelip->i_ffs_effnlink == 0)
1.1 hannken 1158: dip->di_mode = 0;
1159: dip->di_size = 0;
1160: dip->di_blocks = 0;
1161: dip->di_flags =
1162: ufs_rw32(ufs_rw32(dip->di_flags, ns) & ~SF_SNAPSHOT, ns);
1163: bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
1.17 christos 1164: error = writevnblk(snapvp, bf, lbn);
1165: free(bf, M_UFSMNT);
1.1 hannken 1166: if (error)
1167: return error;
1168: /*
1169: * Now go through and expunge all the blocks in the file
1170: * using the function requested.
1171: */
1172: numblks = howmany(cancelip->i_size, fs->fs_bsize);
1173: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_db[0],
1174: &cancelip->i_ffs2_db[NDADDR], fs, 0, expungetype)))
1175: return (error);
1176: if ((error = (*acctfunc)(snapvp, &cancelip->i_ffs2_ib[0],
1177: &cancelip->i_ffs2_ib[NIADDR], fs, -1, expungetype)))
1178: return (error);
1179: blksperindir = 1;
1180: lbn = -NDADDR;
1181: len = numblks - NDADDR;
1182: rlbn = NDADDR;
1183: for (i = 0; len > 0 && i < NIADDR; i++) {
1184: error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
1185: ufs_rw64(cancelip->i_ffs2_ib[i], ns), lbn, rlbn, len,
1186: blksperindir, fs, acctfunc, expungetype);
1187: if (error)
1188: return (error);
1189: blksperindir *= NINDIR(fs);
1190: lbn -= blksperindir + 1;
1191: len -= blksperindir;
1192: rlbn += blksperindir;
1193: }
1194: return (0);
1195: }
1196:
1197: /*
1198: * Descend an indirect block chain for vnode cancelvp accounting for all
1199: * its indirect blocks in snapvp.
1.11 perry 1200: */
1.1 hannken 1201: static int
1.18 thorpej 1202: indiracct_ufs2(struct vnode *snapvp, struct vnode *cancelvp, int level,
1203: ufs2_daddr_t blkno, ufs_lbn_t lbn, ufs_lbn_t rlbn, ufs_lbn_t remblks,
1204: ufs_lbn_t blksperindir, struct fs *fs,
1205: int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
1206: struct fs *, ufs_lbn_t, int),
1207: int expungetype)
1.1 hannken 1208: {
1209: int error, ns, num, i;
1210: ufs_lbn_t subblksperindir;
1211: struct indir indirs[NIADDR + 2];
1212: ufs2_daddr_t last, *bap;
1213: struct buf *bp;
1214:
1215: ns = UFS_FSNEEDSWAP(fs);
1216:
1217: if (blkno == 0) {
1218: if (expungetype == BLK_NOCOPY)
1219: return (0);
1220: panic("indiracct_ufs2: missing indir");
1221: }
1222: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
1223: return (error);
1224: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
1225: panic("indiracct_ufs2: botched params");
1226: /*
1227: * We have to expand bread here since it will deadlock looking
1228: * up the block number for any blocks that are not in the cache.
1229: */
1230: bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
1231: bp->b_blkno = fsbtodb(fs, blkno);
1232: if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
1233: (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) {
1234: brelse(bp);
1235: return (error);
1236: }
1237: /*
1238: * Account for the block pointers in this indirect block.
1239: */
1240: last = howmany(remblks, blksperindir);
1241: if (last > NINDIR(fs))
1242: last = NINDIR(fs);
1.25 christos 1243: bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK);
1.43 christos 1244: bcopy(bp->b_data, (void *)bap, fs->fs_bsize);
1.1 hannken 1245: brelse(bp);
1246: error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
1247: level == 0 ? rlbn : -1, expungetype);
1248: if (error || level == 0)
1249: goto out;
1250: /*
1251: * Account for the block pointers in each of the indirect blocks
1252: * in the levels below us.
1253: */
1254: subblksperindir = blksperindir / NINDIR(fs);
1255: for (lbn++, level--, i = 0; i < last; i++) {
1256: error = indiracct_ufs2(snapvp, cancelvp, level,
1257: ufs_rw64(bap[i], ns), lbn, rlbn, remblks, subblksperindir,
1258: fs, acctfunc, expungetype);
1259: if (error)
1260: goto out;
1261: rlbn += blksperindir;
1262: lbn -= blksperindir;
1263: remblks -= blksperindir;
1264: }
1265: out:
1266: FREE(bap, M_DEVBUF);
1267: return (error);
1268: }
1269:
1270: /*
1271: * Do both snap accounting and map accounting.
1272: */
1273: static int
1.18 thorpej 1274: fullacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1275: struct fs *fs, ufs_lbn_t lblkno,
1276: int exptype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1277: {
1278: int error;
1279:
1280: if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
1281: return (error);
1282: return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
1283: }
1284:
1285: /*
1286: * Identify a set of blocks allocated in a snapshot inode.
1287: */
1288: static int
1.18 thorpej 1289: snapacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1.36 christos 1290: struct fs *fs, ufs_lbn_t lblkno,
1.18 thorpej 1291: int expungetype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1292: {
1293: struct inode *ip = VTOI(vp);
1294: ufs2_daddr_t blkno, *blkp;
1295: ufs_lbn_t lbn;
1296: struct buf *ibp;
1297: int error, ns;
1298:
1299: ns = UFS_FSNEEDSWAP(fs);
1300:
1301: for ( ; oldblkp < lastblkp; oldblkp++) {
1302: blkno = ufs_rw64(*oldblkp, ns);
1303: if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
1304: continue;
1305: lbn = fragstoblks(fs, blkno);
1306: if (lbn < NDADDR) {
1307: blkp = &ip->i_ffs2_db[lbn];
1308: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1309: } else {
1.22 yamt 1310: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1311: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1312: if (error)
1313: return (error);
1314: blkp = &((ufs2_daddr_t *)(ibp->b_data))
1315: [(lbn - NDADDR) % NINDIR(fs)];
1316: }
1317: /*
1318: * If we are expunging a snapshot vnode and we
1319: * find a block marked BLK_NOCOPY, then it is
1320: * one that has been allocated to this snapshot after
1321: * we took our current snapshot and can be ignored.
1322: */
1323: blkno = ufs_rw64(*blkp, ns);
1324: if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1325: if (lbn >= NDADDR)
1326: brelse(ibp);
1327: } else {
1328: if (blkno != 0)
1329: panic("snapacct_ufs2: bad block");
1330: *blkp = ufs_rw64(expungetype, ns);
1331: if (lbn >= NDADDR)
1.15 hannken 1332: bdwrite(ibp);
1.1 hannken 1333: }
1334: }
1335: return (0);
1336: }
1337:
1338: /*
1339: * Account for a set of blocks allocated in a snapshot inode.
1340: */
1341: static int
1.18 thorpej 1342: mapacct_ufs2(struct vnode *vp, ufs2_daddr_t *oldblkp, ufs2_daddr_t *lastblkp,
1343: struct fs *fs, ufs_lbn_t lblkno, int expungetype)
1.1 hannken 1344: {
1345: ufs2_daddr_t blkno;
1346: struct inode *ip;
1347: ino_t inum;
1348: int acctit, ns;
1349:
1350: ns = UFS_FSNEEDSWAP(fs);
1351: ip = VTOI(vp);
1352: inum = ip->i_number;
1353: if (lblkno == -1)
1354: acctit = 0;
1355: else
1356: acctit = 1;
1357: for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
1358: blkno = ufs_rw64(*oldblkp, ns);
1359: if (blkno == 0 || blkno == BLK_NOCOPY)
1360: continue;
1361: if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1.15 hannken 1362: *ip->i_snapblklist++ = lblkno;
1.1 hannken 1363: if (blkno == BLK_SNAP)
1364: blkno = blkstofrags(fs, lblkno);
1365: ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
1366: }
1367: return (0);
1368: }
1.10 hannken 1369: #endif /* defined(FFS_NO_SNAPSHOT) */
1.1 hannken 1370:
1371: /*
1372: * Decrement extra reference on snapshot when last name is removed.
1373: * It will not be freed until the last open reference goes away.
1374: */
1375: void
1.18 thorpej 1376: ffs_snapgone(struct inode *ip)
1.1 hannken 1377: {
1.47 hannken 1378: struct mount *mp = ip->i_devvp->v_specmountpoint;
1.1 hannken 1379: struct inode *xp;
1380: struct fs *fs;
1.47 hannken 1381: struct snap_info *si;
1.1 hannken 1382: int snaploc;
1383:
1.47 hannken 1384: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1385: return;
1.1 hannken 1386: /*
1387: * Find snapshot in incore list.
1388: */
1.47 hannken 1389: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap)
1.1 hannken 1390: if (xp == ip)
1391: break;
1392: if (xp != NULL)
1393: vrele(ITOV(ip));
1394: #ifdef DEBUG
1395: else if (snapdebug)
1.19 christos 1396: printf("ffs_snapgone: lost snapshot vnode %llu\n",
1397: (unsigned long long)ip->i_number);
1.1 hannken 1398: #endif
1399: /*
1400: * Delete snapshot inode from superblock. Keep list dense.
1401: */
1402: fs = ip->i_fs;
1403: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
1404: if (fs->fs_snapinum[snaploc] == ip->i_number)
1405: break;
1406: if (snaploc < FSMAXSNAP) {
1407: for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
1408: if (fs->fs_snapinum[snaploc] == 0)
1409: break;
1410: fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
1411: }
1412: fs->fs_snapinum[snaploc - 1] = 0;
1413: }
1414: }
1415:
1416: /*
1417: * Prepare a snapshot file for being removed.
 *
 * Steps performed on the snapshot vnode vp:
 *  - if it is still on the mount's list of snapshots, unlink it, give
 *    the vnode back its private lock (v_lock) and release the lock it
 *    was using; when it was the last snapshot, the copy-on-write
 *    handler is disestablished and that lock is freed;
 *  - free the snapshot's block hint list;
 *  - clear BLK_NOCOPY/BLK_SNAP entries from its block map and offer
 *    claimed blocks to ffs_snapblkfree();
 *  - clear SF_SNAPSHOT and mark the inode changed.
 *
 * Returns without doing anything if the mount has no snapshot state.
1418: */
1419: void
1.18 thorpej 1420: ffs_snapremove(struct vnode *vp)
1.1 hannken 1421: {
1.15 hannken 1422: struct inode *ip = VTOI(vp), *xp;
1.1 hannken 1423: struct vnode *devvp = ip->i_devvp;
1424: struct fs *fs = ip->i_fs;
1.47 hannken 1425: struct mount *mp = devvp->v_specmountpoint;
1.1 hannken 1426: struct lock *lkp;
1427: struct buf *ibp;
1.47 hannken 1428: struct snap_info *si;
1.15 hannken 1429: ufs2_daddr_t numblks, blkno, dblk;
1.1 hannken 1430: int error, ns, loc, last;
1431:
1.47 hannken 1432: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1433: return;
1.1 hannken 1434: ns = UFS_FSNEEDSWAP(fs);
1435: /*
1436: * If active, delete from incore list (this snapshot may
1437: * already have been in the process of being deleted, so
1438: * would not have been active).
1439: *
1440: * Clear copy-on-write flag if last snapshot.
1441: */
1442: if (ip->i_nextsnap.tqe_prev != 0) {
/* Take the vnode's private lock first; LK_INTERLOCK consumes the interlock. */
1443: VI_LOCK(devvp);
1444: lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE,
1445: VI_MTX(devvp));
/* Re-take the interlock for the list manipulation below. */
1446: VI_LOCK(devvp);
1.47 hannken 1447: TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap);
1.1 hannken 1448: ip->i_nextsnap.tqe_prev = 0;
/* Switch vp back to its private lock and release the one it used so far. */
1449: lkp = vp->v_vnlock;
1450: vp->v_vnlock = &vp->v_lock;
1451: lockmgr(lkp, LK_RELEASE, NULL);
1.47 hannken 1452: if (TAILQ_FIRST(&si->si_snapshots) != 0) {
1.15 hannken 1453: /* Roll back the list of preallocated blocks. */
1.47 hannken 1454: xp = TAILQ_LAST(&si->si_snapshots, inodelst);
1455: si->si_snapblklist = xp->i_snapblklist;
1.1 hannken 1456: VI_UNLOCK(devvp);
1457: } else {
/* Last snapshot gone: drain the old lock, drop the COW hook, free the lock. */
1.47 hannken 1458: si->si_snapblklist = 0;
1.1 hannken 1459: lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp));
1460: lockmgr(lkp, LK_RELEASE, NULL);
1461: vn_cow_disestablish(devvp, ffs_copyonwrite, devvp);
1462: FREE(lkp, M_UFSMNT);
1463: }
1.15 hannken 1464: FREE(ip->i_snapblklist, M_UFSMNT);
1465: ip->i_snapblklist = NULL;
1.1 hannken 1466: }
1467: /*
1468: * Clear all BLK_NOCOPY fields. Pass any block claims to other
1469: * snapshots that want them (see ffs_snapblkfree below).
1470: */
/* NOTE(review): the direct block scan starts at 1, not 0 - confirm intent. */
1471: for (blkno = 1; blkno < NDADDR; blkno++) {
1472: dblk = db_get(ip, blkno);
1473: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1474: db_assign(ip, blkno, 0);
/* A block stored at its own logical block number is a claimed block. */
1475: else if ((dblk == blkstofrags(fs, blkno) &&
1476: ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
1477: ip->i_number))) {
1478: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1479: db_assign(ip, blkno, 0);
1480: }
1481: }
1482: numblks = howmany(ip->i_size, fs->fs_bsize);
/* One iteration per indirect block; blkno is the first lbn it maps. */
1483: for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
1.22 yamt 1484: error = ffs_balloc(vp, lblktosize(fs, (off_t)blkno),
1.1 hannken 1485: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
/* Best effort: an indirect block that cannot be fetched is skipped. */
1486: if (error)
1487: continue;
/* NOTE(review): fs_size looks like a fragment count while blkno is a block number - confirm units. */
1488: if (fs->fs_size - blkno > NINDIR(fs))
1489: last = NINDIR(fs);
1490: else
1491: last = fs->fs_size - blkno;
1492: for (loc = 0; loc < last; loc++) {
1493: dblk = idb_get(ip, ibp->b_data, loc);
1494: if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
1495: idb_assign(ip, ibp->b_data, loc, 0);
/* NOTE(review): compares against blkstofrags(fs, blkno), not blkno + loc - confirm intent. */
1496: else if (dblk == blkstofrags(fs, blkno) &&
1497: ffs_snapblkfree(fs, ip->i_devvp, dblk,
1498: fs->fs_bsize, ip->i_number)) {
1499: DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
1500: idb_assign(ip, ibp->b_data, loc, 0);
1501: }
1502: }
1.15 hannken 1503: bawrite(ibp);
1.1 hannken 1504: }
1505: /*
1506: * Clear snapshot flag and drop reference.
1507: */
1508: ip->i_flags &= ~SF_SNAPSHOT;
1509: DIP_ASSIGN(ip, flags, ip->i_flags);
1510: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1511: }
1512:
1513: /*
1514: * Notification that a block is being freed. Return zero if the free
1515: * should be allowed to proceed. Return non-zero if the snapshot file
1516: * wants to claim the block. The block will be claimed if it is an
1517: * uncopied part of one of the snapshots. It will be freed if it is
1518: * either a BLK_NOCOPY or has already been copied in all of the snapshots.
1519: * If a fragment is being freed, then all snapshots that care about
1520: * it must make a copy since a snapshot file can only claim full sized
1521: * blocks. Note that if more than one snapshot file maps the block,
1522: * we can pick one at random to claim it. Since none of the snapshots
1523: * can change, we are assured that they will all see the same unmodified
1524: * image. When deleting a snapshot file (see ffs_snapremove above), we
1525: * must push any of these claimed blocks to one of the other snapshots
1526: * that maps it. These claimed blocks are easily identified as they will
1527: * have a block number equal to their logical block number within the
1528: * snapshot. A copied block can never have this property because they
1529: * must always have been allocated from a BLK_NOCOPY location.
1530: */
/*
 * fs    file system the block belongs to
 * devvp device vnode; its mount point carries the snapshot list
 * bno   address of the block or fragment being freed
 * size  number of bytes being freed; fs->fs_bsize means a full block
 * inum  inode the block is freed from (used for debug output only)
 *
 * Returns 1 when a snapshot claimed the block, 0 when the free may
 * proceed, or an error number when copying the data failed.
 */
1531: int
1.18 thorpej 1532: ffs_snapblkfree(struct fs *fs, struct vnode *devvp, ufs2_daddr_t bno,
1.36 christos 1533: long size, ino_t inum)
1.1 hannken 1534: {
1.47 hannken 1535: struct mount *mp = devvp->v_specmountpoint;
1.1 hannken 1536: struct buf *ibp;
1537: struct inode *ip;
1538: struct vnode *vp = NULL, *saved_vp = NULL;
1.47 hannken 1539: struct snap_info *si;
1.43 christos 1540: void *saved_data = NULL;
1.1 hannken 1541: ufs_lbn_t lbn;
1542: ufs2_daddr_t blkno;
1.4 hannken 1543: int s, indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
1.1 hannken 1544:
/* No snapshot state on this mount: let the free proceed. */
1.47 hannken 1545: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1546: return 0;
1.1 hannken 1547: lbn = fragstoblks(fs, bno);
/* The scan restarts here whenever taking the snapshot lock did not succeed at once. */
1548: retry:
1549: VI_LOCK(devvp);
1.47 hannken 1550: TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
1.1 hannken 1551: vp = ITOV(ip);
1552: /*
1553: * Lookup block being written.
1554: */
1555: if (lbn < NDADDR) {
1556: blkno = db_get(ip, lbn);
1557: } else {
/* Take the snapshot lock (handing over the devvp interlock) before ffs_balloc(). */
1558: if (snapshot_locked == 0 &&
1559: lockmgr(vp->v_vnlock,
1560: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
1561: VI_MTX(devvp)) != 0)
1562: goto retry;
1563: snapshot_locked = 1;
1.4 hannken 1564: s = cow_enter();
1.22 yamt 1565: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1566: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1567: cow_leave(s);
1.1 hannken 1568: if (error)
1569: break;
1570: indiroff = (lbn - NDADDR) % NINDIR(fs);
1571: blkno = idb_get(ip, ibp->b_data, indiroff);
1572: }
1573: /*
1574: * Check to see if block needs to be copied.
1575: */
1576: if (blkno == 0) {
1577: /*
1578: * A block that we map is being freed. If it has not
1579: * been claimed yet, we will claim or copy it (below).
1580: */
1581: claimedblk = 1;
1582: } else if (blkno == BLK_SNAP) {
1583: /*
1584: * No previous snapshot claimed the block,
1585: * so it will be freed and become a BLK_NOCOPY
1586: * (don't care) for us.
1587: */
1588: if (claimedblk)
1589: panic("snapblkfree: inconsistent block type");
/* Try the lock without sleeping; on failure wait for it via vn_lock() and rescan. */
1590: if (snapshot_locked == 0 &&
1591: lockmgr(vp->v_vnlock,
1592: LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
1593: VI_MTX(devvp)) != 0) {
1.32 christos 1594: #if 0 /* CID-2949: dead code */
1.1 hannken 1595: if (lbn >= NDADDR)
1596: brelse(ibp);
1.32 christos 1597: #endif
1.1 hannken 1598: vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL);
1599: goto retry;
1600: }
1601: snapshot_locked = 1;
1602: if (lbn < NDADDR) {
1603: db_assign(ip, lbn, BLK_NOCOPY);
1604: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1605: } else {
1606: idb_assign(ip, ibp->b_data, indiroff,
1607: BLK_NOCOPY);
1608: bwrite(ibp);
1609: }
1610: continue;
1611: } else /* BLK_NOCOPY or default */ {
1612: /*
1613: * If the snapshot has already copied the block
1614: * (default), or does not care about the block,
1615: * it is not needed.
1616: */
1.27 bouyer 1617: if (lbn >= NDADDR)
1.1 hannken 1618: brelse(ibp);
1619: continue;
1620: }
1621: /*
1622: * If this is a full size block, we will just grab it
1623: * and assign it to the snapshot inode. Otherwise we
1624: * will proceed to copy it. See explanation for this
1625: * routine as to why only a single snapshot needs to
1626: * claim this block.
1627: */
1628: if (snapshot_locked == 0 &&
1629: lockmgr(vp->v_vnlock,
1630: LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
1631: VI_MTX(devvp)) != 0) {
1632: vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL);
1633: goto retry;
1634: }
1635: snapshot_locked = 1;
1636: if (size == fs->fs_bsize) {
1637: #ifdef DEBUG
1638: if (snapdebug)
1.19 christos 1639: printf("%s %llu lbn %" PRId64
1640: "from inum %llu\n",
1641: "Grabonremove: snapino",
1642: (unsigned long long)ip->i_number,
1643: lbn, (unsigned long long)inum);
1.1 hannken 1644: #endif
/* Claim: store the block's own address in this snapshot's map and report it taken. */
1645: if (lbn < NDADDR) {
1646: db_assign(ip, lbn, bno);
1647: } else {
1648: idb_assign(ip, ibp->b_data, indiroff, bno);
1649: bwrite(ibp);
1650: }
1651: DIP_ADD(ip, blocks, btodb(size));
1652: ip->i_flag |= IN_CHANGE | IN_UPDATE;
1653: VOP_UNLOCK(vp, 0);
1654: return (1);
1655: }
1656: if (lbn >= NDADDR)
1657: brelse(ibp);
1658: #ifdef DEBUG
1659: if (snapdebug)
1.19 christos 1660: printf("%s%llu lbn %" PRId64 " %s %llu size %ld\n",
1661: "Copyonremove: snapino ",
1662: (unsigned long long)ip->i_number,
1663: lbn, "for inum", (unsigned long long)inum, size);
1.1 hannken 1664: #endif
1665: /*
1666: * If we have already read the old block contents, then
1667: * simply copy them to the new block. Note that we need
1668: * to synchronously write snapshots that have not been
1669: * unlinked, and hence will be visible after a crash,
1670: * to ensure their integrity.
1671: */
1672: if (saved_data) {
1673: error = writevnblk(vp, saved_data, lbn);
1674: if (error)
1675: break;
1676: continue;
1677: }
1678: /*
1679: * Otherwise, read the old block contents into the buffer.
1680: */
/* The first snapshot needing a copy is remembered; its write happens after the loop. */
1681: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1682: saved_vp = vp;
1683: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
1684: free(saved_data, M_UFSMNT);
1685: saved_data = NULL;
1686: break;
1687: }
1688: }
1689: /*
1690: * Note that we need to synchronously write snapshots that
1691: * have not been unlinked, and hence will be visible after
1692: * a crash, to ensure their integrity.
1693: */
/* NOTE(review): this assignment overwrites an error from a failed writevnblk() inside the loop - confirm intended. */
1694: if (saved_data) {
1695: error = writevnblk(saved_vp, saved_data, lbn);
1696: free(saved_data, M_UFSMNT);
1697: }
1698: /*
1699: * If we have been unable to allocate a block in which to do
1700: * the copy, then return non-zero so that the fragment will
1701: * not be freed. Although space will be lost, the snapshot
1702: * will stay consistent.
1703: */
/* vp is the snapshot visited last; presumably any works since snapshots share one lock (see ffs_snapshot_mount). */
1704: if (snapshot_locked)
1705: VOP_UNLOCK(vp, 0);
1706: else
1707: VI_UNLOCK(devvp);
1708: return (error);
1709: }
1710:
1711: /*
1712: * Associate snapshot files when mounting.
1713: */
1714: void
1.18 thorpej 1715: ffs_snapshot_mount(struct mount *mp)
1.1 hannken 1716: {
1.47 hannken 1717: struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
1718: struct fs *fs = VFSTOUFS(mp)->um_fs;
1.31 ad 1719: struct lwp *l = curlwp;
1.1 hannken 1720: struct vnode *vp;
1721: struct inode *ip, *xp;
1.47 hannken 1722: struct snap_info *si;
1.1 hannken 1723: ufs2_daddr_t snaplistsize, *snapblklist;
1.15 hannken 1724: int i, error, ns, snaploc, loc;
1.1 hannken 1725:
1.46 hannken 1726: /*
1727: * No persistent snapshots on apple ufs file systems.
1728: */
1.47 hannken 1729: if (UFS_MPISAPPLEUFS(VFSTOUFS(mp)))
1.46 hannken 1730: return;
1731:
1.47 hannken 1732: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1733: si = si_mount_init(mp);
1.1 hannken 1734: ns = UFS_FSNEEDSWAP(fs);
1735: /*
1.22 yamt 1736: * XXX The following needs to be set before ffs_truncate or
1.1 hannken 1737: * VOP_READ can be called.
1738: */
1739: mp->mnt_stat.f_iosize = fs->fs_bsize;
1740: /*
1741: * Process each snapshot listed in the superblock.
1742: */
1743: vp = NULL;
1744: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
1745: if (fs->fs_snapinum[snaploc] == 0)
1746: break;
1747: if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
1748: &vp)) != 0) {
1749: printf("ffs_snapshot_mount: vget failed %d\n", error);
1750: continue;
1751: }
1752: ip = VTOI(vp);
1753: if ((ip->i_flags & SF_SNAPSHOT) == 0) {
1754: printf("ffs_snapshot_mount: non-snapshot inode %d\n",
1755: fs->fs_snapinum[snaploc]);
1756: vput(vp);
1757: vp = NULL;
1758: for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
1759: if (fs->fs_snapinum[loc] == 0)
1760: break;
1761: fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
1762: }
1763: fs->fs_snapinum[loc - 1] = 0;
1764: snaploc--;
1765: continue;
1766: }
1.15 hannken 1767:
1768: /*
1769: * Read the block hints list. Use an empty list on
1770: * read errors.
1771: */
1772: error = vn_rdwr(UIO_READ, vp,
1.43 christos 1773: (void *)&snaplistsize, sizeof(snaplistsize),
1.15 hannken 1774: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1775: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
1.31 ad 1776: l->l_cred, NULL, NULL);
1.15 hannken 1777: if (error) {
1778: printf("ffs_snapshot_mount: read_1 failed %d\n", error);
1779: snaplistsize = 1;
1780: } else
1781: snaplistsize = ufs_rw64(snaplistsize, ns);
1.25 christos 1782: snapblklist = malloc(
1.15 hannken 1783: snaplistsize * sizeof(ufs2_daddr_t), M_UFSMNT, M_WAITOK);
1784: if (error)
1785: snapblklist[0] = 1;
1786: else {
1.43 christos 1787: error = vn_rdwr(UIO_READ, vp, (void *)snapblklist,
1.15 hannken 1788: snaplistsize * sizeof(ufs2_daddr_t),
1789: lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
1790: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
1.31 ad 1791: l->l_cred, NULL, NULL);
1.15 hannken 1792: for (i = 0; i < snaplistsize; i++)
1793: snapblklist[i] = ufs_rw64(snapblklist[i], ns);
1794: if (error) {
1795: printf("ffs_snapshot_mount: read_2 failed %d\n",
1796: error);
1797: snapblklist[0] = 1;
1798: }
1799: }
1800: ip->i_snapblklist = &snapblklist[0];
1801:
1.1 hannken 1802: /*
1803: * If there already exist snapshots on this filesystem, grab a
1804: * reference to their shared lock. If this is the first snapshot
1805: * on this filesystem, we need to allocate a lock for the
1806: * snapshots to share. In either case, acquire the snapshot
1807: * lock and give up our original private lock.
1808: */
1809: VI_LOCK(devvp);
1.47 hannken 1810: if ((xp = TAILQ_FIRST(&si->si_snapshots)) != NULL) {
1.1 hannken 1811: struct lock *lkp;
1812:
1813: lkp = ITOV(xp)->v_vnlock;
1814: VI_UNLOCK(devvp);
1815: VI_LOCK(vp);
1816: vp->v_vnlock = lkp;
1817: } else {
1818: struct lock *lkp;
1819:
1820: VI_UNLOCK(devvp);
1821: MALLOC(lkp, struct lock *, sizeof(struct lock),
1822: M_UFSMNT, M_WAITOK);
1823: lockinit(lkp, PVFS, "snaplk", 0, LK_CANRECURSE);
1824: VI_LOCK(vp);
1825: vp->v_vnlock = lkp;
1826: }
1827: vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY);
1828: transferlockers(&vp->v_lock, vp->v_vnlock);
1829: lockmgr(&vp->v_lock, LK_RELEASE, NULL);
1830: /*
1831: * Link it onto the active snapshot list.
1832: */
1833: VI_LOCK(devvp);
1834: if (ip->i_nextsnap.tqe_prev != 0)
1.19 christos 1835: panic("ffs_snapshot_mount: %llu already on list",
1836: (unsigned long long)ip->i_number);
1.1 hannken 1837: else
1.47 hannken 1838: TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
1.1 hannken 1839: vp->v_flag |= VSYSTEM;
1840: VI_UNLOCK(devvp);
1841: VOP_UNLOCK(vp, 0);
1842: }
1843: /*
1844: * No usable snapshots found.
1845: */
1846: if (vp == NULL)
1847: return;
1848: /*
1.15 hannken 1849: * Attach the block hints list. We always want to
1.1 hannken 1850: * use the list from the newest snapshot.
1.15 hannken 1851: */
1.47 hannken 1852: xp = TAILQ_LAST(&si->si_snapshots, inodelst);
1.1 hannken 1853: VI_LOCK(devvp);
1.47 hannken 1854: si->si_snapblklist = xp->i_snapblklist;
1.1 hannken 1855: VI_UNLOCK(devvp);
1856: vn_cow_establish(devvp, ffs_copyonwrite, devvp);
1857: }
1858:
1859: /*
1860: * Disassociate snapshot files when unmounting.
1861: */
1862: void
1.18 thorpej 1863: ffs_snapshot_unmount(struct mount *mp)
1.1 hannken 1864: {
1.47 hannken 1865: struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
1.1 hannken 1866: struct lock *lkp = NULL;
1867: struct inode *xp;
1868: struct vnode *vp;
1.47 hannken 1869: struct snap_info *si;
1.1 hannken 1870:
1.47 hannken 1871: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1872: return;
1.1 hannken 1873: VI_LOCK(devvp);
1.47 hannken 1874: while ((xp = TAILQ_FIRST(&si->si_snapshots)) != 0) {
1.1 hannken 1875: vp = ITOV(xp);
1876: lkp = vp->v_vnlock;
1877: vp->v_vnlock = &vp->v_lock;
1.47 hannken 1878: TAILQ_REMOVE(&si->si_snapshots, xp, i_nextsnap);
1.1 hannken 1879: xp->i_nextsnap.tqe_prev = 0;
1.47 hannken 1880: if (xp->i_snapblklist == si->si_snapblklist)
1881: si->si_snapblklist = NULL;
1.15 hannken 1882: VI_UNLOCK(devvp);
1883: FREE(xp->i_snapblklist, M_UFSMNT);
1884: if (xp->i_ffs_effnlink > 0)
1.1 hannken 1885: vrele(vp);
1.15 hannken 1886: VI_LOCK(devvp);
1.1 hannken 1887: }
1888: VI_UNLOCK(devvp);
1889: if (lkp != NULL) {
1890: vn_cow_disestablish(devvp, ffs_copyonwrite, devvp);
1891: FREE(lkp, M_UFSMNT);
1892: }
1893: }
1894:
1895: /*
1896: * Check for need to copy block that is about to be written,
1897: * copying the block if necessary.
 *
 * v is the device vnode (the cookie handed to vn_cow_establish() and
 * vn_cow_disestablish() together with this function); bp is the buffer
 * about to be written.
 *
 * Returns 0 when the mount has no snapshot data, when the snapshot list
 * is empty, or when the block is found in the preallocated block list.
 * Otherwise returns the error state left by the scan of the snapshot
 * list and by the final write at the bottom of the function.
1898: */
1899: static int
1.18 thorpej 1900: ffs_copyonwrite(void *v, struct buf *bp)
1.1 hannken 1901: {
1902: struct buf *ibp;
1903: struct fs *fs;
1904: struct inode *ip;
1905: struct vnode *devvp = v, *vp = 0, *saved_vp = NULL;
1.47 hannken 1906: struct mount *mp = devvp->v_specmountpoint;
1907: struct snap_info *si;
1.43 christos 1908: void *saved_data = NULL;
1.1 hannken 1909: ufs2_daddr_t lbn, blkno, *snapblklist;
1.4 hannken 1910: int lower, upper, mid, s, ns, indiroff, snapshot_locked = 0, error = 0;
1.1 hannken 1911:
1912: /*
1913: * Check for valid snapshots.
1914: */
1.47 hannken 1915: if ((si = mount_getspecific(mp, si_mount_data_key)) == NULL)
1916: return 0;
1.1 hannken 1917: VI_LOCK(devvp);
1.47 hannken 1918: ip = TAILQ_FIRST(&si->si_snapshots);
1.1 hannken 1919: if (ip == NULL) {
1920: VI_UNLOCK(devvp);
1.11 perry 1921: return 0;
1.1 hannken 1922: }
1923: /*
1924: * First check to see if it is in the preallocated list.
1925: * By doing this check we avoid several potential deadlocks.
1926: */
1927: fs = ip->i_fs;
/* NOTE(review): ns is assigned here but never read again in this function. */
1928: ns = UFS_FSNEEDSWAP(fs);
1929: lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
1.47 hannken 1930: snapblklist = si->si_snapblklist;
/* Entry 0 supplies the bound: the search covers entries 1 .. [0] - 1. */
1931: upper = si->si_snapblklist[0] - 1;
1.1 hannken 1932: lower = 1;
1933: while (lower <= upper) {
1934: mid = (lower + upper) / 2;
1.15 hannken 1935: if (snapblklist[mid] == lbn)
1.1 hannken 1936: break;
1.15 hannken 1937: if (snapblklist[mid] < lbn)
1.1 hannken 1938: lower = mid + 1;
1939: else
1940: upper = mid - 1;
1941: }
/* lower <= upper can only still hold here if the loop left via break (match). */
1942: if (lower <= upper) {
1943: VI_UNLOCK(devvp);
1944: return 0;
1945: }
1946: /*
1947: * Not in the precomputed list, so check the snapshots.
1948: */
1949: retry:
1.47 hannken 1950: TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
1.1 hannken 1951: vp = ITOV(ip);
1952: /*
1953: * We ensure that everything of our own that needs to be
1954: * copied will be done at the time that ffs_snapshot is
1955: * called. Thus we can skip the check here which can
1.22 yamt 1956: * deadlock in doing the lookup in ffs_balloc.
1.1 hannken 1957: */
1958: if (bp->b_vp == vp)
1959: continue;
1960: /*
1961: * Check to see if block needs to be copied. We do not have
1962: * to hold the snapshot lock while doing this lookup as it
1963: * will never require any additional allocations for the
1964: * snapshot inode.
1965: */
1966: if (lbn < NDADDR) {
1967: blkno = db_get(ip, lbn);
1968: } else {
/*
 * The snapshot lock is requested at most once per call (tracked by
 * snapshot_locked); lockmgr is handed the devvp interlock through
 * LK_INTERLOCK.  When the request fails, the interlock is taken
 * again and the scan restarts from the head of the list.
 */
1969: if (snapshot_locked == 0 &&
1970: lockmgr(vp->v_vnlock,
1971: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
1972: VI_MTX(devvp)) != 0) {
1973: VI_LOCK(devvp);
1974: goto retry;
1975: }
1976: snapshot_locked = 1;
1.4 hannken 1977: s = cow_enter();
1.22 yamt 1978: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.1 hannken 1979: fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
1.4 hannken 1980: cow_leave(s);
1.1 hannken 1981: if (error)
1982: break;
/* Pick this block's entry out of the indirect block just obtained. */
1983: indiroff = (lbn - NDADDR) % NINDIR(fs);
1984: blkno = idb_get(ip, ibp->b_data, indiroff);
1985: brelse(ibp);
1986: }
1987: #ifdef DIAGNOSTIC
1988: if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
1989: panic("ffs_copyonwrite: bad copy block");
1990: #endif
/* A non-zero entry means this snapshot needs no copy of the block. */
1991: if (blkno != 0)
1992: continue;
1.4 hannken 1993: #ifdef DIAGNOSTIC
1.41 ad 1994: if (curlwp->l_pflag & LP_UFSCOW)
1.4 hannken 1995: printf("ffs_copyonwrite: recursive call\n");
1996: #endif
1.1 hannken 1997: /*
1998: * Allocate the block into which to do the copy. Since
1999: * multiple processes may all try to copy the same block,
2000: * we have to recheck our need to do a copy if we sleep
2001: * waiting for the lock.
2002: *
2003: * Because all snapshots on a filesystem share a single
2004: * lock, we ensure that we will never be in competition
2005: * with another process to allocate a block.
2006: */
2007: if (snapshot_locked == 0 &&
2008: lockmgr(vp->v_vnlock,
2009: LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
2010: VI_MTX(devvp)) != 0) {
2011: VI_LOCK(devvp);
2012: goto retry;
2013: }
2014: snapshot_locked = 1;
2015: #ifdef DEBUG
2016: if (snapdebug) {
1.19 christos 2017: printf("Copyonwrite: snapino %llu lbn %" PRId64 " for ",
2018: (unsigned long long)ip->i_number, lbn);
1.1 hannken 2019: if (bp->b_vp == devvp)
2020: printf("fs metadata");
2021: else
1.19 christos 2022: printf("inum %llu", (unsigned long long)
2023: VTOI(bp->b_vp)->i_number);
1.4 hannken 2024: printf(" lblkno %" PRId64 "\n", bp->b_lblkno);
1.1 hannken 2025: }
2026: #endif
2027: /*
2028: * If we have already read the old block contents, then
2029: * simply copy them to the new block. Note that we need
2030: * to synchronously write snapshots that have not been
2031: * unlinked, and hence will be visible after a crash,
2032: * to ensure their integrity.
2033: */
2034: if (saved_data) {
2035: error = writevnblk(vp, saved_data, lbn);
2036: if (error)
2037: break;
2038: continue;
2039: }
2040: /*
2041: * Otherwise, read the old block contents into the buffer.
2042: */
2043: saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
/* This snapshot's own copy is written after the loop, through saved_vp. */
2044: saved_vp = vp;
2045: if ((error = readfsblk(vp, saved_data, lbn)) != 0) {
2046: free(saved_data, M_UFSMNT);
2047: saved_data = NULL;
2048: break;
2049: }
2050: }
2051: /*
2052: * Note that we need to synchronously write snapshots that
2053: * have not been unlinked, and hence will be visible after
2054: * a crash, to ensure their integrity.
2055: */
/*
 * NOTE(review): this assignment replaces whatever error the loop left
 * behind (a failed ffs_balloc or writevnblk above breaks out with
 * saved_data still set) -- confirm that losing it is intended.
 */
2056: if (saved_data) {
2057: error = writevnblk(saved_vp, saved_data, lbn);
2058: free(saved_data, M_UFSMNT);
2059: }
/*
 * vp is the last snapshot vnode the loop looked at; per the comment
 * above, all snapshots share a single lock.  If the snapshot lock was
 * never taken, the devvp interlock is still held and is dropped instead.
 */
2060: if (snapshot_locked)
2061: VOP_UNLOCK(vp, 0);
2062: else
2063: VI_UNLOCK(devvp);
2064: return error;
2065: }
2066:
2067: /*
2068: * Read the specified block from disk. Vp is usually a snapshot vnode.
2069: */
2070: static int
1.43 christos 2071: readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2072: {
1.24 yamt 2073: int error;
1.1 hannken 2074: struct inode *ip = VTOI(vp);
2075: struct fs *fs = ip->i_fs;
2076: struct buf *nbp;
2077:
1.24 yamt 2078: nbp = getiobuf();
1.1 hannken 2079: nbp->b_flags = B_READ;
2080: nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize;
2081: nbp->b_error = 0;
2082: nbp->b_data = data;
2083: nbp->b_blkno = nbp->b_rawblkno = fsbtodb(fs, blkstofrags(fs, lbn));
2084: nbp->b_proc = NULL;
2085: nbp->b_dev = ip->i_devvp->v_rdev;
2086: nbp->b_vp = NULLVP;
2087:
1.44 ad 2088: bdev_strategy(nbp);
1.1 hannken 2089:
2090: error = biowait(nbp);
2091:
1.24 yamt 2092: putiobuf(nbp);
1.1 hannken 2093:
2094: return error;
2095: }
2096:
1.37 christos 2097: #if !defined(FFS_NO_SNAPSHOT)
1.1 hannken 2098: /*
1.4 hannken 2099: * Read the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2100: */
2101: static int
1.43 christos 2102: readvnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2103: {
1.4 hannken 2104: int error;
2105: daddr_t bn;
2106: off_t offset;
1.1 hannken 2107: struct inode *ip = VTOI(vp);
2108: struct fs *fs = ip->i_fs;
2109:
1.4 hannken 2110: error = VOP_BMAP(vp, lbn, NULL, &bn, NULL);
2111: if (error)
2112: return error;
1.1 hannken 2113:
1.4 hannken 2114: if (bn != (daddr_t)-1) {
2115: offset = dbtob(bn);
2116: simple_lock(&vp->v_interlock);
2117: error = VOP_PUTPAGES(vp, trunc_page(offset),
2118: round_page(offset+fs->fs_bsize),
2119: PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
1.1 hannken 2120: if (error)
1.4 hannken 2121: return error;
2122:
2123: return readfsblk(vp, data, fragstoblks(fs, dbtofsb(fs, bn)));
1.1 hannken 2124: }
2125:
1.4 hannken 2126: bzero(data, fs->fs_bsize);
2127:
2128: return 0;
1.1 hannken 2129: }
1.37 christos 2130: #endif /* !defined(FFS_NO_SNAPSHOT) */
1.1 hannken 2131:
2132: /*
1.4 hannken 2133: * Write the specified block. Bypass UBC to prevent deadlocks.
1.1 hannken 2134: */
2135: static int
1.43 christos 2136: writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn)
1.1 hannken 2137: {
1.4 hannken 2138: int s, error;
2139: off_t offset;
2140: struct buf *bp;
1.1 hannken 2141: struct inode *ip = VTOI(vp);
2142: struct fs *fs = ip->i_fs;
2143:
1.4 hannken 2144: offset = lblktosize(fs, (off_t)lbn);
2145: s = cow_enter();
2146: simple_lock(&vp->v_interlock);
2147: error = VOP_PUTPAGES(vp, trunc_page(offset),
2148: round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
2149: if (error == 0)
1.22 yamt 2150: error = ffs_balloc(vp, lblktosize(fs, (off_t)lbn),
1.4 hannken 2151: fs->fs_bsize, KERNCRED, B_SYNC, &bp);
2152: cow_leave(s);
1.1 hannken 2153: if (error)
2154: return error;
2155:
1.4 hannken 2156: bcopy(data, bp->b_data, fs->fs_bsize);
2157: bp->b_flags |= B_NOCACHE;
2158:
2159: return bwrite(bp);
2160: }
2161:
2162: /*
1.41 ad 2163: * Set/reset lwp's LP_UFSCOW flag.
1.4 hannken 2164: * May be called recursive.
2165: */
2166: static inline int
2167: cow_enter(void)
2168: {
2169: struct lwp *l = curlwp;
2170:
1.41 ad 2171: if (l->l_pflag & LP_UFSCOW) {
1.4 hannken 2172: return 0;
2173: } else {
1.41 ad 2174: l->l_pflag |= LP_UFSCOW;
2175: return LP_UFSCOW;
1.1 hannken 2176: }
1.4 hannken 2177: }
2178:
2179: static inline void
2180: cow_leave(int flag)
2181: {
2182: struct lwp *l = curlwp;
1.1 hannken 2183:
1.41 ad 2184: l->l_pflag &= ~flag;
1.1 hannken 2185: }
2186:
2187: /*
2188: * Get/Put direct block from inode or buffer containing disk addresses. Take
2189: * care for fs type (UFS1/UFS2) and byte swapping. These functions should go
2190: * into a global include.
2191: */
2192: static inline ufs2_daddr_t
2193: db_get(struct inode *ip, int loc)
2194: {
2195: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2196: return ufs_rw32(ip->i_ffs1_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2197: else
1.2 hannken 2198: return ufs_rw64(ip->i_ffs2_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2199: }
2200:
2201: static inline void
2202: db_assign(struct inode *ip, int loc, ufs2_daddr_t val)
2203: {
2204: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2205: ip->i_ffs1_db[loc] = ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2206: else
1.2 hannken 2207: ip->i_ffs2_db[loc] = ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2208: }
2209:
2210: static inline ufs2_daddr_t
1.43 christos 2211: idb_get(struct inode *ip, void *bf, int loc)
1.1 hannken 2212: {
2213: if (ip->i_ump->um_fstype == UFS1)
1.17 christos 2214: return ufs_rw32(((ufs1_daddr_t *)(bf))[loc],
1.2 hannken 2215: UFS_IPNEEDSWAP(ip));
1.1 hannken 2216: else
1.17 christos 2217: return ufs_rw64(((ufs2_daddr_t *)(bf))[loc],
1.2 hannken 2218: UFS_IPNEEDSWAP(ip));
1.1 hannken 2219: }
2220:
2221: static inline void
1.43 christos 2222: idb_assign(struct inode *ip, void *bf, int loc, ufs2_daddr_t val)
1.1 hannken 2223: {
2224: if (ip->i_ump->um_fstype == UFS1)
1.17 christos 2225: ((ufs1_daddr_t *)(bf))[loc] =
1.2 hannken 2226: ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2227: else
1.17 christos 2228: ((ufs2_daddr_t *)(bf))[loc] =
1.2 hannken 2229: ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2230: }
CVSweb <webmaster@jp.NetBSD.org>