Annotation of src/sys/ufs/ffs/ffs_snapshot.c, Revision 1.149
1.149 ! chs 1: /* $NetBSD: ffs_snapshot.c,v 1.148 2017/04/01 19:35:56 riastradh Exp $ */
1.18 thorpej 2:
1.1 hannken 3: /*
4: * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
5: *
6: * Further information about snapshots can be obtained from:
7: *
8: * Marshall Kirk McKusick http://www.mckusick.com/softdep/
9: * 1614 Oxford Street mckusick@mckusick.com
10: * Berkeley, CA 94709-1608 +1-510-843-9542
11: * USA
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: *
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: *
23: * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
24: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26: * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
27: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00
36: *
37: * from FreeBSD: ffs_snapshot.c,v 1.79 2004/02/13 02:02:06 kuriyama Exp
38: */
39:
40: #include <sys/cdefs.h>
1.149 ! chs 41: __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.148 2017/04/01 19:35:56 riastradh Exp $");
1.8 hannken 42:
43: #if defined(_KERNEL_OPT)
44: #include "opt_ffs.h"
1.111 bouyer 45: #include "opt_quota.h"
1.8 hannken 46: #endif
1.1 hannken 47:
48: #include <sys/param.h>
49: #include <sys/kernel.h>
50: #include <sys/systm.h>
51: #include <sys/conf.h>
52: #include <sys/buf.h>
53: #include <sys/proc.h>
54: #include <sys/namei.h>
55: #include <sys/sched.h>
56: #include <sys/stat.h>
57: #include <sys/malloc.h>
58: #include <sys/mount.h>
59: #include <sys/resource.h>
60: #include <sys/resourcevar.h>
61: #include <sys/vnode.h>
1.29 elad 62: #include <sys/kauth.h>
1.40 hannken 63: #include <sys/fstrans.h>
1.75 hannken 64: #include <sys/wapbl.h>
1.1 hannken 65:
66: #include <miscfs/specfs/specdev.h>
67:
68: #include <ufs/ufs/quota.h>
69: #include <ufs/ufs/ufsmount.h>
70: #include <ufs/ufs/inode.h>
71: #include <ufs/ufs/ufs_extern.h>
72: #include <ufs/ufs/ufs_bswap.h>
1.75 hannken 73: #include <ufs/ufs/ufs_wapbl.h>
1.1 hannken 74:
75: #include <ufs/ffs/fs.h>
76: #include <ufs/ffs/ffs_extern.h>
77:
1.72 hannken 78: #include <uvm/uvm.h>
79:
1.137 matt 80: TAILQ_HEAD(inodelst, inode); /* List of active snapshots */
81:
1.97 hannken 82: struct snap_info {
83: kmutex_t si_lock; /* Lock this snapinfo */
84: kmutex_t si_snaplock; /* Snapshot vnode common lock */
1.140 maxv 85: lwp_t *si_owner; /* Snaplock owner */
1.137 matt 86: struct inodelst si_snapshots; /* List of active snapshots */
1.97 hannken 87: daddr_t *si_snapblklist; /* Snapshot block hints list */
88: uint32_t si_gen; /* Incremented on change */
89: };
90:
1.10 hannken 91: #if !defined(FFS_NO_SNAPSHOT)
1.76 hannken 92: typedef int (*acctfunc_t)
93: (struct vnode *, void *, int, int, struct fs *, daddr_t, int);
94:
1.79 hannken 95: static int snapshot_setup(struct mount *, struct vnode *);
96: static int snapshot_copyfs(struct mount *, struct vnode *, void **);
97: static int snapshot_expunge(struct mount *, struct vnode *,
98: struct fs *, daddr_t *, daddr_t **);
99: static int snapshot_expunge_snap(struct mount *, struct vnode *,
100: struct fs *, daddr_t);
101: static int snapshot_writefs(struct mount *, struct vnode *, void *);
102: static int cgaccount(struct vnode *, int, int *);
103: static int cgaccount1(int, struct vnode *, void *, int);
1.76 hannken 104: static int expunge(struct vnode *, struct inode *, struct fs *,
105: acctfunc_t, int);
106: static int indiracct(struct vnode *, struct vnode *, int, daddr_t,
107: daddr_t, daddr_t, daddr_t, daddr_t, struct fs *, acctfunc_t, int);
108: static int fullacct(struct vnode *, void *, int, int, struct fs *,
109: daddr_t, int);
110: static int snapacct(struct vnode *, void *, int, int, struct fs *,
111: daddr_t, int);
112: static int mapacct(struct vnode *, void *, int, int, struct fs *,
113: daddr_t, int);
1.10 hannken 114: #endif /* !defined(FFS_NO_SNAPSHOT) */
115:
1.55 hannken 116: static int ffs_copyonwrite(void *, struct buf *, bool);
1.72 hannken 117: static int snapblkaddr(struct vnode *, daddr_t, daddr_t *);
1.79 hannken 118: static int rwfsblk(struct vnode *, int, void *, daddr_t);
1.75 hannken 119: static int syncsnap(struct vnode *);
1.79 hannken 120: static int wrsnapblk(struct vnode *, void *, daddr_t);
1.107 he 121: #if !defined(FFS_NO_SNAPSHOT)
1.103 hannken 122: static int blocks_in_journal(struct fs *);
1.107 he 123: #endif
1.76 hannken 124:
1.97 hannken 125: static inline bool is_active_snapshot(struct snap_info *, struct inode *);
1.79 hannken 126: static inline daddr_t db_get(struct inode *, int);
127: static inline void db_assign(struct inode *, int, daddr_t);
128: static inline daddr_t ib_get(struct inode *, int);
129: static inline daddr_t idb_get(struct inode *, void *, int);
130: static inline void idb_assign(struct inode *, void *, int, daddr_t);
1.1 hannken 131:
132: #ifdef DEBUG
133: static int snapdebug = 0;
134: #endif
1.47 hannken 135:
1.66 hannken 136: int
137: ffs_snapshot_init(struct ufsmount *ump)
1.47 hannken 138: {
1.66 hannken 139: struct snap_info *si;
140:
141: si = ump->um_snapinfo = kmem_alloc(sizeof(*si), KM_SLEEP);
142: TAILQ_INIT(&si->si_snapshots);
143: mutex_init(&si->si_lock, MUTEX_DEFAULT, IPL_NONE);
1.74 hannken 144: mutex_init(&si->si_snaplock, MUTEX_DEFAULT, IPL_NONE);
1.106 hannken 145: si->si_owner = NULL;
1.66 hannken 146: si->si_gen = 0;
147: si->si_snapblklist = NULL;
1.47 hannken 148:
1.66 hannken 149: return 0;
1.47 hannken 150: }
151:
152: void
1.66 hannken 153: ffs_snapshot_fini(struct ufsmount *ump)
1.47 hannken 154: {
1.66 hannken 155: struct snap_info *si;
1.47 hannken 156:
1.66 hannken 157: si = ump->um_snapinfo;
158: ump->um_snapinfo = NULL;
1.47 hannken 159:
160: KASSERT(TAILQ_EMPTY(&si->si_snapshots));
1.49 hannken 161: mutex_destroy(&si->si_lock);
1.74 hannken 162: mutex_destroy(&si->si_snaplock);
1.47 hannken 163: KASSERT(si->si_snapblklist == NULL);
1.63 ad 164: kmem_free(si, sizeof(*si));
1.47 hannken 165: }
166:
1.1 hannken 167: /*
168: * Create a snapshot file and initialize it for the filesystem.
1.4 hannken 169: * Vnode is locked on entry and return.
1.1 hannken 170: */
171: int
1.79 hannken 172: ffs_snapshot(struct mount *mp, struct vnode *vp, struct timespec *ctime)
1.1 hannken 173: {
1.10 hannken 174: #if defined(FFS_NO_SNAPSHOT)
175: return EOPNOTSUPP;
176: }
177: #else /* defined(FFS_NO_SNAPSHOT) */
1.115 hannken 178: bool suspended = false;
1.79 hannken 179: int error, redo = 0, snaploc;
180: void *sbbuf = NULL;
181: daddr_t *snaplist = NULL, snaplistsize = 0;
182: struct buf *bp, *nbp;
1.102 matt 183: struct fs *copy_fs = NULL;
184: struct fs *fs = VFSTOUFS(mp)->um_fs;
1.79 hannken 185: struct inode *ip = VTOI(vp);
186: struct lwp *l = curlwp;
187: struct snap_info *si = VFSTOUFS(mp)->um_snapinfo;
188: struct timespec ts;
1.1 hannken 189: struct timeval starttime;
190: #ifdef DEBUG
191: struct timeval endtime;
192: #endif
1.79 hannken 193: struct vnode *devvp = ip->i_devvp;
1.1 hannken 194:
195: /*
196: * If the vnode already is a snapshot, return.
197: */
1.140 maxv 198: if ((ip->i_flags & SF_SNAPSHOT)) {
199: if ((ip->i_flags & SF_SNAPINVAL))
1.113 hannken 200: return EINVAL;
1.1 hannken 201: if (ctime) {
1.140 maxv 202: ctime->tv_sec = DIP(ip, mtime);
203: ctime->tv_nsec = DIP(ip, mtimensec);
1.1 hannken 204: }
205: return 0;
206: }
207: /*
1.79 hannken 208: * Check for free snapshot slot in the superblock.
209: */
210: for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
211: if (fs->fs_snapinum[snaploc] == 0)
212: break;
213: if (snaploc == FSMAXSNAP)
214: return (ENOSPC);
215: /*
216: * Prepare the vnode to become a snapshot.
217: */
218: error = snapshot_setup(mp, vp);
219: if (error)
220: goto out;
1.111 bouyer 221:
1.79 hannken 222: /*
223: * Copy all the cylinder group maps. Although the
224: * filesystem is still active, we hope that only a few
225: * cylinder groups will change between now and when we
226: * suspend operations. Thus, we will be able to quickly
227: * touch up the few cylinder groups that changed during
228: * the suspension period.
229: */
230: error = cgaccount(vp, 1, NULL);
231: if (error)
232: goto out;
1.111 bouyer 233:
234: /*
235: * snapshot is now valid
236: */
237: ip->i_flags &= ~SF_SNAPINVAL;
238: DIP_ASSIGN(ip, flags, ip->i_flags);
239: ip->i_flag |= IN_CHANGE | IN_UPDATE;
240:
1.79 hannken 241: /*
242: * Ensure that the snapshot is completely on disk.
243: * Since we have marked it as a snapshot it is safe to
244: * unlock it as no process will be allowed to write to it.
245: */
246: error = VOP_FSYNC(vp, l->l_cred, FSYNC_WAIT, 0, 0);
247: if (error)
248: goto out;
1.99 hannken 249: VOP_UNLOCK(vp);
1.79 hannken 250: /*
251: * All allocations are done, so we can now suspend the filesystem.
252: */
253: error = vfs_suspend(vp->v_mount, 0);
1.144 hannken 254: if (error == 0) {
255: suspended = true;
1.145 hannken 256: vrele_flush(vp->v_mount);
1.144 hannken 257: error = VFS_SYNC(vp->v_mount, MNT_WAIT, curlwp->l_cred);
258: }
1.79 hannken 259: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
260: if (error)
261: goto out;
262: getmicrotime(&starttime);
263: /*
264: * First, copy all the cylinder group maps that have changed.
265: */
266: error = cgaccount(vp, 2, &redo);
267: if (error)
268: goto out;
269: /*
270: * Create a copy of the superblock and its summary information.
271: */
272: error = snapshot_copyfs(mp, vp, &sbbuf);
273: if (error)
274: goto out;
1.140 maxv 275: copy_fs = (struct fs *)((char *)sbbuf + ffs_blkoff(fs, fs->fs_sblockloc));
1.79 hannken 276: /*
277: * Expunge unlinked files from our view.
278: */
279: error = snapshot_expunge(mp, vp, copy_fs, &snaplistsize, &snaplist);
280: if (error)
281: goto out;
282: /*
283: * Record snapshot inode. Since this is the newest snapshot,
284: * it must be placed at the end of the list.
285: */
1.110 hannken 286: if (ip->i_nlink > 0)
287: fs->fs_snapinum[snaploc] = ip->i_number;
1.79 hannken 288:
289: mutex_enter(&si->si_lock);
1.97 hannken 290: if (is_active_snapshot(si, ip))
1.79 hannken 291: panic("ffs_snapshot: %"PRIu64" already on list", ip->i_number);
292: TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
293: if (TAILQ_FIRST(&si->si_snapshots) == ip) {
294: /*
295: * If this is the first snapshot on this filesystem, put the
296: * preliminary list in place and establish the cow handler.
297: */
298: si->si_snapblklist = snaplist;
299: fscow_establish(mp, ffs_copyonwrite, devvp);
300: }
301: si->si_gen++;
302: mutex_exit(&si->si_lock);
303:
304: vp->v_vflag |= VV_SYSTEM;
305: /*
306: * Set the mtime to the time the snapshot has been taken.
307: */
308: TIMEVAL_TO_TIMESPEC(&starttime, &ts);
309: if (ctime)
310: *ctime = ts;
311: DIP_ASSIGN(ip, mtime, ts.tv_sec);
312: DIP_ASSIGN(ip, mtimensec, ts.tv_nsec);
313: ip->i_flag |= IN_CHANGE | IN_UPDATE;
314: /*
315: * Copy allocation information from all snapshots and then
316: * expunge them from our view.
317: */
318: error = snapshot_expunge_snap(mp, vp, copy_fs, snaplistsize);
319: if (error)
320: goto out;
321: /*
322: * Write the superblock and its summary information to the snapshot.
323: */
324: error = snapshot_writefs(mp, vp, sbbuf);
325: if (error)
326: goto out;
327: /*
328: * We're nearly done, ensure that the snapshot is completely on disk.
329: */
330: error = VOP_FSYNC(vp, l->l_cred, FSYNC_WAIT, 0, 0);
331: if (error)
332: goto out;
333: /*
334: * Invalidate and free all pages on the snapshot vnode.
335: * We will read and write through the buffercache.
336: */
1.116 rmind 337: mutex_enter(vp->v_interlock);
1.79 hannken 338: error = VOP_PUTPAGES(vp, 0, 0,
339: PGO_ALLPAGES | PGO_CLEANIT | PGO_SYNCIO | PGO_FREE);
340: if (error)
341: goto out;
342: /*
343: * Invalidate short ( < fs_bsize ) buffers. We will always read
344: * full size buffers later.
345: */
346: mutex_enter(&bufcache_lock);
347: KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL);
348: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
349: nbp = LIST_NEXT(bp, b_vnbufs);
1.122 hannken 350: if (bp->b_bcount == fs->fs_bsize)
351: continue;
352: error = bbusy(bp, false, 0, NULL);
353: if (error != 0) {
354: if (error == EPASSTHROUGH) {
355: nbp = LIST_FIRST(&vp->v_cleanblkhd);
356: continue;
357: }
358: break;
1.79 hannken 359: }
1.122 hannken 360: brelsel(bp, BC_INVAL | BC_VFLUSH);
1.79 hannken 361: }
362: mutex_exit(&bufcache_lock);
363:
364: out:
365: if (sbbuf != NULL) {
366: free(copy_fs->fs_csp, M_UFSMNT);
367: free(sbbuf, M_UFSMNT);
368: }
369: if (fs->fs_active != NULL) {
370: free(fs->fs_active, M_DEVBUF);
371: fs->fs_active = NULL;
372: }
373:
374: mutex_enter(&si->si_lock);
375: if (snaplist != NULL) {
376: if (si->si_snapblklist == snaplist)
377: si->si_snapblklist = NULL;
378: free(snaplist, M_UFSMNT);
379: }
380: if (error) {
381: fs->fs_snapinum[snaploc] = 0;
382: } else {
383: /*
384: * As this is the newest list, it is the most inclusive, so
385: * should replace the previous list.
386: */
387: si->si_snapblklist = ip->i_snapblklist;
388: }
389: si->si_gen++;
390: mutex_exit(&si->si_lock);
391:
1.115 hannken 392: if (suspended) {
393: VOP_UNLOCK(vp);
1.79 hannken 394: vfs_resume(vp->v_mount);
1.115 hannken 395: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
396: #ifdef DEBUG
397: getmicrotime(&endtime);
398: timersub(&endtime, &starttime, &endtime);
399: printf("%s: suspended %lld.%03d sec, redo %d of %d\n",
400: mp->mnt_stat.f_mntonname, (long long)endtime.tv_sec,
401: endtime.tv_usec / 1000, redo, fs->fs_ncg);
402: #endif
1.79 hannken 403: }
404: if (error) {
1.136 dholland 405: if (UFS_WAPBL_BEGIN(mp) == 0) {
1.143 jdolecek 406: /*
407: * We depend on ffs_truncate() to call ffs_snapremove()
408: * before it may return an error. On failed
409: * ffs_truncate() we have normal file with leaked
410: * (meta-) data, but no snapshot to use.
411: */
1.79 hannken 412: (void) ffs_truncate(vp, (off_t)0, 0, NOCRED);
413: UFS_WAPBL_END(mp);
414: }
1.110 hannken 415: } else if (ip->i_nlink > 0)
1.104 hannken 416: vref(vp);
1.79 hannken 417: return (error);
418: }
419:
420: /*
421: * Prepare vnode to become a snapshot.
422: */
423: static int
424: snapshot_setup(struct mount *mp, struct vnode *vp)
425: {
1.112 hannken 426: int error, n, len, loc, cg;
1.79 hannken 427: daddr_t blkno, numblks;
428: struct buf *ibp, *nbp;
429: struct fs *fs = VFSTOUFS(mp)->um_fs;
430: struct lwp *l = curlwp;
1.103 hannken 431: const int wbreak = blocks_in_journal(fs)/8;
1.111 bouyer 432: struct inode *ip = VTOI(vp);
1.79 hannken 433:
434: /*
1.135 hannken 435: * Check mount, readonly reference and owner.
1.1 hannken 436: */
1.4 hannken 437: if (vp->v_mount != mp)
1.1 hannken 438: return EXDEV;
1.135 hannken 439: if (vp->v_writecount != 0)
1.1 hannken 440: return EBUSY;
1.119 elad 441: error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_SNAPSHOT,
442: 0, mp, vp, NULL);
443: if (error)
1.9 hannken 444: return EACCES;
445:
1.1 hannken 446: if (vp->v_size != 0) {
1.143 jdolecek 447: /*
448: * Must completely truncate the file here. Allocated
449: * blocks on a snapshot mean that block has been copied
450: * on write, see ffs_copyonwrite() testing "blkno != 0"
451: */
452: error = ufs_truncate_retry(vp, 0, NOCRED);
1.142 jdolecek 453: if (error)
454: return error;
1.1 hannken 455: }
1.111 bouyer 456:
457: /* Change inode to snapshot type file. */
458: error = UFS_WAPBL_BEGIN(mp);
459: if (error)
460: return error;
461: #if defined(QUOTA) || defined(QUOTA2)
462: /* shapshot inodes are not accounted in quotas */
463: chkiq(ip, -1, l->l_cred, 0);
464: #endif
465: ip->i_flags |= (SF_SNAPSHOT | SF_SNAPINVAL);
466: DIP_ASSIGN(ip, flags, ip->i_flags);
467: ip->i_flag |= IN_CHANGE | IN_UPDATE;
468: ffs_update(vp, NULL, NULL, UPDATE_WAIT);
469: UFS_WAPBL_END(mp);
470:
471: KASSERT(ip->i_flags & SF_SNAPSHOT);
1.1 hannken 472: /*
1.16 hannken 473: * Write an empty list of preallocated blocks to the end of
474: * the snapshot to set size to at least that of the filesystem.
1.1 hannken 475: */
476: numblks = howmany(fs->fs_size, fs->fs_frag);
1.16 hannken 477: blkno = 1;
1.79 hannken 478: blkno = ufs_rw64(blkno, UFS_FSNEEDSWAP(fs));
1.1 hannken 479: error = vn_rdwr(UIO_WRITE, vp,
1.126 dholland 480: (void *)&blkno, sizeof(blkno), ffs_lblktosize(fs, (off_t)numblks),
1.31 ad 481: UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, l->l_cred, NULL, NULL);
1.1 hannken 482: if (error)
1.79 hannken 483: return error;
1.1 hannken 484: /*
485: * Preallocate critical data structures so that we can copy
486: * them in without further allocation after we suspend all
487: * operations on the filesystem. We would like to just release
488: * the allocated buffers without writing them since they will
489: * be filled in below once we are ready to go, but this upsets
490: * the soft update code, so we go ahead and write the new buffers.
491: *
492: * Allocate all indirect blocks and mark all of them as not
493: * needing to be copied.
494: */
1.75 hannken 495: error = UFS_WAPBL_BEGIN(mp);
496: if (error)
1.79 hannken 497: return error;
1.124 dholland 498: for (blkno = UFS_NDADDR, n = 0; blkno < numblks; blkno += FFS_NINDIR(fs)) {
1.126 dholland 499: error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)blkno),
1.31 ad 500: fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
1.79 hannken 501: if (error)
1.1 hannken 502: goto out;
1.92 ad 503: brelse(ibp, 0);
1.103 hannken 504: if (wbreak > 0 && (++n % wbreak) == 0) {
1.75 hannken 505: UFS_WAPBL_END(mp);
506: error = UFS_WAPBL_BEGIN(mp);
507: if (error)
1.79 hannken 508: return error;
1.75 hannken 509: }
1.1 hannken 510: }
511: /*
512: * Allocate copies for the superblock and its summary information.
513: */
1.79 hannken 514: error = ffs_balloc(vp, fs->fs_sblockloc, fs->fs_sbsize, l->l_cred,
1.15 hannken 515: 0, &nbp);
1.79 hannken 516: if (error)
1.15 hannken 517: goto out;
518: bawrite(nbp);
1.127 dholland 519: blkno = ffs_fragstoblks(fs, fs->fs_csaddr);
1.15 hannken 520: len = howmany(fs->fs_cssize, fs->fs_bsize);
521: for (loc = 0; loc < len; loc++) {
1.126 dholland 522: error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)(blkno + loc)),
1.79 hannken 523: fs->fs_bsize, l->l_cred, 0, &nbp);
524: if (error)
1.1 hannken 525: goto out;
1.15 hannken 526: bawrite(nbp);
1.112 hannken 527: if (wbreak > 0 && (++n % wbreak) == 0) {
528: UFS_WAPBL_END(mp);
529: error = UFS_WAPBL_BEGIN(mp);
530: if (error)
531: return error;
532: }
533: }
534: /*
535: * Allocate all cylinder group blocks.
536: */
537: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.126 dholland 538: error = ffs_balloc(vp, ffs_lfragtosize(fs, cgtod(fs, cg)),
1.112 hannken 539: fs->fs_bsize, l->l_cred, 0, &nbp);
540: if (error)
541: goto out;
542: bawrite(nbp);
543: if (wbreak > 0 && (++n % wbreak) == 0) {
544: UFS_WAPBL_END(mp);
545: error = UFS_WAPBL_BEGIN(mp);
546: if (error)
547: return error;
548: }
1.15 hannken 549: }
1.79 hannken 550:
551: out:
1.75 hannken 552: UFS_WAPBL_END(mp);
1.79 hannken 553: return error;
554: }
555:
556: /*
557: * Create a copy of the superblock and its summary information.
558: * It is up to the caller to free copyfs and copy_fs->fs_csp.
559: */
560: static int
561: snapshot_copyfs(struct mount *mp, struct vnode *vp, void **sbbuf)
562: {
563: int error, i, len, loc, size;
564: void *space;
565: int32_t *lp;
566: struct buf *bp;
567: struct fs *copyfs, *fs = VFSTOUFS(mp)->um_fs;
568: struct vnode *devvp = VTOI(vp)->i_devvp;
569:
1.1 hannken 570: /*
571: * Grab a copy of the superblock and its summary information.
572: * We delay writing it until the suspension is released below.
573: */
1.79 hannken 574: *sbbuf = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.124 dholland 575: loc = ffs_blkoff(fs, fs->fs_sblockloc);
1.1 hannken 576: if (loc > 0)
1.79 hannken 577: memset(*sbbuf, 0, loc);
578: copyfs = (struct fs *)((char *)(*sbbuf) + loc);
1.95 tsutsui 579: memcpy(copyfs, fs, fs->fs_sbsize);
1.1 hannken 580: size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
581: if (fs->fs_sbsize < size)
1.79 hannken 582: memset((char *)(*sbbuf) + loc + fs->fs_sbsize, 0,
1.43 christos 583: size - fs->fs_sbsize);
1.126 dholland 584: size = ffs_blkroundup(fs, fs->fs_cssize);
1.1 hannken 585: if (fs->fs_contigsumsize > 0)
586: size += fs->fs_ncg * sizeof(int32_t);
1.79 hannken 587: space = malloc(size, M_UFSMNT, M_WAITOK);
588: copyfs->fs_csp = space;
1.95 tsutsui 589: memcpy(copyfs->fs_csp, fs->fs_csp, fs->fs_cssize);
1.12 yamt 590: space = (char *)space + fs->fs_cssize;
1.1 hannken 591: loc = howmany(fs->fs_cssize, fs->fs_fsize);
592: i = fs->fs_frag - loc % fs->fs_frag;
593: len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
594: if (len > 0) {
1.125 dholland 595: if ((error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + loc),
1.139 maxv 596: len, 0, &bp)) != 0) {
1.79 hannken 597: free(copyfs->fs_csp, M_UFSMNT);
598: free(*sbbuf, M_UFSMNT);
599: *sbbuf = NULL;
600: return error;
1.1 hannken 601: }
1.95 tsutsui 602: memcpy(space, bp->b_data, (u_int)len);
1.12 yamt 603: space = (char *)space + len;
1.52 ad 604: brelse(bp, BC_INVAL | BC_NOCACHE);
1.1 hannken 605: }
606: if (fs->fs_contigsumsize > 0) {
1.79 hannken 607: copyfs->fs_maxcluster = lp = space;
1.1 hannken 608: for (i = 0; i < fs->fs_ncg; i++)
609: *lp++ = fs->fs_contigsumsize;
610: }
1.79 hannken 611: if (mp->mnt_wapbl)
612: copyfs->fs_flags &= ~FS_DOWAPBL;
613: return 0;
614: }
615:
1.134 christos 616: struct snapshot_expunge_ctx {
617: struct vnode *logvp;
618: struct lwp *l;
619: struct vnode *vp;
620: struct fs *copy_fs;
621: };
622:
/*
 * Vnode-iterator predicate: select the vnodes snapshot_expunge() must
 * process — the in-filesystem journal vnode and files that appear
 * unlinked (zero link count, or allocated per the frozen cg maps).
 * Snapshot inodes are skipped; they are handled separately.
 * Called with the vnode interlock held.
 */
static bool
snapshot_expunge_selector(void *cl, struct vnode *xvp)
{
	struct vattr vat;
	struct snapshot_expunge_ctx *c = cl;
	struct inode *xp;

	KASSERT(mutex_owned(xvp->v_interlock));

	xp = VTOI(xvp);
	if (xvp->v_type == VNON || VTOI(xvp) == NULL ||
	    (xp->i_flags & SF_SNAPSHOT))
		return false;
#ifdef DEBUG
	if (snapdebug)
		vprint("ffs_snapshot: busy vnode", xvp);
#endif

	/* The journal is always expunged from the snapshot's view. */
	if (xvp == c->logvp)
		return true;

	/* Still linked: keep it in the snapshot. */
	if (VOP_GETATTR(xvp, &vat, c->l->l_cred) == 0 &&
	    vat.va_nlink > 0)
		return false;

	/* Already free in the frozen cg maps: nothing to expunge. */
	if (ffs_checkfreefile(c->copy_fs, c->vp, xp->i_number))
		return false;

	return true;
}
653:
1.79 hannken 654: /*
655: * We must check for active files that have been unlinked (e.g., with a zero
656: * link count). We have to expunge all trace of these files from the snapshot
657: * so that they are not reclaimed prematurely by fsck or unnecessarily dumped.
658: * Note that we skip unlinked snapshot files as they will be handled separately.
659: * Calculate the snapshot list size and create a preliminary list.
660: */
661: static int
662: snapshot_expunge(struct mount *mp, struct vnode *vp, struct fs *copy_fs,
663: daddr_t *snaplistsize, daddr_t **snaplist)
664: {
1.105 bouyer 665: int cg, error = 0, len, loc;
1.79 hannken 666: daddr_t blkno, *blkp;
667: struct fs *fs = VFSTOUFS(mp)->um_fs;
668: struct inode *xp;
669: struct lwp *l = curlwp;
1.133 hannken 670: struct vnode *logvp = NULL, *xvp;
671: struct vnode_iterator *marker;
1.134 christos 672: struct snapshot_expunge_ctx ctx;
1.79 hannken 673:
674: *snaplist = NULL;
1.1 hannken 675: /*
1.79 hannken 676: * Get the log inode if any.
677: */
678: if ((fs->fs_flags & FS_DOWAPBL) &&
679: fs->fs_journal_location == UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM) {
680: error = VFS_VGET(mp,
681: fs->fs_journallocs[UFS_WAPBL_INFS_INO], &logvp);
682: if (error)
683: goto out;
684: }
685: /*
686: * We also calculate the needed size for the snapshot list.
687: */
688: *snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
689: FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
1.133 hannken 690:
691: vfs_vnode_iterator_init(mp, &marker);
1.134 christos 692: ctx.logvp = logvp;
693: ctx.l = l;
694: ctx.vp = vp;
695: ctx.copy_fs = copy_fs;
696: while ((xvp = vfs_vnode_iterator_next(marker, snapshot_expunge_selector,
697: &ctx)))
698: {
1.1 hannken 699: /*
700: * If there is a fragment, clear it here.
701: */
1.134 christos 702: xp = VTOI(xvp);
1.1 hannken 703: blkno = 0;
704: loc = howmany(xp->i_size, fs->fs_bsize) - 1;
1.121 dholland 705: if (loc < UFS_NDADDR) {
1.126 dholland 706: len = ffs_fragroundup(fs, ffs_blkoff(fs, xp->i_size));
1.5 hannken 707: if (len > 0 && len < fs->fs_bsize) {
1.103 hannken 708: error = UFS_WAPBL_BEGIN(mp);
709: if (error) {
1.133 hannken 710: vrele(xvp);
711: vfs_vnode_iterator_destroy(marker);
1.103 hannken 712: goto out;
713: }
1.83 joerg 714: ffs_blkfree_snap(copy_fs, vp, db_get(xp, loc),
1.1 hannken 715: len, xp->i_number);
716: blkno = db_get(xp, loc);
717: db_assign(xp, loc, 0);
1.103 hannken 718: UFS_WAPBL_END(mp);
1.1 hannken 719: }
720: }
1.79 hannken 721: *snaplistsize += 1;
1.76 hannken 722: error = expunge(vp, xp, copy_fs, fullacct, BLK_NOCOPY);
1.1 hannken 723: if (blkno)
724: db_assign(xp, loc, blkno);
1.103 hannken 725: if (!error) {
726: error = UFS_WAPBL_BEGIN(mp);
727: if (!error) {
728: error = ffs_freefile_snap(copy_fs, vp,
729: xp->i_number, xp->i_mode);
730: UFS_WAPBL_END(mp);
731: }
732: }
1.133 hannken 733: vrele(xvp);
1.1 hannken 734: if (error) {
1.133 hannken 735: vfs_vnode_iterator_destroy(marker);
1.75 hannken 736: goto out;
1.1 hannken 737: }
1.65 hannken 738: }
1.133 hannken 739: vfs_vnode_iterator_destroy(marker);
740:
1.1 hannken 741: /*
1.79 hannken 742: * Create a preliminary list of preallocated snapshot blocks.
1.1 hannken 743: */
1.79 hannken 744: *snaplist = malloc(*snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK);
745: blkp = &(*snaplist)[1];
1.126 dholland 746: *blkp++ = ffs_lblkno(fs, fs->fs_sblockloc);
1.127 dholland 747: blkno = ffs_fragstoblks(fs, fs->fs_csaddr);
1.79 hannken 748: for (cg = 0; cg < fs->fs_ncg; cg++) {
1.127 dholland 749: if (ffs_fragstoblks(fs, cgtod(fs, cg)) > blkno)
1.79 hannken 750: break;
1.127 dholland 751: *blkp++ = ffs_fragstoblks(fs, cgtod(fs, cg));
1.79 hannken 752: }
753: len = howmany(fs->fs_cssize, fs->fs_bsize);
754: for (loc = 0; loc < len; loc++)
755: *blkp++ = blkno + loc;
756: for (; cg < fs->fs_ncg; cg++)
1.127 dholland 757: *blkp++ = ffs_fragstoblks(fs, cgtod(fs, cg));
1.98 hannken 758: (*snaplist)[0] = blkp - &(*snaplist)[0];
1.79 hannken 759:
760: out:
761: if (logvp != NULL)
762: vput(logvp);
763: if (error && *snaplist != NULL) {
764: free(*snaplist, M_UFSMNT);
765: *snaplist = NULL;
766: }
767:
768: return error;
769: }
770:
771: /*
772: * Copy allocation information from all the snapshots in this snapshot and
773: * then expunge them from its view. Also, collect the list of allocated
774: * blocks in i_snapblklist.
775: */
776: static int
777: snapshot_expunge_snap(struct mount *mp, struct vnode *vp,
778: struct fs *copy_fs, daddr_t snaplistsize)
779: {
1.103 hannken 780: int error = 0, i;
1.79 hannken 781: daddr_t numblks, *snaplist = NULL;
782: struct fs *fs = VFSTOUFS(mp)->um_fs;
783: struct inode *ip = VTOI(vp), *xp;
784: struct lwp *l = curlwp;
785: struct snap_info *si = VFSTOUFS(mp)->um_snapinfo;
786:
1.47 hannken 787: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap) {
1.110 hannken 788: if (xp != ip) {
789: error = expunge(vp, xp, fs, snapacct, BLK_SNAP);
790: if (error)
791: break;
792: }
1.92 ad 793: if (xp->i_nlink != 0)
1.79 hannken 794: continue;
1.103 hannken 795: error = UFS_WAPBL_BEGIN(mp);
796: if (error)
797: break;
1.84 joerg 798: error = ffs_freefile_snap(copy_fs, vp, xp->i_number, xp->i_mode);
1.103 hannken 799: UFS_WAPBL_END(mp);
1.79 hannken 800: if (error)
801: break;
1.1 hannken 802: }
1.79 hannken 803: if (error)
804: goto out;
1.1 hannken 805: /*
806: * Allocate space for the full list of preallocated snapshot blocks.
807: */
1.79 hannken 808: snaplist = malloc(snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK);
809: ip->i_snapblklist = &snaplist[1];
1.1 hannken 810: /*
811: * Expunge the blocks used by the snapshots from the set of
812: * blocks marked as used in the snapshot bitmaps. Also, collect
813: * the list of allocated blocks in i_snapblklist.
814: */
1.79 hannken 815: error = expunge(vp, ip, copy_fs, mapacct, BLK_SNAP);
816: if (error)
817: goto out;
818: if (snaplistsize < ip->i_snapblklist - snaplist)
1.1 hannken 819: panic("ffs_snapshot: list too small");
1.79 hannken 820: snaplistsize = ip->i_snapblklist - snaplist;
821: snaplist[0] = snaplistsize;
822: ip->i_snapblklist = &snaplist[0];
1.1 hannken 823: /*
824: * Write out the list of allocated blocks to the end of the snapshot.
825: */
1.79 hannken 826: numblks = howmany(fs->fs_size, fs->fs_frag);
1.15 hannken 827: for (i = 0; i < snaplistsize; i++)
1.79 hannken 828: snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs));
829: error = vn_rdwr(UIO_WRITE, vp, (void *)snaplist,
1.126 dholland 830: snaplistsize * sizeof(daddr_t), ffs_lblktosize(fs, (off_t)numblks),
1.103 hannken 831: UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, l->l_cred, NULL, NULL);
1.15 hannken 832: for (i = 0; i < snaplistsize; i++)
1.79 hannken 833: snaplist[i] = ufs_rw64(snaplist[i], UFS_FSNEEDSWAP(fs));
834: out:
835: if (error && snaplist != NULL) {
836: free(snaplist, M_UFSMNT);
837: ip->i_snapblklist = NULL;
1.1 hannken 838: }
1.79 hannken 839: return error;
840: }
841:
842: /*
843: * Write the superblock and its summary information to the snapshot.
1.121 dholland 844: * Make sure, the first UFS_NDADDR blocks get copied to the snapshot.
1.79 hannken 845: */
846: static int
847: snapshot_writefs(struct mount *mp, struct vnode *vp, void *sbbuf)
848: {
849: int error, len, loc;
850: void *space;
851: daddr_t blkno;
852: struct buf *bp;
853: struct fs *copyfs, *fs = VFSTOUFS(mp)->um_fs;
854: struct inode *ip = VTOI(vp);
855: struct lwp *l = curlwp;
856:
1.124 dholland 857: copyfs = (struct fs *)((char *)sbbuf + ffs_blkoff(fs, fs->fs_sblockloc));
1.79 hannken 858:
1.1 hannken 859: /*
860: * Write the superblock and its summary information
861: * to the snapshot.
862: */
1.127 dholland 863: blkno = ffs_fragstoblks(fs, fs->fs_csaddr);
1.1 hannken 864: len = howmany(fs->fs_cssize, fs->fs_bsize);
1.79 hannken 865: space = copyfs->fs_csp;
1.8 hannken 866: #ifdef FFS_EI
1.79 hannken 867: if (UFS_FSNEEDSWAP(fs)) {
868: ffs_sb_swap(copyfs, copyfs);
1.1 hannken 869: ffs_csum_swap(space, space, fs->fs_cssize);
870: }
1.8 hannken 871: #endif
1.75 hannken 872: error = UFS_WAPBL_BEGIN(mp);
1.79 hannken 873: if (error)
874: return error;
1.1 hannken 875: for (loc = 0; loc < len; loc++) {
1.139 maxv 876: error = bread(vp, blkno + loc, fs->fs_bsize,
1.79 hannken 877: B_MODIFY, &bp);
1.15 hannken 878: if (error) {
1.79 hannken 879: break;
1.1 hannken 880: }
1.95 tsutsui 881: memcpy(bp->b_data, space, fs->fs_bsize);
1.1 hannken 882: space = (char *)space + fs->fs_bsize;
1.79 hannken 883: bawrite(bp);
884: }
885: if (error)
886: goto out;
1.126 dholland 887: error = bread(vp, ffs_lblkno(fs, fs->fs_sblockloc),
1.139 maxv 888: fs->fs_bsize, B_MODIFY, &bp);
1.79 hannken 889: if (error) {
890: goto out;
891: } else {
1.95 tsutsui 892: memcpy(bp->b_data, sbbuf, fs->fs_bsize);
1.79 hannken 893: bawrite(bp);
1.1 hannken 894: }
895: /*
1.121 dholland 896: * Copy the first UFS_NDADDR blocks to the snapshot so
897: * ffs_copyonwrite() and ffs_snapblkfree() will always work on
898: * indirect blocks.
1.75 hannken 899: */
1.121 dholland 900: for (loc = 0; loc < UFS_NDADDR; loc++) {
1.75 hannken 901: if (db_get(ip, loc) != 0)
902: continue;
1.126 dholland 903: error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)loc),
1.79 hannken 904: fs->fs_bsize, l->l_cred, 0, &bp);
1.75 hannken 905: if (error)
906: break;
1.79 hannken 907: error = rwfsblk(vp, B_READ, bp->b_data, loc);
1.75 hannken 908: if (error) {
1.79 hannken 909: brelse(bp, 0);
910: break;
1.75 hannken 911: }
1.79 hannken 912: bawrite(bp);
1.75 hannken 913: }
1.79 hannken 914:
915: out:
1.75 hannken 916: UFS_WAPBL_END(mp);
1.79 hannken 917: return error;
918: }
919:
920: /*
921: * Copy all cylinder group maps.
922: */
 923: static int
 924: cgaccount(struct vnode *vp, int passno, int *redo)
 925: {
1.103 hannken 926: 	int cg, error = 0;
1.79 hannken 927: 	struct buf *nbp;
 928: 	struct fs *fs = VTOI(vp)->i_fs;
 929: 
	/* *redo reports how many cylinder groups were (re)copied. */
 930: 	if (redo != NULL)
 931: 		*redo = 0;
	/* First pass: allocate the bitmap that records copied groups. */
 932: 	if (passno == 1)
 933: 		fs->fs_active = malloc(howmany(fs->fs_ncg, NBBY),
 934: 		    M_DEVBUF, M_WAITOK | M_ZERO);
 935: 	for (cg = 0; cg < fs->fs_ncg; cg++) {
		/* Second pass: only redo groups not already marked active. */
 936: 		if (passno == 2 && ACTIVECG_ISSET(fs, cg))
 937: 			continue;
1.103 hannken 938: 
1.79 hannken 939: 		if (redo != NULL)
 940: 			*redo += 1;
		/* Copy each cylinder group in its own journal transaction. */
1.103 hannken 941: 		error = UFS_WAPBL_BEGIN(vp->v_mount);
 942: 		if (error)
 943: 			return error;
1.126 dholland 944: 		error = ffs_balloc(vp, ffs_lfragtosize(fs, cgtod(fs, cg)),
1.79 hannken 945: 		    fs->fs_bsize, curlwp->l_cred, 0, &nbp);
1.103 hannken 946: 		if (error) {
 947: 			UFS_WAPBL_END(vp->v_mount);
1.80 hannken 948: 			break;
1.103 hannken 949: 		}
1.79 hannken 950: 		error = cgaccount1(cg, vp, nbp->b_data, passno);
 951: 		bawrite(nbp);
1.103 hannken 952: 		UFS_WAPBL_END(vp->v_mount);
1.75 hannken 953: 		if (error)
1.80 hannken 954: 			break;
1.15 hannken 955: 	}
1.80 hannken 956: 	return error;
1.1 hannken 957: }
958:
959: /*
960: * Copy a cylinder group map. All the unallocated blocks are marked
961: * BLK_NOCOPY so that the snapshot knows that it need not copy them
962: * if they are later written. If passno is one, then this is a first
963: * pass, so only setting needs to be done. If passno is 2, then this
964: * is a revision to a previous pass which must be undone as the
965: * replacement pass is done.
966: */
 967: static int
1.79 hannken 968: cgaccount1(int cg, struct vnode *vp, void *data, int passno)
1.1 hannken 969: {
 970: 	struct buf *bp, *ibp;
 971: 	struct inode *ip;
 972: 	struct cg *cgp;
 973: 	struct fs *fs;
1.79 hannken 974: 	struct lwp *l = curlwp;
 975: 	daddr_t base, numblks;
1.131 martin 976: 	int error, len, loc, ns __unused, indiroff;
1.1 hannken 977: 
 978: 	ip = VTOI(vp);
 979: 	fs = ip->i_fs;
 980: 	ns = UFS_FSNEEDSWAP(fs);
	/* Read the on-disk cylinder group map and sanity-check its magic. */
1.125 dholland 981: 	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1.139 maxv 982: 	    (int)fs->fs_cgsize, 0, &bp);
1.1 hannken 983: 	if (error) {
 984: 		return (error);
 985: 	}
 986: 	cgp = (struct cg *)bp->b_data;
 987: 	if (!cg_chkmagic(cgp, ns)) {
1.52 ad 988: 		brelse(bp, 0);
1.1 hannken 989: 		return (EIO);
 990: 	}
	/* Record that this group's map has been captured. */
 991: 	ACTIVECG_SET(fs, cg);
 992: 
	/* Copy the map into the caller's buffer; zero any block-size tail. */
1.95 tsutsui 993: 	memcpy(data, bp->b_data, fs->fs_cgsize);
1.52 ad 994: 	brelse(bp, 0);
1.1 hannken 995: 	if (fs->fs_cgsize < fs->fs_bsize)
1.43 christos 996: 		memset((char *)data + fs->fs_cgsize, 0,
1.1 hannken 997: 		    fs->fs_bsize - fs->fs_cgsize);
	/* Compute the range of logical fs blocks covered by this group. */
 998: 	numblks = howmany(fs->fs_size, fs->fs_frag);
 999: 	len = howmany(fs->fs_fpg, fs->fs_frag);
 1000: 	base = cg * fs->fs_fpg / fs->fs_frag;
 1001: 	if (base + len >= numblks)
 1002: 		len = numblks - base - 1;
 1003: 	loc = 0;
	/* Mark free blocks addressed by the snapshot's direct pointers. */
1.121 dholland 1004: 	if (base < UFS_NDADDR) {
 1005: 		for ( ; loc < UFS_NDADDR; loc++) {
1.1 hannken 1006: 			if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
 1007: 				db_assign(ip, loc, BLK_NOCOPY);
 1008: 			else if (db_get(ip, loc) == BLK_NOCOPY) {
 1009: 				if (passno == 2)
 1010: 					db_assign(ip, loc, 0);
 1011: 				else if (passno == 1)
 1012: 					panic("ffs_snapshot: lost direct block");
 1013: 			}
 1014: 		}
 1015: 	}
	/* Continue through the indirect pointers, one indirect block at a time. */
1.126 dholland 1016: 	if ((error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)(base + loc)),
1.79 hannken 1017: 	    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp)) != 0)
1.1 hannken 1018: 		return (error);
1.124 dholland 1019: 	indiroff = (base + loc - UFS_NDADDR) % FFS_NINDIR(fs);
1.1 hannken 1020: 	for ( ; loc < len; loc++, indiroff++) {
1.124 dholland 1021: 		if (indiroff >= FFS_NINDIR(fs)) {
			/* Crossed into the next indirect block; fetch it. */
1.15 hannken 1022: 			bawrite(ibp);
1.22 yamt 1023: 			if ((error = ffs_balloc(vp,
1.126 dholland 1024: 			    ffs_lblktosize(fs, (off_t)(base + loc)),
1.79 hannken 1025: 			    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp)) != 0)
1.1 hannken 1026: 				return (error);
 1027: 			indiroff = 0;
 1028: 		}
 1029: 		if (ffs_isblock(fs, cg_blksfree(cgp, ns), loc))
 1030: 			idb_assign(ip, ibp->b_data, indiroff, BLK_NOCOPY);
 1031: 		else if (idb_get(ip, ibp->b_data, indiroff) == BLK_NOCOPY) {
 1032: 			if (passno == 2)
 1033: 				idb_assign(ip, ibp->b_data, indiroff, 0);
 1034: 			else if (passno == 1)
 1035: 				panic("ffs_snapshot: lost indirect block");
 1036: 		}
 1037: 	}
1.15 hannken 1038: 	bdwrite(ibp);
1.1 hannken 1039: 	return (0);
 1040: }
1041:
1042: /*
1043: * Before expunging a snapshot inode, note all the
1044: * blocks that it claims with BLK_SNAP so that fsck will
1045: * be able to account for those blocks properly and so
1046: * that this snapshot knows that it need not copy them
1.76 hannken 1047: * if the other snapshot holding them is freed.
1.1 hannken 1048: */
 1049: static int
1.76 hannken 1050: expunge(struct vnode *snapvp, struct inode *cancelip, struct fs *fs,
 1051:     acctfunc_t acctfunc, int expungetype)
1.1 hannken 1052: {
1.131 martin 1053: 	int i, error, ns __unused;
1.76 hannken 1054: 	daddr_t lbn, rlbn;
 1055: 	daddr_t len, blkno, numblks, blksperindir;
 1056: 	struct ufs1_dinode *dip1;
 1057: 	struct ufs2_dinode *dip2;
1.79 hannken 1058: 	struct lwp *l = curlwp;
1.76 hannken 1059: 	void *bap;
1.1 hannken 1060: 	struct buf *bp;
1.103 hannken 1061: 	struct mount *mp;
1.1 hannken 1062: 
 1063: 	ns = UFS_FSNEEDSWAP(fs);
1.103 hannken 1064: 	mp = snapvp->v_mount;
 1065: 
 1066: 	error = UFS_WAPBL_BEGIN(mp);
 1067: 	if (error)
 1068: 		return error;
1.1 hannken 1069: 	/*
 1070: 	 * Prepare to expunge the inode. If its inode block has not
 1071: 	 * yet been copied, then allocate and fill the copy.
 1072: 	 */
1.127 dholland 1073: 	lbn = ffs_fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
1.72 hannken 1074: 	error = snapblkaddr(snapvp, lbn, &blkno);
1.71 hannken 1075: 	if (error)
 1076: 		return error;
 1077: 	if (blkno != 0) {
		/* Already copied: read the snapshot's copy of the inode block. */
1.139 maxv 1078: 		error = bread(snapvp, lbn, fs->fs_bsize,
1.71 hannken 1079: 		    B_MODIFY, &bp);
1.1 hannken 1080: 	} else {
		/* Not copied yet: allocate and fill from the real file system. */
1.126 dholland 1081: 		error = ffs_balloc(snapvp, ffs_lblktosize(fs, (off_t)lbn),
1.79 hannken 1082: 		    fs->fs_bsize, l->l_cred, 0, &bp);
1.71 hannken 1083: 		if (! error)
1.75 hannken 1084: 			error = rwfsblk(snapvp, B_READ, bp->b_data, lbn);
1.1 hannken 1085: 	}
1.103 hannken 1086: 	if (error) {
 1087: 		UFS_WAPBL_END(mp);
1.1 hannken 1088: 		return error;
1.103 hannken 1089: 	}
1.1 hannken 1090: 	/*
 1091: 	 * Set a snapshot inode to be a zero length file, regular files
1.48 hannken 1092: 	 * or unlinked snapshots to be completely unallocated.
1.1 hannken 1093: 	 */
	/* UFS1 and UFS2 need separate handling for the on-disk dinode layout. */
1.76 hannken 1094: 	if (fs->fs_magic == FS_UFS1_MAGIC) {
 1095: 		dip1 = (struct ufs1_dinode *)bp->b_data +
 1096: 		    ino_to_fsbo(fs, cancelip->i_number);
1.111 bouyer 1097: 		if (cancelip->i_flags & SF_SNAPSHOT) {
 1098: 			dip1->di_flags =
 1099: 			    ufs_rw32(ufs_rw32(dip1->di_flags, ns) |
 1100: 			    SF_SNAPINVAL, ns);
 1101: 		}
1.92 ad 1102: 		if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
1.76 hannken 1103: 			dip1->di_mode = 0;
 1104: 		dip1->di_size = 0;
 1105: 		dip1->di_blocks = 0;
1.121 dholland 1106: 		memset(&dip1->di_db[0], 0, (UFS_NDADDR + UFS_NIADDR) * sizeof(int32_t));
1.76 hannken 1107: 	} else {
 1108: 		dip2 = (struct ufs2_dinode *)bp->b_data +
 1109: 		    ino_to_fsbo(fs, cancelip->i_number);
1.111 bouyer 1110: 		if (cancelip->i_flags & SF_SNAPSHOT) {
 1111: 			dip2->di_flags =
 1112: 			    ufs_rw32(ufs_rw32(dip2->di_flags, ns) |
 1113: 			    SF_SNAPINVAL, ns);
 1114: 		}
1.92 ad 1115: 		if (expungetype == BLK_NOCOPY || cancelip->i_nlink == 0)
1.76 hannken 1116: 			dip2->di_mode = 0;
 1117: 		dip2->di_size = 0;
 1118: 		dip2->di_blocks = 0;
1.121 dholland 1119: 		memset(&dip2->di_db[0], 0, (UFS_NDADDR + UFS_NIADDR) * sizeof(int64_t));
1.76 hannken 1120: 	}
1.71 hannken 1121: 	bdwrite(bp);
1.103 hannken 1122: 	UFS_WAPBL_END(mp);
1.1 hannken 1123: 	/*
 1124: 	 * Now go through and expunge all the blocks in the file
 1125: 	 * using the function requested.
 1126: 	 */
 1127: 	numblks = howmany(cancelip->i_size, fs->fs_bsize);
	/* Account the direct block pointers first... */
1.76 hannken 1128: 	if (fs->fs_magic == FS_UFS1_MAGIC)
 1129: 		bap = &cancelip->i_ffs1_db[0];
 1130: 	else
 1131: 		bap = &cancelip->i_ffs2_db[0];
1.121 dholland 1132: 	error = (*acctfunc)(snapvp, bap, 0, UFS_NDADDR, fs, 0, expungetype);
1.103 hannken 1133: 	if (error)
1.1 hannken 1134: 		return (error);
	/* ...then the indirect pointer array itself (lblkno -1 = don't list). */
1.76 hannken 1135: 	if (fs->fs_magic == FS_UFS1_MAGIC)
 1136: 		bap = &cancelip->i_ffs1_ib[0];
 1137: 	else
 1138: 		bap = &cancelip->i_ffs2_ib[0];
1.121 dholland 1139: 	error = (*acctfunc)(snapvp, bap, 0, UFS_NIADDR, fs, -1, expungetype);
1.103 hannken 1140: 	if (error)
1.1 hannken 1141: 		return (error);
	/* Finally descend each level of indirection that the file uses. */
 1142: 	blksperindir = 1;
1.121 dholland 1143: 	lbn = -UFS_NDADDR;
 1144: 	len = numblks - UFS_NDADDR;
 1145: 	rlbn = UFS_NDADDR;
 1146: 	for (i = 0; len > 0 && i < UFS_NIADDR; i++) {
1.76 hannken 1147: 		error = indiracct(snapvp, ITOV(cancelip), i,
 1148: 		    ib_get(cancelip, i), lbn, rlbn, len,
1.1 hannken 1149: 		    blksperindir, fs, acctfunc, expungetype);
 1150: 		if (error)
 1151: 			return (error);
1.124 dholland 1152: 		blksperindir *= FFS_NINDIR(fs);
1.1 hannken 1153: 		lbn -= blksperindir + 1;
 1154: 		len -= blksperindir;
 1155: 		rlbn += blksperindir;
 1156: 	}
 1157: 	return (0);
 1158: }
1159:
1160: /*
1161: * Descend an indirect block chain for vnode cancelvp accounting for all
1162: * its indirect blocks in snapvp.
1.11 perry 1163: */
1.1 hannken 1164: static int
1.76 hannken 1165: indiracct(struct vnode *snapvp, struct vnode *cancelvp, int level,
1166: daddr_t blkno, daddr_t lbn, daddr_t rlbn, daddr_t remblks,
1167: daddr_t blksperindir, struct fs *fs, acctfunc_t acctfunc, int expungetype)
1.1 hannken 1168: {
1.76 hannken 1169: int error, num, i;
1170: daddr_t subblksperindir;
1.121 dholland 1171: struct indir indirs[UFS_NIADDR + 2];
1.76 hannken 1172: daddr_t last;
1173: void *bap;
1.1 hannken 1174: struct buf *bp;
1175:
1176: if (blkno == 0) {
1177: if (expungetype == BLK_NOCOPY)
1178: return (0);
1.76 hannken 1179: panic("indiracct: missing indir");
1.1 hannken 1180: }
1181: if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
1182: return (error);
1183: if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
1.76 hannken 1184: panic("indiracct: botched params");
1.1 hannken 1185: /*
1186: * We have to expand bread here since it will deadlock looking
1187: * up the block number for any blocks that are not in the cache.
1188: */
1.125 dholland 1189: error = ffs_getblk(cancelvp, lbn, FFS_FSBTODB(fs, blkno), fs->fs_bsize,
1.69 hannken 1190: false, &bp);
1191: if (error)
1192: return error;
1.72 hannken 1193: if ((bp->b_oflags & (BO_DONE | BO_DELWRI)) == 0 && (error =
1.127 dholland 1194: rwfsblk(bp->b_vp, B_READ, bp->b_data, ffs_fragstoblks(fs, blkno)))) {
1.52 ad 1195: brelse(bp, 0);
1.1 hannken 1196: return (error);
1197: }
1198: /*
1199: * Account for the block pointers in this indirect block.
1200: */
1201: last = howmany(remblks, blksperindir);
1.124 dholland 1202: if (last > FFS_NINDIR(fs))
1203: last = FFS_NINDIR(fs);
1.88 cegger 1204: bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1.95 tsutsui 1205: memcpy((void *)bap, bp->b_data, fs->fs_bsize);
1.52 ad 1206: brelse(bp, 0);
1.76 hannken 1207: error = (*acctfunc)(snapvp, bap, 0, last,
1208: fs, level == 0 ? rlbn : -1, expungetype);
1.1 hannken 1209: if (error || level == 0)
1210: goto out;
1211: /*
1212: * Account for the block pointers in each of the indirect blocks
1213: * in the levels below us.
1214: */
1.124 dholland 1215: subblksperindir = blksperindir / FFS_NINDIR(fs);
1.1 hannken 1216: for (lbn++, level--, i = 0; i < last; i++) {
1.76 hannken 1217: error = indiracct(snapvp, cancelvp, level,
1218: idb_get(VTOI(snapvp), bap, i), lbn, rlbn, remblks,
1219: subblksperindir, fs, acctfunc, expungetype);
1.1 hannken 1220: if (error)
1221: goto out;
1222: rlbn += blksperindir;
1223: lbn -= blksperindir;
1224: remblks -= blksperindir;
1225: }
1226: out:
1.88 cegger 1227: free(bap, M_DEVBUF);
1.1 hannken 1228: return (error);
1229: }
1230:
1231: /*
1232: * Do both snap accounting and map accounting.
1233: */
1234: static int
1.76 hannken 1235: fullacct(struct vnode *vp, void *bap, int oldblkp, int lastblkp,
1236: struct fs *fs, daddr_t lblkno,
1.18 thorpej 1237: int exptype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1238: {
1239: int error;
1240:
1.76 hannken 1241: if ((error = snapacct(vp, bap, oldblkp, lastblkp, fs, lblkno, exptype)))
1.1 hannken 1242: return (error);
1.76 hannken 1243: return (mapacct(vp, bap, oldblkp, lastblkp, fs, lblkno, exptype));
1.1 hannken 1244: }
1245:
1246: /*
1247: * Identify a set of blocks allocated in a snapshot inode.
1248: */
 1249: static int
1.76 hannken 1250: snapacct(struct vnode *vp, void *bap, int oldblkp, int lastblkp,
 1251:     struct fs *fs, daddr_t lblkno,
1.18 thorpej 1252:     int expungetype /* BLK_SNAP or BLK_NOCOPY */)
1.1 hannken 1253: {
 1254: 	struct inode *ip = VTOI(vp);
1.79 hannken 1255: 	struct lwp *l = curlwp;
1.103 hannken 1256: 	struct mount *mp = vp->v_mount;
1.76 hannken 1257: 	daddr_t blkno;
 1258: 	daddr_t lbn;
1.1 hannken 1259: 	struct buf *ibp;
1.103 hannken 1260: 	int error, n;
	/* Cycle the journal transaction every wbreak iterations (0 = never). */
 1261: 	const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
1.1 hannken 1262: 
1.103 hannken 1263: 	error = UFS_WAPBL_BEGIN(mp);
 1264: 	if (error)
 1265: 		return error;
 1266: 	for ( n = 0; oldblkp < lastblkp; oldblkp++) {
1.76 hannken 1267: 		blkno = idb_get(ip, bap, oldblkp);
1.1 hannken 1268: 		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
 1269: 			continue;
		/* Find the snapshot's own pointer to this block. */
1.127 dholland 1270: 		lbn = ffs_fragstoblks(fs, blkno);
1.121 dholland 1271: 		if (lbn < UFS_NDADDR) {
1.76 hannken 1272: 			blkno = db_get(ip, lbn);
1.1 hannken 1273: 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1274: 		} else {
1.126 dholland 1275: 			error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)lbn),
1.79 hannken 1276: 			    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
1.1 hannken 1277: 			if (error)
1.103 hannken 1278: 				break;
1.76 hannken 1279: 			blkno = idb_get(ip, ibp->b_data,
1.124 dholland 1280: 			    (lbn - UFS_NDADDR) % FFS_NINDIR(fs));
1.1 hannken 1281: 		}
 1282: 		/*
 1283: 		 * If we are expunging a snapshot vnode and we
 1284: 		 * find a block marked BLK_NOCOPY, then it is
 1285: 		 * one that has been allocated to this snapshot after
 1286: 		 * we took our current snapshot and can be ignored.
 1287: 		 */
 1288: 		if (expungetype == BLK_SNAP && blkno == BLK_NOCOPY) {
1.121 dholland 1289: 			if (lbn >= UFS_NDADDR)
1.52 ad 1290: 				brelse(ibp, 0);
1.1 hannken 1291: 		} else {
 1292: 			if (blkno != 0)
1.76 hannken 1293: 				panic("snapacct: bad block");
			/* Claim the block for the snapshot (BLK_SNAP/BLK_NOCOPY). */
1.121 dholland 1294: 			if (lbn < UFS_NDADDR)
1.76 hannken 1295: 				db_assign(ip, lbn, expungetype);
 1296: 			else {
 1297: 				idb_assign(ip, ibp->b_data,
1.124 dholland 1298: 				    (lbn - UFS_NDADDR) % FFS_NINDIR(fs), expungetype);
1.15 hannken 1299: 				bdwrite(ibp);
1.76 hannken 1300: 			}
1.1 hannken 1301: 		}
		/* Keep each journal transaction bounded in size. */
1.103 hannken 1302: 		if (wbreak > 0 && (++n % wbreak) == 0) {
 1303: 			UFS_WAPBL_END(mp);
 1304: 			error = UFS_WAPBL_BEGIN(mp);
 1305: 			if (error)
 1306: 				return error;
 1307: 		}
1.1 hannken 1308: 	}
1.103 hannken 1309: 	UFS_WAPBL_END(mp);
 1310: 	return error;
1.1 hannken 1311: }
1312:
1313: /*
1314: * Account for a set of blocks allocated in a snapshot inode.
1315: */
 1316: static int
1.76 hannken 1317: mapacct(struct vnode *vp, void *bap, int oldblkp, int lastblkp,
 1318:     struct fs *fs, daddr_t lblkno, int expungetype)
1.1 hannken 1319: {
1.76 hannken 1320: 	daddr_t blkno;
1.1 hannken 1321: 	struct inode *ip;
1.103 hannken 1322: 	struct mount *mp = vp->v_mount;
1.1 hannken 1323: 	ino_t inum;
1.103 hannken 1324: 	int acctit, error, n;
	/* Cycle the journal transaction every wbreak iterations (0 = never). */
 1325: 	const int wbreak = blocks_in_journal(VFSTOUFS(mp)->um_fs)/8;
1.1 hannken 1326: 
1.103 hannken 1327: 	error = UFS_WAPBL_BEGIN(mp);
 1328: 	if (error)
 1329: 		return error;
1.1 hannken 1330: 	ip = VTOI(vp);
 1331: 	inum = ip->i_number;
	/* lblkno == -1 means don't record blocks in i_snapblklist. */
 1332: 	if (lblkno == -1)
 1333: 		acctit = 0;
 1334: 	else
 1335: 		acctit = 1;
1.103 hannken 1336: 	for ( n = 0; oldblkp < lastblkp; oldblkp++, lblkno++) {
1.76 hannken 1337: 		blkno = idb_get(ip, bap, oldblkp);
1.1 hannken 1338: 		if (blkno == 0 || blkno == BLK_NOCOPY)
 1339: 			continue;
 1340: 		if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
1.15 hannken 1341: 			*ip->i_snapblklist++ = lblkno;
		/* A self-claimed block frees at its own logical address. */
1.1 hannken 1342: 		if (blkno == BLK_SNAP)
1.127 dholland 1343: 			blkno = ffs_blkstofrags(fs, lblkno);
1.83 joerg 1344: 		ffs_blkfree_snap(fs, vp, blkno, fs->fs_bsize, inum);
		/* Keep each journal transaction bounded in size. */
1.103 hannken 1345: 		if (wbreak > 0 && (++n % wbreak) == 0) {
 1346: 			UFS_WAPBL_END(mp);
 1347: 			error = UFS_WAPBL_BEGIN(mp);
 1348: 			if (error)
 1349: 				return error;
 1350: 		}
1.1 hannken 1351: 	}
1.103 hannken 1352: 	UFS_WAPBL_END(mp);
1.1 hannken 1353: 	return (0);
 1354: }
1.107 he 1355:
1356: /*
1357: * Number of blocks that fit into the journal or zero if not logging.
1358: */
1359: static int
1360: blocks_in_journal(struct fs *fs)
1361: {
1362: off_t bpj;
1363:
1364: if ((fs->fs_flags & FS_DOWAPBL) == 0)
1365: return 0;
1366: bpj = 1;
1367: if (fs->fs_journal_version == UFS_WAPBL_VERSION) {
1368: switch (fs->fs_journal_location) {
1369: case UFS_WAPBL_JOURNALLOC_END_PARTITION:
1370: bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ]*
1371: fs->fs_journallocs[UFS_WAPBL_EPART_COUNT];
1372: break;
1373: case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
1374: bpj = (off_t)fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ]*
1375: fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
1376: break;
1377: }
1378: }
1379: bpj /= fs->fs_bsize;
1380: return (bpj > 0 ? bpj : 1);
1381: }
1.10 hannken 1382: #endif /* defined(FFS_NO_SNAPSHOT) */
1.1 hannken 1383:
1384: /*
1385: * Decrement extra reference on snapshot when last name is removed.
1386: * It will not be freed until the last open reference goes away.
1387: */
 1388: void
1.123 hannken 1389: ffs_snapgone(struct vnode *vp)
1.1 hannken 1390: {
1.123 hannken 1391: 	struct inode *xp, *ip = VTOI(vp);
1.129 hannken 1392: 	struct mount *mp = spec_node_getmountedfs(ip->i_devvp);
1.1 hannken 1393: 	struct fs *fs;
1.47 hannken 1394: 	struct snap_info *si;
1.1 hannken 1395: 	int snaploc;
 1396: 
1.66 hannken 1397: 	si = VFSTOUFS(mp)->um_snapinfo;
 1398: 
1.1 hannken 1399: 	/*
 1400: 	 * Find snapshot in incore list.
 1401: 	 */
1.49 hannken 1402: 	mutex_enter(&si->si_lock);
1.47 hannken 1403: 	TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap)
1.1 hannken 1404: 		if (xp == ip)
 1405: 			break;
1.49 hannken 1406: 	mutex_exit(&si->si_lock);
	/* Drop the extra reference held while the snapshot was linked. */
1.104 hannken 1407: 	if (xp != NULL)
 1408: 		vrele(ITOV(ip));
1.1 hannken 1409: #ifdef DEBUG
1.104 hannken 1410: 	else if (snapdebug)
1.19 christos 1411: 		printf("ffs_snapgone: lost snapshot vnode %llu\n",
 1412: 		    (unsigned long long)ip->i_number);
1.1 hannken 1413: #endif
 1414: 	/*
 1415: 	 * Delete snapshot inode from superblock. Keep list dense.
 1416: 	 */
1.49 hannken 1417: 	mutex_enter(&si->si_lock);
1.1 hannken 1418: 	fs = ip->i_fs;
 1419: 	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
 1420: 		if (fs->fs_snapinum[snaploc] == ip->i_number)
 1421: 			break;
 1422: 	if (snaploc < FSMAXSNAP) {
		/* Shift the remaining entries down over the removed slot. */
 1423: 		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
 1424: 			if (fs->fs_snapinum[snaploc] == 0)
 1425: 				break;
 1426: 			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
 1427: 		}
 1428: 		fs->fs_snapinum[snaploc - 1] = 0;
 1429: 	}
	/* Bump the generation so lockless walkers restart their scans. */
1.49 hannken 1430: 	si->si_gen++;
 1431: 	mutex_exit(&si->si_lock);
1.1 hannken 1432: }
1433:
1434: /*
1435: * Prepare a snapshot file for being removed.
1436: */
 1437: void
1.18 thorpej 1438: ffs_snapremove(struct vnode *vp)
1.1 hannken 1439: {
1.15 hannken 1440: 	struct inode *ip = VTOI(vp), *xp;
1.1 hannken 1441: 	struct vnode *devvp = ip->i_devvp;
 1442: 	struct fs *fs = ip->i_fs;
1.129 hannken 1443: 	struct mount *mp = spec_node_getmountedfs(devvp);
1.1 hannken 1444: 	struct buf *ibp;
1.47 hannken 1445: 	struct snap_info *si;
1.79 hannken 1446: 	struct lwp *l = curlwp;
 1447: 	daddr_t numblks, blkno, dblk;
1.106 hannken 1448: 	int error, loc, last;
1.1 hannken 1449: 
1.66 hannken 1450: 	si = VFSTOUFS(mp)->um_snapinfo;
1.1 hannken 1451: 	/*
 1452: 	 * If active, delete from incore list (this snapshot may
 1453: 	 * already have been in the process of being deleted, so
 1454: 	 * would not have been active).
 1455: 	 *
 1456: 	 * Clear copy-on-write flag if last snapshot.
 1457: 	 */
	/* Lock order: si_snaplock before si_lock (matches ffs_snapblkfree). */
1.106 hannken 1458: 	mutex_enter(&si->si_snaplock);
1.97 hannken 1459: 	mutex_enter(&si->si_lock);
 1460: 	if (is_active_snapshot(si, ip)) {
1.47 hannken 1461: 		TAILQ_REMOVE(&si->si_snapshots, ip, i_nextsnap);
 1462: 		if (TAILQ_FIRST(&si->si_snapshots) != 0) {
1.15 hannken 1463: 			/* Roll back the list of preallocated blocks. */
1.47 hannken 1464: 			xp = TAILQ_LAST(&si->si_snapshots, inodelst);
 1465: 			si->si_snapblklist = xp->i_snapblklist;
1.96 hannken 1466: 			si->si_gen++;
 1467: 			mutex_exit(&si->si_lock);
1.106 hannken 1468: 			mutex_exit(&si->si_snaplock);
1.1 hannken 1469: 		} else {
			/* Last snapshot gone: tear down copy-on-write hook. */
1.47 hannken 1470: 			si->si_snapblklist = 0;
1.49 hannken 1471: 			si->si_gen++;
 1472: 			mutex_exit(&si->si_lock);
1.106 hannken 1473: 			mutex_exit(&si->si_snaplock);
1.51 hannken 1474: 			fscow_disestablish(mp, ffs_copyonwrite, devvp);
1.1 hannken 1475: 		}
1.79 hannken 1476: 		if (ip->i_snapblklist != NULL) {
 1477: 			free(ip->i_snapblklist, M_UFSMNT);
 1478: 			ip->i_snapblklist = NULL;
 1479: 		}
1.106 hannken 1480: 	} else {
1.97 hannken 1481: 		mutex_exit(&si->si_lock);
1.106 hannken 1482: 		mutex_exit(&si->si_snaplock);
 1483: 	}
1.1 hannken 1484: 	/*
 1485: 	 * Clear all BLK_NOCOPY fields. Pass any block claims to other
 1486: 	 * snapshots that want them (see ffs_snapblkfree below).
 1487: 	 */
1.121 dholland 1488: 	for (blkno = 1; blkno < UFS_NDADDR; blkno++) {
1.1 hannken 1489: 		dblk = db_get(ip, blkno);
 1490: 		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 1491: 			db_assign(ip, blkno, 0);
		/* dblk == its own lbn means the block was claimed, not copied. */
1.127 dholland 1492: 		else if ((dblk == ffs_blkstofrags(fs, blkno) &&
1.1 hannken 1493: 		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
 1494: 		     ip->i_number))) {
 1495: 			DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
 1496: 			db_assign(ip, blkno, 0);
 1497: 		}
 1498: 	}
 1499: 	numblks = howmany(ip->i_size, fs->fs_bsize);
	/* Same pass over all indirect blocks, one indirect block at a time. */
1.124 dholland 1500: 	for (blkno = UFS_NDADDR; blkno < numblks; blkno += FFS_NINDIR(fs)) {
1.126 dholland 1501: 		error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)blkno),
1.79 hannken 1502: 		    fs->fs_bsize, l->l_cred, B_METAONLY, &ibp);
1.1 hannken 1503: 		if (error)
 1504: 			continue;
1.124 dholland 1505: 		if (fs->fs_size - blkno > FFS_NINDIR(fs))
 1506: 			last = FFS_NINDIR(fs);
1.1 hannken 1507: 		else
 1508: 			last = fs->fs_size - blkno;
 1509: 		for (loc = 0; loc < last; loc++) {
 1510: 			dblk = idb_get(ip, ibp->b_data, loc);
 1511: 			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 1512: 				idb_assign(ip, ibp->b_data, loc, 0);
1.127 dholland 1513: 			else if (dblk == ffs_blkstofrags(fs, blkno) &&
1.1 hannken 1514: 			    ffs_snapblkfree(fs, ip->i_devvp, dblk,
 1515: 			    fs->fs_bsize, ip->i_number)) {
 1516: 				DIP_ADD(ip, blocks, -btodb(fs->fs_bsize));
 1517: 				idb_assign(ip, ibp->b_data, loc, 0);
 1518: 			}
 1519: 		}
1.15 hannken 1520: 		bawrite(ibp);
		/*
		 * NOTE(review): cycles the journal transaction per indirect
		 * block; presumably the caller holds an open transaction
		 * here — confirm against the callers of ffs_snapremove.
		 */
1.106 hannken 1521: 		UFS_WAPBL_END(mp);
 1522: 		error = UFS_WAPBL_BEGIN(mp);
 1523: 		KASSERT(error == 0);
1.1 hannken 1524: 	}
 1525: 	/*
 1526: 	 * Clear snapshot flag and drop reference.
 1527: 	 */
1.111 bouyer 1528: 	ip->i_flags &= ~(SF_SNAPSHOT | SF_SNAPINVAL);
1.1 hannken 1529: 	DIP_ASSIGN(ip, flags, ip->i_flags);
 1530: 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1.111 bouyer 1531: #if defined(QUOTA) || defined(QUOTA2)
	/* Re-charge the freed blocks and the inode back to quotas. */
 1532: 	chkdq(ip, DIP(ip, blocks), l->l_cred, FORCE);
 1533: 	chkiq(ip, 1, l->l_cred, FORCE);
 1534: #endif
1.1 hannken 1535: }
1536:
1537: /*
1538: * Notification that a block is being freed. Return zero if the free
1539: * should be allowed to proceed. Return non-zero if the snapshot file
1540: * wants to claim the block. The block will be claimed if it is an
1541: * uncopied part of one of the snapshots. It will be freed if it is
1542: * either a BLK_NOCOPY or has already been copied in all of the snapshots.
1543: * If a fragment is being freed, then all snapshots that care about
1544: * it must make a copy since a snapshot file can only claim full sized
1545: * blocks. Note that if more than one snapshot file maps the block,
1546: * we can pick one at random to claim it. Since none of the snapshots
1547: * can change, we are assurred that they will all see the same unmodified
1548: * image. When deleting a snapshot file (see ffs_snapremove above), we
1549: * must push any of these claimed blocks to one of the other snapshots
1550: * that maps it. These claimed blocks are easily identified as they will
1551: * have a block number equal to their logical block number within the
1552: * snapshot. A copied block can never have this property because they
1553: * must always have been allocated from a BLK_NOCOPY location.
1554: */
 1555: int
1.79 hannken 1556: ffs_snapblkfree(struct fs *fs, struct vnode *devvp, daddr_t bno,
1.36 christos 1557:     long size, ino_t inum)
1.1 hannken 1558: {
1.129 hannken 1559: 	struct mount *mp = spec_node_getmountedfs(devvp);
1.1 hannken 1560: 	struct buf *ibp;
 1561: 	struct inode *ip;
1.49 hannken 1562: 	struct vnode *vp = NULL;
1.47 hannken 1563: 	struct snap_info *si;
1.43 christos 1564: 	void *saved_data = NULL;
1.79 hannken 1565: 	daddr_t lbn;
 1566: 	daddr_t blkno;
1.49 hannken 1567: 	uint32_t gen;
1.106 hannken 1568: 	int indiroff = 0, error = 0, claimedblk = 0;
1.1 hannken 1569: 
1.66 hannken 1570: 	si = VFSTOUFS(mp)->um_snapinfo;
1.127 dholland 1571: 	lbn = ffs_fragstoblks(fs, bno);
	/* Lock order: si_snaplock before si_lock; record ownership. */
1.106 hannken 1572: 	mutex_enter(&si->si_snaplock);
1.49 hannken 1573: 	mutex_enter(&si->si_lock);
1.106 hannken 1574: 	si->si_owner = curlwp;
 1575: 
1.1 hannken 1576: retry:
	/*
	 * si_gen detects list changes while si_lock was dropped; on a
	 * mismatch the whole snapshot walk restarts from the beginning.
	 */
1.49 hannken 1577: 	gen = si->si_gen;
1.47 hannken 1578: 	TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
1.1 hannken 1579: 		vp = ITOV(ip);
 1580: 		/*
 1581: 		 * Lookup block being written.
 1582: 		 */
1.121 dholland 1583: 		if (lbn < UFS_NDADDR) {
1.1 hannken 1584: 			blkno = db_get(ip, lbn);
 1585: 		} else {
1.49 hannken 1586: 			mutex_exit(&si->si_lock);
1.126 dholland 1587: 			error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)lbn),
1.79 hannken 1588: 			    fs->fs_bsize, FSCRED, B_METAONLY, &ibp);
1.49 hannken 1589: 			if (error) {
 1590: 				mutex_enter(&si->si_lock);
1.1 hannken 1591: 				break;
1.49 hannken 1592: 			}
1.124 dholland 1593: 			indiroff = (lbn - UFS_NDADDR) % FFS_NINDIR(fs);
1.1 hannken 1594: 			blkno = idb_get(ip, ibp->b_data, indiroff);
1.49 hannken 1595: 			mutex_enter(&si->si_lock);
 1596: 			if (gen != si->si_gen) {
1.52 ad 1597: 				brelse(ibp, 0);
1.49 hannken 1598: 				goto retry;
 1599: 			}
1.1 hannken 1600: 		}
 1601: 		/*
 1602: 		 * Check to see if block needs to be copied.
 1603: 		 */
 1604: 		if (blkno == 0) {
 1605: 			/*
 1606: 			 * A block that we map is being freed. If it has not
 1607: 			 * been claimed yet, we will claim or copy it (below).
 1608: 			 */
 1609: 			claimedblk = 1;
 1610: 		} else if (blkno == BLK_SNAP) {
 1611: 			/*
 1612: 			 * No previous snapshot claimed the block,
 1613: 			 * so it will be freed and become a BLK_NOCOPY
 1614: 			 * (don't care) for us.
 1615: 			 */
 1616: 			if (claimedblk)
 1617: 				panic("snapblkfree: inconsistent block type");
1.121 dholland 1618: 			if (lbn < UFS_NDADDR) {
1.1 hannken 1619: 				db_assign(ip, lbn, BLK_NOCOPY);
 1620: 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1621: 			} else {
 1622: 				idb_assign(ip, ibp->b_data, indiroff,
 1623: 				    BLK_NOCOPY);
1.49 hannken 1624: 				mutex_exit(&si->si_lock);
				/* Linked snapshots must reach disk synchronously. */
1.92 ad 1625: 				if (ip->i_nlink > 0)
1.72 hannken 1626: 					bwrite(ibp);
 1627: 				else
 1628: 					bdwrite(ibp);
1.49 hannken 1629: 				mutex_enter(&si->si_lock);
 1630: 				if (gen != si->si_gen)
 1631: 					goto retry;
1.1 hannken 1632: 			}
 1633: 			continue;
 1634: 		} else /* BLK_NOCOPY or default */ {
 1635: 			/*
 1636: 			 * If the snapshot has already copied the block
 1637: 			 * (default), or does not care about the block,
 1638: 			 * it is not needed.
 1639: 			 */
1.121 dholland 1640: 			if (lbn >= UFS_NDADDR)
1.52 ad 1641: 				brelse(ibp, 0);
1.1 hannken 1642: 			continue;
 1643: 		}
 1644: 		/*
 1645: 		 * If this is a full size block, we will just grab it
 1646: 		 * and assign it to the snapshot inode. Otherwise we
 1647: 		 * will proceed to copy it. See explanation for this
 1648: 		 * routine as to why only a single snapshot needs to
 1649: 		 * claim this block.
 1650: 		 */
 1651: 		if (size == fs->fs_bsize) {
 1652: #ifdef DEBUG
 1653: 			if (snapdebug)
1.19 christos 1654: 				printf("%s %llu lbn %" PRId64
 1655: 				    "from inum %llu\n",
 1656: 				    "Grabonremove: snapino",
 1657: 				    (unsigned long long)ip->i_number,
 1658: 				    lbn, (unsigned long long)inum);
1.1 hannken 1659: #endif
1.49 hannken 1660: 			mutex_exit(&si->si_lock);
1.121 dholland 1661: 			if (lbn < UFS_NDADDR) {
1.1 hannken 1662: 				db_assign(ip, lbn, bno);
 1663: 			} else {
 1664: 				idb_assign(ip, ibp->b_data, indiroff, bno);
1.92 ad 1665: 				if (ip->i_nlink > 0)
1.72 hannken 1666: 					bwrite(ibp);
 1667: 				else
 1668: 					bdwrite(ibp);
1.1 hannken 1669: 			}
 1670: 			DIP_ADD(ip, blocks, btodb(size));
 1671: 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
1.92 ad 1672: 			if (ip->i_nlink > 0 && mp->mnt_wapbl)
1.75 hannken 1673: 				error = syncsnap(vp);
 1674: 			else
 1675: 				error = 0;
			/* Claimed: report non-zero so the caller keeps the block. */
1.106 hannken 1676: 			mutex_enter(&si->si_lock);
 1677: 			si->si_owner = NULL;
 1678: 			mutex_exit(&si->si_lock);
1.74 hannken 1679: 			mutex_exit(&si->si_snaplock);
1.75 hannken 1680: 			return (error == 0);
1.1 hannken 1681: 		}
1.121 dholland 1682: 		if (lbn >= UFS_NDADDR)
1.52 ad 1683: 			brelse(ibp, 0);
1.1 hannken 1684: #ifdef DEBUG
 1685: 		if (snapdebug)
1.19 christos 1686: 			printf("%s%llu lbn %" PRId64 " %s %llu size %ld\n",
 1687: 			    "Copyonremove: snapino ",
 1688: 			    (unsigned long long)ip->i_number,
 1689: 			    lbn, "for inum", (unsigned long long)inum, size);
1.1 hannken 1690: #endif
 1691: 		/*
 1692: 		 * If we have already read the old block contents, then
 1693: 		 * simply copy them to the new block. Note that we need
 1694: 		 * to synchronously write snapshots that have not been
 1695: 		 * unlinked, and hence will be visible after a crash,
 1696: 		 * to ensure their integrity.
 1697: 		 */
1.49 hannken 1698: 		mutex_exit(&si->si_lock);
		/* Read the doomed block once; reuse the copy for later snapshots. */
 1699: 		if (saved_data == NULL) {
 1700: 			saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
1.75 hannken 1701: 			error = rwfsblk(vp, B_READ, saved_data, lbn);
1.72 hannken 1702: 			if (error) {
1.49 hannken 1703: 				free(saved_data, M_UFSMNT);
 1704: 				saved_data = NULL;
 1705: 				mutex_enter(&si->si_lock);
1.1 hannken 1706: 				break;
1.49 hannken 1707: 			}
1.1 hannken 1708: 		}
1.72 hannken 1709: 		error = wrsnapblk(vp, saved_data, lbn);
1.92 ad 1710: 		if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl)
1.75 hannken 1711: 			error = syncsnap(vp);
1.49 hannken 1712: 		mutex_enter(&si->si_lock);
 1713: 		if (error)
1.1 hannken 1714: 			break;
1.49 hannken 1715: 		if (gen != si->si_gen)
 1716: 			goto retry;
1.1 hannken 1717: 	}
1.106 hannken 1718: 	si->si_owner = NULL;
1.49 hannken 1719: 	mutex_exit(&si->si_lock);
1.106 hannken 1720: 	mutex_exit(&si->si_snaplock);
1.49 hannken 1721: 	if (saved_data)
1.1 hannken 1722: 		free(saved_data, M_UFSMNT);
 1723: 	/*
 1724: 	 * If we have been unable to allocate a block in which to do
 1725: 	 * the copy, then return non-zero so that the fragment will
 1726: 	 * not be freed. Although space will be lost, the snapshot
 1727: 	 * will stay consistent.
 1728: 	 */
 1729: 	return (error);
 1730: }
1731:
1732: /*
1733: * Associate snapshot files when mounting.
1734: */
/*
 * Associate the persistent snapshot files recorded in the superblock
 * with the in-core snapshot state when the file system is mounted.
 * For each snapshot inode: validate it, read its preallocated block
 * hints list from disk, and link it onto the active snapshot list.
 * If at least one usable snapshot was found, install the newest
 * snapshot's hints list and establish the copy-on-write hook.
 */
void
ffs_snapshot_mount(struct mount *mp)
{
	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
	struct fs *fs = VFSTOUFS(mp)->um_fs;
	struct lwp *l = curlwp;
	struct vnode *vp;
	struct inode *ip, *xp;
	struct snap_info *si;
	daddr_t snaplistsize, *snapblklist;
	int i, error, ns __unused, snaploc, loc;

	/*
	 * No persistent snapshots on apple ufs file systems.
	 */
	if (UFS_MPISAPPLEUFS(VFSTOUFS(mp)))
		return;

	si = VFSTOUFS(mp)->um_snapinfo;
	ns = UFS_FSNEEDSWAP(fs);
	/*
	 * XXX The following needs to be set before ffs_truncate or
	 * VOP_READ can be called.
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	/*
	 * Process each snapshot listed in the superblock.
	 */
	vp = NULL;
	mutex_enter(&si->si_lock);
	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
		if (fs->fs_snapinum[snaploc] == 0)
			break;
		if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
		    &vp)) != 0) {
			printf("ffs_snapshot_mount: vget failed %d\n", error);
			continue;
		}
		ip = VTOI(vp);
		/* Must be a snapshot and must not be marked invalid. */
		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) !=
		    SF_SNAPSHOT) {
			printf("ffs_snapshot_mount: non-snapshot inode %d\n",
			    fs->fs_snapinum[snaploc]);
			vput(vp);
			vp = NULL;
			/*
			 * Remove the stale entry by shifting the remaining
			 * superblock slots down one position, then retry
			 * this slot.
			 */
			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
				if (fs->fs_snapinum[loc] == 0)
					break;
				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
			}
			fs->fs_snapinum[loc - 1] = 0;
			snaploc--;
			continue;
		}

		/*
		 * Read the block hints list. Use an empty list on
		 * read errors.
		 *
		 * The list is stored in the snapshot file immediately
		 * past the last file system block; its first element
		 * is the list length (see the binary search in
		 * ffs_copyonwrite).  A length of 1 therefore means
		 * "empty list".
		 */
		error = vn_rdwr(UIO_READ, vp,
		    (void *)&snaplistsize, sizeof(snaplistsize),
		    ffs_lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
		    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT|IO_ALTSEMANTICS,
		    l->l_cred, NULL, NULL);
		if (error) {
			printf("ffs_snapshot_mount: read_1 failed %d\n", error);
			snaplistsize = 1;
		} else
			snaplistsize = ufs_rw64(snaplistsize, ns);
		/*
		 * NOTE(review): snaplistsize comes from on-disk data and
		 * is used unchecked to size this allocation — presumably
		 * bounded by the snapshot creation code; verify.
		 */
		snapblklist = malloc(
		    snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK);
		if (error)
			snapblklist[0] = 1;
		else {
			error = vn_rdwr(UIO_READ, vp, (void *)snapblklist,
			    snaplistsize * sizeof(daddr_t),
			    ffs_lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)),
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT|IO_ALTSEMANTICS,
			    l->l_cred, NULL, NULL);
			for (i = 0; i < snaplistsize; i++)
				snapblklist[i] = ufs_rw64(snapblklist[i], ns);
			if (error) {
				printf("ffs_snapshot_mount: read_2 failed %d\n",
				    error);
				snapblklist[0] = 1;
			}
		}
		ip->i_snapblklist = &snapblklist[0];

		/*
		 * Link it onto the active snapshot list.
		 */
		if (is_active_snapshot(si, ip))
			panic("ffs_snapshot_mount: %"PRIu64" already on list",
			    ip->i_number);
		else
			TAILQ_INSERT_TAIL(&si->si_snapshots, ip, i_nextsnap);
		vp->v_vflag |= VV_SYSTEM;
		/* Keep the reference from VFS_VGET; only drop the lock. */
		VOP_UNLOCK(vp);
	}
	/*
	 * No usable snapshots found.
	 */
	if (vp == NULL) {
		mutex_exit(&si->si_lock);
		return;
	}
	/*
	 * Attach the block hints list. We always want to
	 * use the list from the newest snapshot.
	 */
	xp = TAILQ_LAST(&si->si_snapshots, inodelst);
	si->si_snapblklist = xp->i_snapblklist;
	fscow_establish(mp, ffs_copyonwrite, devvp);
	/* Bump the generation so concurrent list walkers restart. */
	si->si_gen++;
	mutex_exit(&si->si_lock);
}
1852:
1853: /*
1854: * Disassociate snapshot files when unmounting.
1855: */
/*
 * Disassociate all snapshot files from the in-core snapshot state at
 * unmount time: unlink each from the active list, free its block hints
 * list, and drop the vnode reference held since mount/creation.  If any
 * snapshot existed, tear down the copy-on-write hook as well.
 */
void
ffs_snapshot_unmount(struct mount *mp)
{
	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
	struct inode *xp;
	struct vnode *vp = NULL;
	struct snap_info *si;

	si = VFSTOUFS(mp)->um_snapinfo;
	mutex_enter(&si->si_lock);
	while ((xp = TAILQ_FIRST(&si->si_snapshots)) != 0) {
		vp = ITOV(xp);
		TAILQ_REMOVE(&si->si_snapshots, xp, i_nextsnap);
		/* Clear the shared hints pointer if it aliases this one. */
		if (xp->i_snapblklist == si->si_snapblklist)
			si->si_snapblklist = NULL;
		free(xp->i_snapblklist, M_UFSMNT);
		if (xp->i_nlink > 0) {
			/*
			 * Bump the generation and drop si_lock around
			 * vrele(), which may sleep; walkers that raced
			 * with us will notice the generation change and
			 * restart.
			 */
			si->si_gen++;
			mutex_exit(&si->si_lock);
			vrele(vp);
			mutex_enter(&si->si_lock);
		}
	}
	si->si_gen++;
	mutex_exit(&si->si_lock);
	/* vp != NULL iff at least one snapshot was on the list. */
	if (vp)
		fscow_disestablish(mp, ffs_copyonwrite, devvp);
}
1884:
1885: /*
1886: * Check for need to copy block that is about to be written,
1887: * copying the block if necessary.
1888: */
/*
 * Copy-on-write hook, called (via fscow_establish) before a block of
 * the underlying device is overwritten.  For every active snapshot
 * that has not yet claimed the block, read the old contents and write
 * them into the snapshot file.  Returns 0 on success or an error; a
 * non-zero return prevents the write from proceeding.
 *
 * v is the device vnode registered at mount time; data_valid indicates
 * bp already holds a full, valid block image that may be reused as the
 * copy source.
 */
static int
ffs_copyonwrite(void *v, struct buf *bp, bool data_valid)
{
	struct fs *fs;
	struct inode *ip;
	struct vnode *devvp = v, *vp = NULL;
	struct mount *mp = spec_node_getmountedfs(devvp);
	struct snap_info *si;
	void *saved_data = NULL;
	daddr_t lbn, blkno, *snapblklist;
	uint32_t gen;
	int lower, upper, mid, snapshot_locked = 0, error = 0;

	/*
	 * Check for valid snapshots.
	 */
	si = VFSTOUFS(mp)->um_snapinfo;
	mutex_enter(&si->si_lock);
	ip = TAILQ_FIRST(&si->si_snapshots);
	if (ip == NULL) {
		mutex_exit(&si->si_lock);
		return 0;
	}
	/*
	 * First check to see if it is after the file system,
	 * in the journal or in the preallocated list.
	 * By doing these checks we avoid several potential deadlocks.
	 */
	fs = ip->i_fs;
	lbn = ffs_fragstoblks(fs, FFS_DBTOFSB(fs, bp->b_blkno));
	if (bp->b_blkno >= FFS_FSBTODB(fs, fs->fs_size)) {
		mutex_exit(&si->si_lock);
		return 0;
	}
	/* Writes into an in-filesystem WAPBL journal are never copied. */
	if ((fs->fs_flags & FS_DOWAPBL) &&
	    fs->fs_journal_location == UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM) {
		off_t blk_off, log_start, log_end;

		log_start = (off_t)fs->fs_journallocs[UFS_WAPBL_INFS_ADDR] *
		    fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
		log_end = log_start + fs->fs_journallocs[UFS_WAPBL_INFS_COUNT] *
		    fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
		blk_off = dbtob(bp->b_blkno);
		if (blk_off >= log_start && blk_off < log_end) {
			mutex_exit(&si->si_lock);
			return 0;
		}
	}
	/*
	 * Binary search the sorted preallocated-block list; element 0
	 * holds the list length.  A hit means the block is already
	 * covered and needs no copy.
	 */
	snapblklist = si->si_snapblklist;
	upper = (snapblklist != NULL ? snapblklist[0] - 1 : 0);
	lower = 1;
	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (snapblklist[mid] == lbn)
			break;
		if (snapblklist[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	if (lower <= upper) {
		mutex_exit(&si->si_lock);
		return 0;
	}
	/*
	 * Not in the precomputed list, so check the snapshots.
	 *
	 * Take ownership of si_snaplock unless this LWP already holds
	 * it (recursive entry); record ownership so we know to release
	 * it on the way out.
	 */
	if (si->si_owner != curlwp) {
		if (!mutex_tryenter(&si->si_snaplock)) {
			/* Must not sleep on snaplock while holding si_lock. */
			mutex_exit(&si->si_lock);
			mutex_enter(&si->si_snaplock);
			mutex_enter(&si->si_lock);
		}
		si->si_owner = curlwp;
		snapshot_locked = 1;
	}
	/* Reuse the caller's block image as the copy source if possible. */
	if (data_valid && bp->b_bcount == fs->fs_bsize)
		saved_data = bp->b_data;
retry:
	/* Restart the walk whenever the snapshot list generation changes. */
	gen = si->si_gen;
	TAILQ_FOREACH(ip, &si->si_snapshots, i_nextsnap) {
		vp = ITOV(ip);
		/*
		 * We ensure that everything of our own that needs to be
		 * copied will be done at the time that ffs_snapshot is
		 * called. Thus we can skip the check here which can
		 * deadlock in doing the lookup in ffs_balloc.
		 */
		if (bp->b_vp == vp)
			continue;
		/*
		 * Check to see if block needs to be copied.
		 */
		if (lbn < UFS_NDADDR) {
			blkno = db_get(ip, lbn);
		} else {
			mutex_exit(&si->si_lock);
			blkno = 0;	/* XXX: GCC */
			if ((error = snapblkaddr(vp, lbn, &blkno)) != 0) {
				mutex_enter(&si->si_lock);
				break;
			}
			mutex_enter(&si->si_lock);
			if (gen != si->si_gen)
				goto retry;
		}
		KASSERTMSG((blkno != BLK_SNAP || bp->b_lblkno < 0),
		    "ffs_copyonwrite: bad copy block: blkno %jd, lblkno %jd",
		    (intmax_t)blkno, (intmax_t)bp->b_lblkno);
		/* Non-zero address: this snapshot already has the block. */
		if (blkno != 0)
			continue;

		/* The pagedaemon may not sleep for allocation/I/O here. */
		if (curlwp == uvm.pagedaemon_lwp) {
			error = ENOMEM;
			break;
		}
		/* Only one level of recursion allowed. */
		KASSERT(snapshot_locked);
		/*
		 * Allocate the block into which to do the copy. Since
		 * multiple processes may all try to copy the same block,
		 * we have to recheck our need to do a copy if we sleep
		 * waiting for the lock.
		 *
		 * Because all snapshots on a filesystem share a single
		 * lock, we ensure that we will never be in competition
		 * with another process to allocate a block.
		 */
#ifdef DEBUG
		if (snapdebug) {
			printf("Copyonwrite: snapino %llu lbn %" PRId64 " for ",
			    (unsigned long long)ip->i_number, lbn);
			if (bp->b_vp == devvp)
				printf("fs metadata");
			else
				printf("inum %llu", (unsigned long long)
				    VTOI(bp->b_vp)->i_number);
			printf(" lblkno %" PRId64 "\n", bp->b_lblkno);
		}
#endif
		/*
		 * If we have already read the old block contents, then
		 * simply copy them to the new block. Note that we need
		 * to synchronously write snapshots that have not been
		 * unlinked, and hence will be visible after a crash,
		 * to ensure their integrity.
		 */
		mutex_exit(&si->si_lock);
		if (saved_data == NULL) {
			saved_data = malloc(fs->fs_bsize, M_UFSMNT, M_WAITOK);
			error = rwfsblk(vp, B_READ, saved_data, lbn);
			if (error) {
				free(saved_data, M_UFSMNT);
				saved_data = NULL;
				mutex_enter(&si->si_lock);
				break;
			}
		}
		error = wrsnapblk(vp, saved_data, lbn);
		if (error == 0 && ip->i_nlink > 0 && mp->mnt_wapbl)
			error = syncsnap(vp);
		mutex_enter(&si->si_lock);
		if (error)
			break;
		if (gen != si->si_gen)
			goto retry;
	}
	/*
	 * Note that we need to synchronously write snapshots that
	 * have not been unlinked, and hence will be visible after
	 * a crash, to ensure their integrity.
	 */
	if (snapshot_locked) {
		si->si_owner = NULL;
		mutex_exit(&si->si_lock);
		mutex_exit(&si->si_snaplock);
	} else
		mutex_exit(&si->si_lock);
	/* Only free the copy buffer if we allocated it ourselves. */
	if (saved_data && saved_data != bp->b_data)
		free(saved_data, M_UFSMNT);
	return error;
}
2071:
2072: /*
2073: * Read from a snapshot.
2074: */
/*
 * Read from a snapshot file, serialized against copy-on-write activity
 * by si_snaplock.  With IO_ALTSEMANTICS the logical EOF is the inode
 * size; otherwise it is the size of the whole file system the snapshot
 * mirrors.  Returns 0 or an error from bread/breadn/uiomove.
 */
int
ffs_snapshot_read(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct snap_info *si = VFSTOUFS(vp->v_mount)->um_snapinfo;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t fsbytes, bytesinfile;
	long size, xfersize, blkoffset;
	int error;

	mutex_enter(&si->si_snaplock);

	if (ioflag & IO_ALTSEMANTICS)
		fsbytes = ip->i_size;
	else
		fsbytes = ffs_lfragtosize(fs, fs->fs_size);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = fsbytes - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = ffs_lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;
		blkoffset = ffs_blkoff(fs, uio->uio_offset);
		/* Transfer at most: rest of block, rest of request, rest of file. */
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		/*
		 * Near EOF, trim the read to the fragment-rounded tail
		 * and skip read-ahead; otherwise read ahead one block.
		 */
		if (ffs_lblktosize(fs, nextlbn + 1) >= fsbytes) {
			if (ffs_lblktosize(fs, lbn) + size > fsbytes)
				size = ffs_fragroundup(fs,
				    fsbytes - ffs_lblktosize(fs, lbn));
			error = bread(vp, lbn, size, 0, &bp);
		} else {
			int nextsize = fs->fs_bsize;
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, 0, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < blkoffset + xfersize) {
			xfersize = size - blkoffset;
			if (xfersize <= 0)
				break;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		/* BC_AGE: snapshot blocks are unlikely to be re-read soon. */
		brelse(bp, BC_AGE);
	}
	if (bp != NULL)
		brelse(bp, BC_AGE);

	mutex_exit(&si->si_snaplock);
	return error;
}
2141:
2142: /*
1.79 hannken 2143: * Lookup a snapshots data block address.
2144: * Simpler than UFS_BALLOC() as we know all metadata is already allocated
2145: * and safe even for the pagedaemon where we cannot bread().
2146: */
/*
 * Lookup a snapshots data block address.
 * Simpler than UFS_BALLOC() as we know all metadata is already allocated
 * and safe even for the pagedaemon where we cannot bread().
 *
 * On success *res is set to the block address (0 if unallocated) and
 * 0 is returned; otherwise an error from ufs_getlbns/bread, or ENOMEM
 * when the pagedaemon needs an indirect block that is not resident.
 */
static int
snapblkaddr(struct vnode *vp, daddr_t lbn, daddr_t *res)
{
	struct indir indirs[UFS_NIADDR + 2];
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct buf *bp;
	int error, num;

	KASSERT(lbn >= 0);

	/* Direct blocks come straight from the inode. */
	if (lbn < UFS_NDADDR) {
		*res = db_get(ip, lbn);
		return 0;
	}
	/* Compute the chain of indirect blocks leading to lbn. */
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return error;
	if (curlwp == uvm.pagedaemon_lwp) {
		/*
		 * The pagedaemon must not sleep in bread(); only use
		 * the indirect block if it is already in the buffer
		 * cache with valid contents.
		 */
		mutex_enter(&bufcache_lock);
		bp = incore(vp, indirs[num-1].in_lbn);
		if (bp && (bp->b_oflags & (BO_DONE | BO_DELWRI))) {
			*res = idb_get(ip, bp->b_data, indirs[num-1].in_off);
			error = 0;
		} else
			error = ENOMEM;
		mutex_exit(&bufcache_lock);
		return error;
	}
	error = bread(vp, indirs[num-1].in_lbn, fs->fs_bsize, 0, &bp);
	if (error == 0) {
		*res = idb_get(ip, bp->b_data, indirs[num-1].in_off);
		brelse(bp, 0);
	}

	return error;
}
2183:
2184: /*
1.75 hannken 2185: * Read or write the specified block of the filesystem vp resides on
2186: * from or to the disk bypassing the buffer cache.
1.1 hannken 2187: */
2188: static int
1.79 hannken 2189: rwfsblk(struct vnode *vp, int flags, void *data, daddr_t lbn)
1.1 hannken 2190: {
1.24 yamt 2191: int error;
1.1 hannken 2192: struct inode *ip = VTOI(vp);
2193: struct fs *fs = ip->i_fs;
2194: struct buf *nbp;
2195:
1.57 ad 2196: nbp = getiobuf(NULL, true);
1.75 hannken 2197: nbp->b_flags = flags;
1.1 hannken 2198: nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize;
2199: nbp->b_error = 0;
2200: nbp->b_data = data;
1.127 dholland 2201: nbp->b_blkno = nbp->b_rawblkno = FFS_FSBTODB(fs, ffs_blkstofrags(fs, lbn));
1.1 hannken 2202: nbp->b_proc = NULL;
2203: nbp->b_dev = ip->i_devvp->v_rdev;
1.70 reinoud 2204: SET(nbp->b_cflags, BC_BUSY); /* mark buffer busy */
1.1 hannken 2205:
1.44 ad 2206: bdev_strategy(nbp);
1.1 hannken 2207:
2208: error = biowait(nbp);
2209:
1.24 yamt 2210: putiobuf(nbp);
1.1 hannken 2211:
2212: return error;
2213: }
2214:
2215: /*
1.75 hannken 2216: * Write all dirty buffers to disk and invalidate them.
2217: */
/*
 * Write all dirty buffers to disk and invalidate them.
 *
 * Flushes each dirty buffer of the snapshot vnode straight to the
 * device via rwfsblk() and invalidates it, so the on-disk snapshot
 * image is up to date.  Returns 0 or the first error encountered.
 */
static int
syncsnap(struct vnode *vp)
{
	int error;
	buf_t *bp;
	struct fs *fs = VTOI(vp)->i_fs;

	mutex_enter(&bufcache_lock);
	while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
		/*
		 * EPASSTHROUGH from bbusy() means the buffer changed
		 * while we waited for it; rescan the list from the top.
		 */
		error = bbusy(bp, false, 0, NULL);
		if (error == EPASSTHROUGH)
			continue;
		else if (error != 0) {
			mutex_exit(&bufcache_lock);
			return error;
		}
		/* Snapshot buffers are always full-sized blocks. */
		KASSERT(bp->b_bcount == fs->fs_bsize);
		/* Drop the lock around the synchronous raw write. */
		mutex_exit(&bufcache_lock);
		error = rwfsblk(vp, B_WRITE, bp->b_data,
		    ffs_fragstoblks(fs, FFS_DBTOFSB(fs, bp->b_blkno)));
		brelse(bp, BC_INVAL | BC_VFLUSH);
		if (error)
			return error;
		mutex_enter(&bufcache_lock);
	}
	mutex_exit(&bufcache_lock);

	return 0;
}
2247:
2248: /*
1.72 hannken 2249: * Write the specified block to a snapshot.
1.1 hannken 2250: */
2251: static int
1.79 hannken 2252: wrsnapblk(struct vnode *vp, void *data, daddr_t lbn)
1.1 hannken 2253: {
2254: struct inode *ip = VTOI(vp);
2255: struct fs *fs = ip->i_fs;
1.74 hannken 2256: struct buf *bp;
2257: int error;
1.1 hannken 2258:
1.126 dholland 2259: error = ffs_balloc(vp, ffs_lblktosize(fs, (off_t)lbn), fs->fs_bsize,
1.92 ad 2260: FSCRED, (ip->i_nlink > 0 ? B_SYNC : 0), &bp);
1.74 hannken 2261: if (error)
2262: return error;
1.95 tsutsui 2263: memcpy(bp->b_data, data, fs->fs_bsize);
1.92 ad 2264: if (ip->i_nlink > 0)
1.74 hannken 2265: error = bwrite(bp);
2266: else
2267: bawrite(bp);
1.4 hannken 2268:
1.72 hannken 2269: return error;
1.4 hannken 2270: }
2271:
2272: /*
1.97 hannken 2273: * Check if this inode is present on the active snapshot list.
2274: * Must be called with snapinfo locked.
2275: */
2276: static inline bool
2277: is_active_snapshot(struct snap_info *si, struct inode *ip)
2278: {
2279: struct inode *xp;
2280:
2281: KASSERT(mutex_owned(&si->si_lock));
2282:
2283: TAILQ_FOREACH(xp, &si->si_snapshots, i_nextsnap)
2284: if (xp == ip)
2285: return true;
2286: return false;
2287: }
2288:
2289: /*
1.1 hannken 2290: * Get/Put direct block from inode or buffer containing disk addresses. Take
2291: * care for fs type (UFS1/UFS2) and byte swapping. These functions should go
2292: * into a global include.
2293: */
1.79 hannken 2294: static inline daddr_t
1.1 hannken 2295: db_get(struct inode *ip, int loc)
2296: {
2297: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2298: return ufs_rw32(ip->i_ffs1_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2299: else
1.2 hannken 2300: return ufs_rw64(ip->i_ffs2_db[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2301: }
2302:
2303: static inline void
1.79 hannken 2304: db_assign(struct inode *ip, int loc, daddr_t val)
1.1 hannken 2305: {
2306: if (ip->i_ump->um_fstype == UFS1)
1.2 hannken 2307: ip->i_ffs1_db[loc] = ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2308: else
1.2 hannken 2309: ip->i_ffs2_db[loc] = ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2310: }
2311:
1.132 joerg 2312: __unused static inline daddr_t
1.76 hannken 2313: ib_get(struct inode *ip, int loc)
2314: {
2315: if (ip->i_ump->um_fstype == UFS1)
2316: return ufs_rw32(ip->i_ffs1_ib[loc], UFS_IPNEEDSWAP(ip));
2317: else
2318: return ufs_rw64(ip->i_ffs2_ib[loc], UFS_IPNEEDSWAP(ip));
2319: }
2320:
1.79 hannken 2321: static inline daddr_t
1.43 christos 2322: idb_get(struct inode *ip, void *bf, int loc)
1.1 hannken 2323: {
2324: if (ip->i_ump->um_fstype == UFS1)
1.79 hannken 2325: return ufs_rw32(((int32_t *)(bf))[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2326: else
1.79 hannken 2327: return ufs_rw64(((int64_t *)(bf))[loc], UFS_IPNEEDSWAP(ip));
1.1 hannken 2328: }
2329:
2330: static inline void
1.79 hannken 2331: idb_assign(struct inode *ip, void *bf, int loc, daddr_t val)
1.1 hannken 2332: {
2333: if (ip->i_ump->um_fstype == UFS1)
1.79 hannken 2334: ((int32_t *)(bf))[loc] = ufs_rw32(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2335: else
1.79 hannken 2336: ((int64_t *)(bf))[loc] = ufs_rw64(val, UFS_IPNEEDSWAP(ip));
1.1 hannken 2337: }
CVSweb <webmaster@jp.NetBSD.org>