Annotation of src/sys/kern/vfs_wapbl.c, Revision 1.61.2.1
1.61.2.1! skrll 1: /* $NetBSD: vfs_wapbl.c,v 1.62 2015/08/09 07:40:59 mlelstv Exp $ */
1.2 simonb 2:
3: /*-
1.23 ad 4: * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
1.2 simonb 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Wasabi Systems, Inc.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
33: * This implements file system independent write ahead filesystem logging.
34: */
1.4 joerg 35:
36: #define WAPBL_INTERNAL
37:
1.2 simonb 38: #include <sys/cdefs.h>
1.61.2.1! skrll 39: __KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.62 2015/08/09 07:40:59 mlelstv Exp $");
1.2 simonb 40:
41: #include <sys/param.h>
1.31 mlelstv 42: #include <sys/bitops.h>
1.2 simonb 43:
44: #ifdef _KERNEL
45: #include <sys/param.h>
46: #include <sys/namei.h>
47: #include <sys/proc.h>
1.39 christos 48: #include <sys/sysctl.h>
1.2 simonb 49: #include <sys/uio.h>
50: #include <sys/vnode.h>
51: #include <sys/file.h>
1.35 pooka 52: #include <sys/module.h>
1.2 simonb 53: #include <sys/resourcevar.h>
54: #include <sys/conf.h>
55: #include <sys/mount.h>
56: #include <sys/kernel.h>
57: #include <sys/kauth.h>
58: #include <sys/mutex.h>
59: #include <sys/atomic.h>
60: #include <sys/wapbl.h>
1.16 joerg 61: #include <sys/wapbl_replay.h>
1.2 simonb 62:
63: #include <miscfs/specfs/specdev.h>
64:
1.51 para 65: #define wapbl_alloc(s) kmem_alloc((s), KM_SLEEP)
66: #define wapbl_free(a, s) kmem_free((a), (s))
67: #define wapbl_calloc(n, s) kmem_zalloc((n)*(s), KM_SLEEP)
1.2 simonb 68:
1.39 christos 69: static struct sysctllog *wapbl_sysctl;
70: static int wapbl_flush_disk_cache = 1;
71: static int wapbl_verbose_commit = 0;
72:
1.57 joerg 73: static inline size_t wapbl_space_free(size_t, off_t, off_t);
74:
1.2 simonb 75: #else /* !_KERNEL */
76: #include <assert.h>
77: #include <errno.h>
78: #include <stdio.h>
79: #include <stdbool.h>
80: #include <stdlib.h>
81: #include <string.h>
82:
83: #include <sys/time.h>
84: #include <sys/wapbl.h>
1.16 joerg 85: #include <sys/wapbl_replay.h>
1.2 simonb 86:
87: #define KDASSERT(x) assert(x)
88: #define KASSERT(x) assert(x)
1.51 para 89: #define wapbl_alloc(s) malloc(s)
1.18 yamt 90: #define wapbl_free(a, s) free(a)
1.2 simonb 91: #define wapbl_calloc(n, s) calloc((n), (s))
92:
93: #endif /* !_KERNEL */
94:
95: /*
96: * INTERNAL DATA STRUCTURES
97: */
98:
99: /*
100: * This structure holds per-mount log information.
101: *
102: * Legend: a = atomic access only
103: * r = read-only after init
104: * l = rwlock held
105: * m = mutex held
1.38 hannken 106: * lm = rwlock held writing or mutex held
1.2 simonb 107: * u = unlocked access ok
108: * b = bufcache_lock held
109: */
1.60 matt 110: LIST_HEAD(wapbl_ino_head, wapbl_ino);
1.2 simonb 111: struct wapbl {
112: struct vnode *wl_logvp; /* r: log here */
113: struct vnode *wl_devvp; /* r: log on this device */
114: struct mount *wl_mount; /* r: mountpoint wl is associated with */
115: daddr_t wl_logpbn; /* r: Physical block number of start of log */
116: int wl_log_dev_bshift; /* r: logarithm of device block size of log
117: device */
118: int wl_fs_dev_bshift; /* r: logarithm of device block size of
119: filesystem device */
120:
1.3 yamt 121: unsigned wl_lock_count; /* m: Count of transactions in progress */
1.2 simonb 122:
123: size_t wl_circ_size; /* r: Number of bytes in buffer of log */
124: size_t wl_circ_off; /* r: Number of bytes reserved at start */
125:
126: size_t wl_bufcount_max; /* r: Number of buffers reserved for log */
127: size_t wl_bufbytes_max; /* r: Number of buf bytes reserved for log */
128:
129: off_t wl_head; /* l: Byte offset of log head */
130: off_t wl_tail; /* l: Byte offset of log tail */
131: /*
132: * head == tail == 0 means log is empty
133: * head == tail != 0 means log is full
134: * see assertions in wapbl_advance() for other boundary conditions.
135: * only truncate moves the tail, except when flush sets it to
136: * wl_header_size only flush moves the head, except when truncate
137: * sets it to 0.
138: */
139:
140: struct wapbl_wc_header *wl_wc_header; /* l */
141: void *wl_wc_scratch; /* l: scratch space (XXX: por que?!?) */
142:
143: kmutex_t wl_mtx; /* u: short-term lock */
144: krwlock_t wl_rwlock; /* u: File system transaction lock */
145:
146: /*
147: * Must be held while accessing
148: * wl_count or wl_bufs or head or tail
149: */
150:
151: /*
152: * Callback called from within the flush routine to flush any extra
153: * bits. Note that flush may be skipped without calling this if
154: * there are no outstanding buffers in the transaction.
155: */
1.5 joerg 156: #if _KERNEL
1.2 simonb 157: wapbl_flush_fn_t wl_flush; /* r */
158: wapbl_flush_fn_t wl_flush_abort;/* r */
1.5 joerg 159: #endif
1.2 simonb 160:
161: size_t wl_bufbytes; /* m: Byte count of pages in wl_bufs */
162: size_t wl_bufcount; /* m: Count of buffers in wl_bufs */
163: size_t wl_bcount; /* m: Total bcount of wl_bufs */
164:
165: LIST_HEAD(, buf) wl_bufs; /* m: Buffers in current transaction */
166:
167: kcondvar_t wl_reclaimable_cv; /* m (obviously) */
168: size_t wl_reclaimable_bytes; /* m: Amount of space available for
169: reclamation by truncate */
170: int wl_error_count; /* m: # of wl_entries with errors */
171: size_t wl_reserved_bytes; /* never truncate log smaller than this */
172:
173: #ifdef WAPBL_DEBUG_BUFBYTES
174: size_t wl_unsynced_bufbytes; /* Byte count of unsynced buffers */
175: #endif
176:
1.38 hannken 177: daddr_t *wl_deallocblks;/* lm: address of block */
178: int *wl_dealloclens; /* lm: size of block */
179: int wl_dealloccnt; /* lm: total count */
1.2 simonb 180: int wl_dealloclim; /* l: max count */
181:
182: /* hashtable of inode numbers for allocated but unlinked inodes */
183: /* synch ??? */
1.60 matt 184: struct wapbl_ino_head *wl_inohash;
1.2 simonb 185: u_long wl_inohashmask;
186: int wl_inohashcnt;
187:
188: SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
189: accounting */
1.54 hannken 190:
191: u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */
192: daddr_t wl_buffer_dblk; /* l: buffer disk block address */
193: size_t wl_buffer_used; /* l: buffer current use */
1.2 simonb 194: };
195:
196: #ifdef WAPBL_DEBUG_PRINT
197: int wapbl_debug_print = WAPBL_DEBUG_PRINT;
198: #endif
199:
200: /****************************************************************/
201: #ifdef _KERNEL
202:
203: #ifdef WAPBL_DEBUG
204: struct wapbl *wapbl_debug_wl;
205: #endif
206:
207: static int wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail);
208: static int wapbl_write_blocks(struct wapbl *wl, off_t *offp);
209: static int wapbl_write_revocations(struct wapbl *wl, off_t *offp);
210: static int wapbl_write_inodes(struct wapbl *wl, off_t *offp);
211: #endif /* _KERNEL */
212:
1.14 joerg 213: static int wapbl_replay_process(struct wapbl_replay *wr, off_t, off_t);
1.2 simonb 214:
1.30 uebayasi 215: static inline size_t wapbl_space_used(size_t avail, off_t head,
1.2 simonb 216: off_t tail);
217:
218: #ifdef _KERNEL
219:
1.51 para 220: static struct pool wapbl_entry_pool;
221:
1.2 simonb 222: #define WAPBL_INODETRK_SIZE 83
223: static int wapbl_ino_pool_refcount;
224: static struct pool wapbl_ino_pool;
225: struct wapbl_ino {
226: LIST_ENTRY(wapbl_ino) wi_hash;
227: ino_t wi_ino;
228: mode_t wi_mode;
229: };
230:
231: static void wapbl_inodetrk_init(struct wapbl *wl, u_int size);
232: static void wapbl_inodetrk_free(struct wapbl *wl);
233: static struct wapbl_ino *wapbl_inodetrk_get(struct wapbl *wl, ino_t ino);
234:
235: static size_t wapbl_transaction_len(struct wapbl *wl);
1.30 uebayasi 236: static inline size_t wapbl_transaction_inodes_len(struct wapbl *wl);
1.2 simonb 237:
1.13 joerg 238: #if 0
1.4 joerg 239: int wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
240: #endif
241:
242: static int wapbl_replay_isopen1(struct wapbl_replay *);
243:
1.2 simonb 244: /*
245: * This is useful for debugging. If set, the log will
246: * only be truncated when necessary.
247: */
248: int wapbl_lazy_truncate = 0;
249:
250: struct wapbl_ops wapbl_ops = {
251: .wo_wapbl_discard = wapbl_discard,
252: .wo_wapbl_replay_isopen = wapbl_replay_isopen1,
1.6 joerg 253: .wo_wapbl_replay_can_read = wapbl_replay_can_read,
1.2 simonb 254: .wo_wapbl_replay_read = wapbl_replay_read,
255: .wo_wapbl_add_buf = wapbl_add_buf,
256: .wo_wapbl_remove_buf = wapbl_remove_buf,
257: .wo_wapbl_resize_buf = wapbl_resize_buf,
258: .wo_wapbl_begin = wapbl_begin,
259: .wo_wapbl_end = wapbl_end,
260: .wo_wapbl_junlock_assert= wapbl_junlock_assert,
261:
262: /* XXX: the following is only used to say "this is a wapbl buf" */
263: .wo_wapbl_biodone = wapbl_biodone,
264: };
265:
1.21 yamt 266: static int
1.39 christos 267: wapbl_sysctl_init(void)
268: {
269: int rv;
270: const struct sysctlnode *rnode, *cnode;
271:
272: wapbl_sysctl = NULL;
273:
274: rv = sysctl_createv(&wapbl_sysctl, 0, NULL, &rnode,
275: CTLFLAG_PERMANENT,
276: CTLTYPE_NODE, "wapbl",
277: SYSCTL_DESCR("WAPBL journaling options"),
278: NULL, 0, NULL, 0,
1.59 pooka 279: CTL_VFS, CTL_CREATE, CTL_EOL);
1.39 christos 280: if (rv)
281: return rv;
282:
283: rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
284: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
285: CTLTYPE_INT, "flush_disk_cache",
286: SYSCTL_DESCR("flush disk cache"),
287: NULL, 0, &wapbl_flush_disk_cache, 0,
288: CTL_CREATE, CTL_EOL);
289: if (rv)
290: return rv;
291:
292: rv = sysctl_createv(&wapbl_sysctl, 0, &rnode, &cnode,
293: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
294: CTLTYPE_INT, "verbose_commit",
295: SYSCTL_DESCR("show time and size of wapbl log commits"),
296: NULL, 0, &wapbl_verbose_commit, 0,
297: CTL_CREATE, CTL_EOL);
298: return rv;
299: }
300:
301: static void
302: wapbl_init(void)
303: {
1.51 para 304:
305: pool_init(&wapbl_entry_pool, sizeof(struct wapbl_entry), 0, 0, 0,
306: "wapblentrypl", &pool_allocator_kmem, IPL_VM);
307:
1.39 christos 308: wapbl_sysctl_init();
309: }
310:
311: #ifdef notyet
312: static int
313: wapbl_fini(bool interface)
314: {
1.51 para 315:
1.39 christos 316: if (aio_sysctl != NULL)
317: sysctl_teardown(&aio_sysctl);
1.51 para 318:
319: pool_destroy(&wapbl_entry_pool);
320:
1.39 christos 321: return 0;
322: }
323: #endif
324:
/*
 * Carry the list of allocated-but-unlinked inodes recorded in a
 * previously replayed log (wr) over into the freshly opened log (wl),
 * and write that list as the initial log contents.
 *
 * Caller (wapbl_start) guarantees wr describes the same on-disk log
 * as wl; the KASSERTs below spell out that contract.
 *
 * Returns 0 on success or an error from wapbl_write_inodes().
 */
static int
wapbl_start_flush_inodes(struct wapbl *wl, struct wapbl_replay *wr)
{
	int error, i;

	WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
	    ("wapbl_start: reusing log with %d inodes\n", wr->wr_inodescnt));

	/*
	 * Its only valid to reuse the replay log if its
	 * the same as the new log we just opened.
	 */
	KDASSERT(!wapbl_replay_isopen(wr));
	KASSERT(wl->wl_devvp->v_type == VBLK);
	KASSERT(wr->wr_devvp->v_type == VBLK);
	KASSERT(wl->wl_devvp->v_rdev == wr->wr_devvp->v_rdev);
	KASSERT(wl->wl_logpbn == wr->wr_logpbn);
	KASSERT(wl->wl_circ_size == wr->wr_circ_size);
	KASSERT(wl->wl_circ_off == wr->wr_circ_off);
	KASSERT(wl->wl_log_dev_bshift == wr->wr_log_dev_bshift);
	KASSERT(wl->wl_fs_dev_bshift == wr->wr_fs_dev_bshift);

	/* Continue the commit-generation sequence of the old log. */
	wl->wl_wc_header->wc_generation = wr->wr_generation + 1;

	/* Re-register every unlinked inode the old log tracked. */
	for (i = 0; i < wr->wr_inodescnt; i++)
		wapbl_register_inode(wl, wr->wr_inodes[i].wr_inumber,
		    wr->wr_inodes[i].wr_imode);

	/* Make sure new transaction won't overwrite old inodes list */
	KDASSERT(wapbl_transaction_len(wl) <=
	    wapbl_space_free(wl->wl_circ_size, wr->wr_inodeshead,
	    wr->wr_inodestail));

	/* Start head and tail where the old log's inode list lived. */
	wl->wl_head = wl->wl_tail = wr->wr_inodeshead;
	wl->wl_reclaimable_bytes = wl->wl_reserved_bytes =
	    wapbl_transaction_len(wl);

	/* Persist the inherited inode list; advances wl_head. */
	error = wapbl_write_inodes(wl, &wl->wl_head);
	if (error)
		return error;

	/* A non-empty log: head must have moved and be non-zero. */
	KASSERT(wl->wl_head != wl->wl_tail);
	KASSERT(wl->wl_head != 0);

	return 0;
}
371:
1.2 simonb 372: int
373: wapbl_start(struct wapbl ** wlp, struct mount *mp, struct vnode *vp,
374: daddr_t off, size_t count, size_t blksize, struct wapbl_replay *wr,
375: wapbl_flush_fn_t flushfn, wapbl_flush_fn_t flushabortfn)
376: {
377: struct wapbl *wl;
378: struct vnode *devvp;
379: daddr_t logpbn;
380: int error;
1.31 mlelstv 381: int log_dev_bshift = ilog2(blksize);
1.32 mlelstv 382: int fs_dev_bshift = log_dev_bshift;
1.2 simonb 383: int run;
384:
385: WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_start: vp=%p off=%" PRId64
386: " count=%zu blksize=%zu\n", vp, off, count, blksize));
387:
388: if (log_dev_bshift > fs_dev_bshift) {
389: WAPBL_PRINTF(WAPBL_PRINT_OPEN,
390: ("wapbl: log device's block size cannot be larger "
391: "than filesystem's\n"));
392: /*
393: * Not currently implemented, although it could be if
394: * needed someday.
395: */
396: return ENOSYS;
397: }
398:
399: if (off < 0)
400: return EINVAL;
401:
402: if (blksize < DEV_BSIZE)
403: return EINVAL;
404: if (blksize % DEV_BSIZE)
405: return EINVAL;
406:
407: /* XXXTODO: verify that the full load is writable */
408:
409: /*
410: * XXX check for minimum log size
411: * minimum is governed by minimum amount of space
412: * to complete a transaction. (probably truncate)
413: */
414: /* XXX for now pick something minimal */
415: if ((count * blksize) < MAXPHYS) {
416: return ENOSPC;
417: }
418:
419: if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, &run)) != 0) {
420: return error;
421: }
422:
423: wl = wapbl_calloc(1, sizeof(*wl));
424: rw_init(&wl->wl_rwlock);
425: mutex_init(&wl->wl_mtx, MUTEX_DEFAULT, IPL_NONE);
426: cv_init(&wl->wl_reclaimable_cv, "wapblrec");
427: LIST_INIT(&wl->wl_bufs);
428: SIMPLEQ_INIT(&wl->wl_entries);
429:
430: wl->wl_logvp = vp;
431: wl->wl_devvp = devvp;
432: wl->wl_mount = mp;
433: wl->wl_logpbn = logpbn;
434: wl->wl_log_dev_bshift = log_dev_bshift;
435: wl->wl_fs_dev_bshift = fs_dev_bshift;
436:
437: wl->wl_flush = flushfn;
438: wl->wl_flush_abort = flushabortfn;
439:
440: /* Reserve two log device blocks for the commit headers */
441: wl->wl_circ_off = 2<<wl->wl_log_dev_bshift;
1.34 mlelstv 442: wl->wl_circ_size = ((count * blksize) - wl->wl_circ_off);
1.2 simonb 443: /* truncate the log usage to a multiple of log_dev_bshift */
444: wl->wl_circ_size >>= wl->wl_log_dev_bshift;
445: wl->wl_circ_size <<= wl->wl_log_dev_bshift;
446:
447: /*
448: * wl_bufbytes_max limits the size of the in memory transaction space.
449: * - Since buffers are allocated and accounted for in units of
450: * PAGE_SIZE it is required to be a multiple of PAGE_SIZE
451: * (i.e. 1<<PAGE_SHIFT)
452: * - Since the log device has to be written in units of
453: * 1<<wl_log_dev_bshift it is required to be a mulitple of
454: * 1<<wl_log_dev_bshift.
455: * - Since filesystem will provide data in units of 1<<wl_fs_dev_bshift,
456: * it is convenient to be a multiple of 1<<wl_fs_dev_bshift.
457: * Therefore it must be multiple of the least common multiple of those
458: * three quantities. Fortunately, all of those quantities are
459: * guaranteed to be a power of two, and the least common multiple of
460: * a set of numbers which are all powers of two is simply the maximum
461: * of those numbers. Finally, the maximum logarithm of a power of two
462: * is the same as the log of the maximum power of two. So we can do
463: * the following operations to size wl_bufbytes_max:
464: */
465:
466: /* XXX fix actual number of pages reserved per filesystem. */
467: wl->wl_bufbytes_max = MIN(wl->wl_circ_size, buf_memcalc() / 2);
468:
469: /* Round wl_bufbytes_max to the largest power of two constraint */
470: wl->wl_bufbytes_max >>= PAGE_SHIFT;
471: wl->wl_bufbytes_max <<= PAGE_SHIFT;
472: wl->wl_bufbytes_max >>= wl->wl_log_dev_bshift;
473: wl->wl_bufbytes_max <<= wl->wl_log_dev_bshift;
474: wl->wl_bufbytes_max >>= wl->wl_fs_dev_bshift;
475: wl->wl_bufbytes_max <<= wl->wl_fs_dev_bshift;
476:
477: /* XXX maybe use filesystem fragment size instead of 1024 */
478: /* XXX fix actual number of buffers reserved per filesystem. */
479: wl->wl_bufcount_max = (nbuf / 2) * 1024;
480:
481: /* XXX tie this into resource estimation */
1.41 hannken 482: wl->wl_dealloclim = wl->wl_bufbytes_max / mp->mnt_stat.f_bsize / 2;
1.2 simonb 483:
1.51 para 484: wl->wl_deallocblks = wapbl_alloc(sizeof(*wl->wl_deallocblks) *
1.2 simonb 485: wl->wl_dealloclim);
1.51 para 486: wl->wl_dealloclens = wapbl_alloc(sizeof(*wl->wl_dealloclens) *
1.2 simonb 487: wl->wl_dealloclim);
488:
1.54 hannken 489: wl->wl_buffer = wapbl_alloc(MAXPHYS);
490: wl->wl_buffer_used = 0;
491:
1.2 simonb 492: wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
493:
494: /* Initialize the commit header */
495: {
496: struct wapbl_wc_header *wc;
1.14 joerg 497: size_t len = 1 << wl->wl_log_dev_bshift;
1.2 simonb 498: wc = wapbl_calloc(1, len);
499: wc->wc_type = WAPBL_WC_HEADER;
500: wc->wc_len = len;
501: wc->wc_circ_off = wl->wl_circ_off;
502: wc->wc_circ_size = wl->wl_circ_size;
503: /* XXX wc->wc_fsid */
504: wc->wc_log_dev_bshift = wl->wl_log_dev_bshift;
505: wc->wc_fs_dev_bshift = wl->wl_fs_dev_bshift;
506: wl->wl_wc_header = wc;
1.51 para 507: wl->wl_wc_scratch = wapbl_alloc(len);
1.2 simonb 508: }
509:
510: /*
511: * if there was an existing set of unlinked but
512: * allocated inodes, preserve it in the new
513: * log.
514: */
515: if (wr && wr->wr_inodescnt) {
1.15 joerg 516: error = wapbl_start_flush_inodes(wl, wr);
1.2 simonb 517: if (error)
518: goto errout;
519: }
520:
521: error = wapbl_write_commit(wl, wl->wl_head, wl->wl_tail);
522: if (error) {
523: goto errout;
524: }
525:
526: *wlp = wl;
527: #if defined(WAPBL_DEBUG)
528: wapbl_debug_wl = wl;
529: #endif
530:
531: return 0;
532: errout:
533: wapbl_discard(wl);
1.18 yamt 534: wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
535: wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
536: wapbl_free(wl->wl_deallocblks,
537: sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim);
538: wapbl_free(wl->wl_dealloclens,
539: sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
1.54 hannken 540: wapbl_free(wl->wl_buffer, MAXPHYS);
1.2 simonb 541: wapbl_inodetrk_free(wl);
1.18 yamt 542: wapbl_free(wl, sizeof(*wl));
1.2 simonb 543:
544: return error;
545: }
546:
547: /*
548: * Like wapbl_flush, only discards the transaction
549: * completely
550: */
551:
552: void
553: wapbl_discard(struct wapbl *wl)
554: {
555: struct wapbl_entry *we;
556: struct buf *bp;
557: int i;
558:
559: /*
560: * XXX we may consider using upgrade here
561: * if we want to call flush from inside a transaction
562: */
563: rw_enter(&wl->wl_rwlock, RW_WRITER);
564: wl->wl_flush(wl->wl_mount, wl->wl_deallocblks, wl->wl_dealloclens,
565: wl->wl_dealloccnt);
566:
567: #ifdef WAPBL_DEBUG_PRINT
568: {
569: pid_t pid = -1;
570: lwpid_t lid = -1;
571: if (curproc)
572: pid = curproc->p_pid;
573: if (curlwp)
574: lid = curlwp->l_lid;
575: #ifdef WAPBL_DEBUG_BUFBYTES
576: WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
577: ("wapbl_discard: thread %d.%d discarding "
578: "transaction\n"
579: "\tbufcount=%zu bufbytes=%zu bcount=%zu "
580: "deallocs=%d inodes=%d\n"
581: "\terrcnt = %u, reclaimable=%zu reserved=%zu "
582: "unsynced=%zu\n",
583: pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
584: wl->wl_bcount, wl->wl_dealloccnt,
585: wl->wl_inohashcnt, wl->wl_error_count,
586: wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
587: wl->wl_unsynced_bufbytes));
588: SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
589: WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
590: ("\tentry: bufcount = %zu, reclaimable = %zu, "
591: "error = %d, unsynced = %zu\n",
592: we->we_bufcount, we->we_reclaimable_bytes,
593: we->we_error, we->we_unsynced_bufbytes));
594: }
595: #else /* !WAPBL_DEBUG_BUFBYTES */
596: WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
597: ("wapbl_discard: thread %d.%d discarding transaction\n"
598: "\tbufcount=%zu bufbytes=%zu bcount=%zu "
599: "deallocs=%d inodes=%d\n"
600: "\terrcnt = %u, reclaimable=%zu reserved=%zu\n",
601: pid, lid, wl->wl_bufcount, wl->wl_bufbytes,
602: wl->wl_bcount, wl->wl_dealloccnt,
603: wl->wl_inohashcnt, wl->wl_error_count,
604: wl->wl_reclaimable_bytes, wl->wl_reserved_bytes));
605: SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
606: WAPBL_PRINTF(WAPBL_PRINT_DISCARD,
607: ("\tentry: bufcount = %zu, reclaimable = %zu, "
608: "error = %d\n",
609: we->we_bufcount, we->we_reclaimable_bytes,
610: we->we_error));
611: }
612: #endif /* !WAPBL_DEBUG_BUFBYTES */
613: }
614: #endif /* WAPBL_DEBUG_PRINT */
615:
616: for (i = 0; i <= wl->wl_inohashmask; i++) {
617: struct wapbl_ino_head *wih;
618: struct wapbl_ino *wi;
619:
620: wih = &wl->wl_inohash[i];
621: while ((wi = LIST_FIRST(wih)) != NULL) {
622: LIST_REMOVE(wi, wi_hash);
623: pool_put(&wapbl_ino_pool, wi);
624: KASSERT(wl->wl_inohashcnt > 0);
625: wl->wl_inohashcnt--;
626: }
627: }
628:
629: /*
630: * clean buffer list
631: */
632: mutex_enter(&bufcache_lock);
633: mutex_enter(&wl->wl_mtx);
634: while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
635: if (bbusy(bp, 0, 0, &wl->wl_mtx) == 0) {
636: /*
637: * The buffer will be unlocked and
638: * removed from the transaction in brelse
639: */
640: mutex_exit(&wl->wl_mtx);
641: brelsel(bp, 0);
642: mutex_enter(&wl->wl_mtx);
643: }
644: }
645: mutex_exit(&wl->wl_mtx);
646: mutex_exit(&bufcache_lock);
647:
648: /*
649: * Remove references to this wl from wl_entries, free any which
650: * no longer have buffers, others will be freed in wapbl_biodone
651: * when they no longer have any buffers.
652: */
653: while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) != NULL) {
654: SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
655: /* XXX should we be accumulating wl_error_count
656: * and increasing reclaimable bytes ? */
657: we->we_wapbl = NULL;
658: if (we->we_bufcount == 0) {
659: #ifdef WAPBL_DEBUG_BUFBYTES
660: KASSERT(we->we_unsynced_bufbytes == 0);
661: #endif
1.51 para 662: pool_put(&wapbl_entry_pool, we);
1.2 simonb 663: }
664: }
665:
666: /* Discard list of deallocs */
667: wl->wl_dealloccnt = 0;
668: /* XXX should we clear wl_reserved_bytes? */
669:
670: KASSERT(wl->wl_bufbytes == 0);
671: KASSERT(wl->wl_bcount == 0);
672: KASSERT(wl->wl_bufcount == 0);
673: KASSERT(LIST_EMPTY(&wl->wl_bufs));
674: KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
675: KASSERT(wl->wl_inohashcnt == 0);
676:
677: rw_exit(&wl->wl_rwlock);
678: }
679:
/*
 * Shut the log down: flush (or, with force, discard) any outstanding
 * transaction and free all log state.
 *
 * Returns 0 on success; without force, returns the flush error or
 * EBUSY if unlinked inodes are still being tracked (those persist
 * across a flush and can only be dropped by discarding).
 */
int
wapbl_stop(struct wapbl *wl, int force)
{
	int error;

	WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("wapbl_stop called\n"));
	error = wapbl_flush(wl, 1);
	if (error) {
		if (force)
			wapbl_discard(wl);
		else
			return error;
	}

	/* Unlinked inodes persist after a flush */
	if (wl->wl_inohashcnt) {
		if (force) {
			wapbl_discard(wl);
		} else {
			return EBUSY;
		}
	}

	/* By now the log must be completely drained. */
	KASSERT(wl->wl_bufbytes == 0);
	KASSERT(wl->wl_bcount == 0);
	KASSERT(wl->wl_bufcount == 0);
	KASSERT(LIST_EMPTY(&wl->wl_bufs));
	KASSERT(wl->wl_dealloccnt == 0);
	KASSERT(SIMPLEQ_EMPTY(&wl->wl_entries));
	KASSERT(wl->wl_inohashcnt == 0);

	/* Release everything wapbl_start() allocated. */
	wapbl_free(wl->wl_wc_scratch, wl->wl_wc_header->wc_len);
	wapbl_free(wl->wl_wc_header, wl->wl_wc_header->wc_len);
	wapbl_free(wl->wl_deallocblks,
	    sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim);
	wapbl_free(wl->wl_dealloclens,
	    sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
	wapbl_free(wl->wl_buffer, MAXPHYS);
	wapbl_inodetrk_free(wl);

	cv_destroy(&wl->wl_reclaimable_cv);
	mutex_destroy(&wl->wl_mtx);
	rw_destroy(&wl->wl_rwlock);
	wapbl_free(wl, sizeof(*wl));

	return 0;
}
727:
/*
 * Perform one synchronous I/O of len bytes at physical block pbn on
 * devvp.  flags must be exactly B_WRITE or B_READ.  Accounts the I/O
 * against the current process's rusage.  Returns the biowait() error.
 */
static int
wapbl_doio(void *data, size_t len, struct vnode *devvp, daddr_t pbn, int flags)
{
	struct pstats *pstats = curlwp->l_proc->p_stats;
	struct buf *bp;
	int error;

	KASSERT((flags & ~(B_WRITE | B_READ)) == 0);
	KASSERT(devvp->v_type == VBLK);

	if ((flags & (B_WRITE | B_READ)) == B_WRITE) {
		/* Writes must be counted in v_numoutput for vflushbuf. */
		mutex_enter(devvp->v_interlock);
		devvp->v_numoutput++;
		mutex_exit(devvp->v_interlock);
		pstats->p_ru.ru_oublock++;
	} else {
		pstats->p_ru.ru_inblock++;
	}

	bp = getiobuf(devvp, true);
	bp->b_flags = flags;
	bp->b_cflags = BC_BUSY; /* silly & dubious */
	bp->b_dev = devvp->v_rdev;
	bp->b_data = data;
	bp->b_bufsize = bp->b_resid = bp->b_bcount = len;
	bp->b_blkno = pbn;
	/* Journal I/O stalls the whole filesystem: highest priority. */
	BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);

	WAPBL_PRINTF(WAPBL_PRINT_IO,
	    ("wapbl_doio: %s %d bytes at block %"PRId64" on dev 0x%"PRIx64"\n",
	    BUF_ISWRITE(bp) ? "write" : "read", bp->b_bcount,
	    bp->b_blkno, bp->b_dev));

	VOP_STRATEGY(devvp, bp);

	error = biowait(bp);
	putiobuf(bp);

	if (error) {
		WAPBL_PRINTF(WAPBL_PRINT_ERROR,
		    ("wapbl_doio: %s %zu bytes at block %" PRId64
		    " on dev 0x%"PRIx64" failed with error %d\n",
		    (((flags & (B_WRITE | B_READ)) == B_WRITE) ?
		     "write" : "read"),
		    len, pbn, devvp->v_rdev, error));
	}

	return error;
}
777:
778: int
779: wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
780: {
781:
782: return wapbl_doio(data, len, devvp, pbn, B_WRITE);
783: }
784:
785: int
786: wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
787: {
788:
789: return wapbl_doio(data, len, devvp, pbn, B_READ);
790: }
791:
792: /*
1.54 hannken 793: * Flush buffered data if any.
794: */
795: static int
796: wapbl_buffered_flush(struct wapbl *wl)
797: {
798: int error;
799:
800: if (wl->wl_buffer_used == 0)
801: return 0;
802:
803: error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
804: wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
805: wl->wl_buffer_used = 0;
806:
807: return error;
808: }
809:
810: /*
811: * Write data to the log.
812: * Try to coalesce writes and emit MAXPHYS aligned blocks.
813: */
814: static int
815: wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
816: {
817: int error;
818: size_t resid;
819:
820: /*
821: * If not adjacent to buffered data flush first. Disk block
822: * address is always valid for non-empty buffer.
823: */
824: if (wl->wl_buffer_used > 0 &&
825: pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) {
826: error = wapbl_buffered_flush(wl);
827: if (error)
828: return error;
829: }
830: /*
831: * If this write goes to an empty buffer we have to
832: * save the disk block address first.
833: */
834: if (wl->wl_buffer_used == 0)
835: wl->wl_buffer_dblk = pbn;
836: /*
837: * Remaining space so this buffer ends on a MAXPHYS boundary.
838: *
839: * Cannot become less or equal zero as the buffer would have been
840: * flushed on the last call then.
841: */
842: resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) -
843: wl->wl_buffer_used;
844: KASSERT(resid > 0);
845: KASSERT(dbtob(btodb(resid)) == resid);
846: if (len >= resid) {
847: memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid);
848: wl->wl_buffer_used += resid;
849: error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
850: wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
851: data = (uint8_t *)data + resid;
852: len -= resid;
853: wl->wl_buffer_dblk = pbn + btodb(resid);
854: wl->wl_buffer_used = 0;
855: if (error)
856: return error;
857: }
858: KASSERT(len < MAXPHYS);
859: if (len > 0) {
860: memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len);
861: wl->wl_buffer_used += len;
862: }
863:
864: return 0;
865: }
866:
867: /*
1.2 simonb 868: * Off is byte offset returns new offset for next write
869: * handles log wraparound
870: */
871: static int
872: wapbl_circ_write(struct wapbl *wl, void *data, size_t len, off_t *offp)
873: {
874: size_t slen;
875: off_t off = *offp;
876: int error;
1.34 mlelstv 877: daddr_t pbn;
1.2 simonb 878:
879: KDASSERT(((len >> wl->wl_log_dev_bshift) <<
880: wl->wl_log_dev_bshift) == len);
881:
882: if (off < wl->wl_circ_off)
883: off = wl->wl_circ_off;
884: slen = wl->wl_circ_off + wl->wl_circ_size - off;
885: if (slen < len) {
1.34 mlelstv 886: pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
887: #ifdef _KERNEL
888: pbn = btodb(pbn << wl->wl_log_dev_bshift);
889: #endif
1.54 hannken 890: error = wapbl_buffered_write(data, slen, wl, pbn);
1.2 simonb 891: if (error)
892: return error;
893: data = (uint8_t *)data + slen;
894: len -= slen;
895: off = wl->wl_circ_off;
896: }
1.34 mlelstv 897: pbn = wl->wl_logpbn + (off >> wl->wl_log_dev_bshift);
898: #ifdef _KERNEL
899: pbn = btodb(pbn << wl->wl_log_dev_bshift);
900: #endif
1.54 hannken 901: error = wapbl_buffered_write(data, len, wl, pbn);
1.2 simonb 902: if (error)
903: return error;
904: off += len;
905: if (off >= wl->wl_circ_off + wl->wl_circ_size)
906: off = wl->wl_circ_off;
907: *offp = off;
908: return 0;
909: }
910:
911: /****************************************************************/
912:
/*
 * Begin a journal transaction: possibly flush first if the pending
 * transaction space is getting full, then take the transaction lock
 * as reader (many concurrent transactions; flush takes it as writer)
 * and bump the in-progress count.  file/line identify the caller for
 * debug output.  Returns 0 or a wapbl_flush() error.
 */
int
wapbl_begin(struct wapbl *wl, const char *file, int line)
{
	int doflush;
	unsigned lockcount;

	KDASSERT(wl);

	/*
	 * XXX this needs to be made much more sophisticated.
	 * perhaps each wapbl_begin could reserve a specified
	 * number of buffers and bytes.
	 */
	mutex_enter(&wl->wl_mtx);
	lockcount = wl->wl_lock_count;
	/*
	 * Flush when any resource is past half its limit, assuming
	 * each in-progress transaction may still add up to MAXPHYS
	 * bytes / 10 buffers.
	 */
	doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
		   wl->wl_bufbytes_max / 2) ||
		  ((wl->wl_bufcount + (lockcount * 10)) >
		   wl->wl_bufcount_max / 2) ||
		  (wapbl_transaction_len(wl) > wl->wl_circ_size / 2) ||
		  (wl->wl_dealloccnt >= (wl->wl_dealloclim / 2));
	mutex_exit(&wl->wl_mtx);

	if (doflush) {
		WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
		    ("force flush lockcnt=%d bufbytes=%zu "
		    "(max=%zu) bufcount=%zu (max=%zu) "
		    "dealloccnt %d (lim=%d)\n",
		    lockcount, wl->wl_bufbytes,
		    wl->wl_bufbytes_max, wl->wl_bufcount,
		    wl->wl_bufcount_max,
		    wl->wl_dealloccnt, wl->wl_dealloclim));
	}

	if (doflush) {
		int error = wapbl_flush(wl, 0);
		if (error)
			return error;
	}

	rw_enter(&wl->wl_rwlock, RW_READER);
	mutex_enter(&wl->wl_mtx);
	wl->wl_lock_count++;
	mutex_exit(&wl->wl_mtx);

#if defined(WAPBL_DEBUG_PRINT)
	WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
	    ("wapbl_begin thread %d.%d with bufcount=%zu "
	    "bufbytes=%zu bcount=%zu at %s:%d\n",
	    curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
	    wl->wl_bufbytes, wl->wl_bcount, file, line));
#endif

	return 0;
}
968:
/*
 * End a transaction begun with wapbl_begin(): drop the in-progress
 * count and release the reader hold on the transaction lock.  Under
 * DIAGNOSTIC, verify the accumulated transaction still fits in the
 * log before the journal's protection is relied on at flush time.
 */
void
wapbl_end(struct wapbl *wl)
{

#if defined(WAPBL_DEBUG_PRINT)
	WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
	    ("wapbl_end thread %d.%d with bufcount=%zu "
	    "bufbytes=%zu bcount=%zu\n",
	    curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
	    wl->wl_bufbytes, wl->wl_bcount));
#endif

#ifdef DIAGNOSTIC
	size_t flushsize = wapbl_transaction_len(wl);
	if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) {
		/*
		 * XXX this could be handled more gracefully, perhaps place
		 * only a partial transaction in the log and allow the
		 * remaining to flush without the protection of the journal.
		 */
		panic("wapbl_end: current transaction too big to flush\n");
	}
#endif

	mutex_enter(&wl->wl_mtx);
	KASSERT(wl->wl_lock_count > 0);
	wl->wl_lock_count--;
	mutex_exit(&wl->wl_mtx);

	rw_exit(&wl->wl_rwlock);
}
1000:
/*
 * wapbl_add_buf: associate a busy metadata buffer with the current
 * transaction.
 *
 * If the buffer is already part of the transaction (B_LOCKED set) it
 * is merely moved to the head of the list; otherwise the per-log byte
 * and buffer accounting is charged and B_LOCKED is set so the buffer
 * cache will not write it out behind the journal's back.
 */
void
wapbl_add_buf(struct wapbl *wl, struct buf * bp)
{

	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(bp->b_vp);

	wapbl_jlock_assert(wl);

#if 0
	/*
	 * XXX this might be an issue for swapfiles.
	 * see uvm_swap.c:1702
	 *
	 * XXX2 why require it then? leap of semantics?
	 */
	KASSERT((bp->b_cflags & BC_NOCACHE) == 0);
#endif

	mutex_enter(&wl->wl_mtx);
	if (bp->b_flags & B_LOCKED) {
		/* Already in this transaction: requeue at the head. */
		LIST_REMOVE(bp, b_wapbllist);
		WAPBL_PRINTF(WAPBL_PRINT_BUFFER2,
		    ("wapbl_add_buf thread %d.%d re-adding buf %p "
		    "with %d bytes %d bcount\n",
		    curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
		    bp->b_bcount));
	} else {
		/* unlocked by dirty buffers shouldn't exist */
		KASSERT(!(bp->b_oflags & BO_DELWRI));
		wl->wl_bufbytes += bp->b_bufsize;
		wl->wl_bcount += bp->b_bcount;
		wl->wl_bufcount++;
		WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
		    ("wapbl_add_buf thread %d.%d adding buf %p "
		    "with %d bytes %d bcount\n",
		    curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize,
		    bp->b_bcount));
	}
	LIST_INSERT_HEAD(&wl->wl_bufs, bp, b_wapbllist);
	mutex_exit(&wl->wl_mtx);

	bp->b_flags |= B_LOCKED;
}
1045:
1046: static void
1047: wapbl_remove_buf_locked(struct wapbl * wl, struct buf *bp)
1048: {
1049:
1050: KASSERT(mutex_owned(&wl->wl_mtx));
1051: KASSERT(bp->b_cflags & BC_BUSY);
1052: wapbl_jlock_assert(wl);
1053:
1054: #if 0
1055: /*
1056: * XXX this might be an issue for swapfiles.
1057: * see uvm_swap.c:1725
1058: *
1059: * XXXdeux: see above
1060: */
1061: KASSERT((bp->b_flags & BC_NOCACHE) == 0);
1062: #endif
1063: KASSERT(bp->b_flags & B_LOCKED);
1064:
1065: WAPBL_PRINTF(WAPBL_PRINT_BUFFER,
1066: ("wapbl_remove_buf thread %d.%d removing buf %p with "
1067: "%d bytes %d bcount\n",
1068: curproc->p_pid, curlwp->l_lid, bp, bp->b_bufsize, bp->b_bcount));
1069:
1070: KASSERT(wl->wl_bufbytes >= bp->b_bufsize);
1071: wl->wl_bufbytes -= bp->b_bufsize;
1072: KASSERT(wl->wl_bcount >= bp->b_bcount);
1073: wl->wl_bcount -= bp->b_bcount;
1074: KASSERT(wl->wl_bufcount > 0);
1075: wl->wl_bufcount--;
1076: KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
1077: KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
1078: LIST_REMOVE(bp, b_wapbllist);
1079:
1080: bp->b_flags &= ~B_LOCKED;
1081: }
1082:
/* called from brelsel() in vfs_bio among other places */
/*
 * wapbl_remove_buf: locked wrapper around wapbl_remove_buf_locked().
 */
void
wapbl_remove_buf(struct wapbl * wl, struct buf *bp)
{

	mutex_enter(&wl->wl_mtx);
	wapbl_remove_buf_locked(wl, bp);
	mutex_exit(&wl->wl_mtx);
}
1092:
/*
 * wapbl_resize_buf: adjust transaction accounting after a buffer grew
 * or shrank from oldsz/oldcnt to its current b_bufsize/b_bcount.
 *
 * Only buffers that belong to a transaction (B_LOCKED) are tracked;
 * for others this is a no-op.
 */
void
wapbl_resize_buf(struct wapbl *wl, struct buf *bp, long oldsz, long oldcnt)
{

	KASSERT(bp->b_cflags & BC_BUSY);

	/*
	 * XXX: why does this depend on B_LOCKED?  otherwise the buf
	 * is not for a transaction?  if so, why is this called in the
	 * first place?
	 */
	if (bp->b_flags & B_LOCKED) {
		mutex_enter(&wl->wl_mtx);
		wl->wl_bufbytes += bp->b_bufsize - oldsz;
		wl->wl_bcount += bp->b_bcount - oldcnt;
		mutex_exit(&wl->wl_mtx);
	}
}
1111:
1112: #endif /* _KERNEL */
1113:
1114: /****************************************************************/
1115: /* Some utility inlines */
1116:
1.56 joerg 1117: static inline size_t
1118: wapbl_space_used(size_t avail, off_t head, off_t tail)
1119: {
1120:
1121: if (tail == 0) {
1122: KASSERT(head == 0);
1123: return 0;
1124: }
1125: return ((head + (avail - 1) - tail) % avail) + 1;
1126: }
1127:
1128: #ifdef _KERNEL
/*
 * wapbl_advance: advance a circular-log offset oldoff by delta bytes.
 *
 * The log occupies the byte range [off, off + size).  An offset of 0
 * is the distinguished "empty/unset" value and never denotes a real
 * position: advancing 0 by a non-zero delta starts counting from the
 * beginning of the log region, and any result that would run past the
 * end wraps back by size.
 */
static inline off_t
wapbl_advance(size_t size, size_t off, off_t oldoff, size_t delta)
{
	off_t newoff;

	/* Define acceptable ranges for inputs. */
	KASSERT(delta <= (size_t)size);
	KASSERT((oldoff == 0) || ((size_t)oldoff >= off));
	KASSERT(oldoff < (off_t)(size + off));

	if ((oldoff == 0) && (delta != 0))
		newoff = off + delta;	/* leaving "empty": start at origin */
	else if ((oldoff + delta) < (size + off))
		newoff = oldoff + delta;
	else
		newoff = (oldoff + delta) - size;	/* wrap around */

	/* Note some interesting axioms */
	KASSERT((delta != 0) || (newoff == oldoff));
	KASSERT((delta == 0) || (newoff != 0));
	KASSERT((delta != (size)) || (newoff == oldoff));

	/* Define acceptable ranges for output. */
	KASSERT((newoff == 0) || ((size_t)newoff >= off));
	KASSERT((size_t)newoff < (size + off));
	return newoff;
}
1157:
1.30 uebayasi 1158: static inline size_t
1.2 simonb 1159: wapbl_space_free(size_t avail, off_t head, off_t tail)
1160: {
1161:
1162: return avail - wapbl_space_used(avail, head, tail);
1163: }
1164:
/*
 * wapbl_advance_head: move the log head forward by delta bytes after
 * appending data, updating *headp/*tailp in place.
 *
 * If the log was empty (tail == 0) and now holds data, the tail is
 * initialized to the start of the log region.
 */
static inline void
wapbl_advance_head(size_t size, size_t off, size_t delta, off_t *headp,
		   off_t *tailp)
{
	off_t head = *headp;
	off_t tail = *tailp;

	KASSERT(delta <= wapbl_space_free(size, head, tail));
	head = wapbl_advance(size, off, head, delta);
	if ((tail == 0) && (head != 0))
		tail = off;
	*headp = head;
	*tailp = tail;
}
1179:
/*
 * wapbl_advance_tail: move the log tail forward by delta bytes after
 * reclaiming space, updating *headp/*tailp in place.
 *
 * When the tail catches up with the head the log is empty again, which
 * is encoded as head == tail == 0.
 */
static inline void
wapbl_advance_tail(size_t size, size_t off, size_t delta, off_t *headp,
		   off_t *tailp)
{
	off_t head = *headp;
	off_t tail = *tailp;

	KASSERT(delta <= wapbl_space_used(size, head, tail));
	tail = wapbl_advance(size, off, tail, delta);
	if (head == tail) {
		head = tail = 0;
	}
	*headp = head;
	*tailp = tail;
}
1195:
1196:
1197: /****************************************************************/
1198:
1199: /*
1200: * Remove transactions whose buffers are completely flushed to disk.
1201: * Will block until at least minfree space is available.
1202: * only intended to be called from inside wapbl_flush and therefore
1203: * does not protect against commit races with itself or with flush.
1204: */
1205: static int
1206: wapbl_truncate(struct wapbl *wl, size_t minfree, int waitonly)
1207: {
1208: size_t delta;
1209: size_t avail;
1210: off_t head;
1211: off_t tail;
1212: int error = 0;
1213:
1214: KASSERT(minfree <= (wl->wl_circ_size - wl->wl_reserved_bytes));
1215: KASSERT(rw_write_held(&wl->wl_rwlock));
1216:
1217: mutex_enter(&wl->wl_mtx);
1218:
1219: /*
1220: * First check to see if we have to do a commit
1221: * at all.
1222: */
1223: avail = wapbl_space_free(wl->wl_circ_size, wl->wl_head, wl->wl_tail);
1224: if (minfree < avail) {
1225: mutex_exit(&wl->wl_mtx);
1226: return 0;
1227: }
1228: minfree -= avail;
1229: while ((wl->wl_error_count == 0) &&
1230: (wl->wl_reclaimable_bytes < minfree)) {
1231: WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1232: ("wapbl_truncate: sleeping on %p wl=%p bytes=%zd "
1233: "minfree=%zd\n",
1234: &wl->wl_reclaimable_bytes, wl, wl->wl_reclaimable_bytes,
1235: minfree));
1236:
1237: cv_wait(&wl->wl_reclaimable_cv, &wl->wl_mtx);
1238: }
1239: if (wl->wl_reclaimable_bytes < minfree) {
1240: KASSERT(wl->wl_error_count);
1241: /* XXX maybe get actual error from buffer instead someday? */
1242: error = EIO;
1243: }
1244: head = wl->wl_head;
1245: tail = wl->wl_tail;
1246: delta = wl->wl_reclaimable_bytes;
1247:
1248: /* If all of of the entries are flushed, then be sure to keep
1249: * the reserved bytes reserved. Watch out for discarded transactions,
1250: * which could leave more bytes reserved than are reclaimable.
1251: */
1252: if (SIMPLEQ_EMPTY(&wl->wl_entries) &&
1253: (delta >= wl->wl_reserved_bytes)) {
1254: delta -= wl->wl_reserved_bytes;
1255: }
1256: wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta, &head,
1257: &tail);
1258: KDASSERT(wl->wl_reserved_bytes <=
1259: wapbl_space_used(wl->wl_circ_size, head, tail));
1260: mutex_exit(&wl->wl_mtx);
1261:
1262: if (error)
1263: return error;
1264:
1265: if (waitonly)
1266: return 0;
1267:
1268: /*
1269: * This is where head, tail and delta are unprotected
1270: * from races against itself or flush. This is ok since
1271: * we only call this routine from inside flush itself.
1272: *
1273: * XXX: how can it race against itself when accessed only
1274: * from behind the write-locked rwlock?
1275: */
1276: error = wapbl_write_commit(wl, head, tail);
1277: if (error)
1278: return error;
1279:
1280: wl->wl_head = head;
1281: wl->wl_tail = tail;
1282:
1283: mutex_enter(&wl->wl_mtx);
1284: KASSERT(wl->wl_reclaimable_bytes >= delta);
1285: wl->wl_reclaimable_bytes -= delta;
1286: mutex_exit(&wl->wl_mtx);
1287: WAPBL_PRINTF(WAPBL_PRINT_TRUNCATE,
1288: ("wapbl_truncate thread %d.%d truncating %zu bytes\n",
1289: curproc->p_pid, curlwp->l_lid, delta));
1290:
1291: return 0;
1292: }
1293:
1294: /****************************************************************/
1295:
/*
 * wapbl_biodone: I/O completion callback for buffers written by
 * wapbl_flush().
 *
 * Decrements the owning wapbl_entry's outstanding-buffer count; when a
 * prefix of fully-completed entries forms at the head of wl_entries,
 * their space becomes reclaimable and waiters (wapbl_truncate) are
 * woken.  Buffer write errors currently mark the whole log errored
 * (see the #else branch below).  Also copes with buffers completing
 * after the log has been detached (we->we_wapbl == NULL).
 */
void
wapbl_biodone(struct buf *bp)
{
	struct wapbl_entry *we = bp->b_private;
	struct wapbl *wl = we->we_wapbl;
#ifdef WAPBL_DEBUG_BUFBYTES
	/* b_bufsize must be sampled before brelse() may recycle bp. */
	const int bufsize = bp->b_bufsize;
#endif

	/*
	 * Handle possible flushing of buffers after log has been
	 * decomissioned.
	 */
	if (!wl) {
		KASSERT(we->we_bufcount > 0);
		we->we_bufcount--;
#ifdef WAPBL_DEBUG_BUFBYTES
		KASSERT(we->we_unsynced_bufbytes >= bufsize);
		we->we_unsynced_bufbytes -= bufsize;
#endif

		if (we->we_bufcount == 0) {
#ifdef WAPBL_DEBUG_BUFBYTES
			KASSERT(we->we_unsynced_bufbytes == 0);
#endif
			pool_put(&wapbl_entry_pool, we);
		}

		brelse(bp, 0);
		return;
	}

#ifdef ohbother
	KDASSERT(bp->b_oflags & BO_DONE);
	KDASSERT(!(bp->b_oflags & BO_DELWRI));
	KDASSERT(bp->b_flags & B_ASYNC);
	KDASSERT(bp->b_cflags & BC_BUSY);
	KDASSERT(!(bp->b_flags & B_LOCKED));
	KDASSERT(!(bp->b_flags & B_READ));
	KDASSERT(!(bp->b_cflags & BC_INVAL));
	KDASSERT(!(bp->b_cflags & BC_NOCACHE));
#endif

	if (bp->b_error) {
#ifdef notyet /* Can't currently handle possible dirty buffer reuse */
		/*
		 * XXXpooka: interfaces not fully updated
		 * Note: this was not enabled in the original patch
		 * against netbsd4 either.  I don't know if comment
		 * above is true or not.
		 */

		/*
		 * If an error occurs, report the error and leave the
		 * buffer as a delayed write on the LRU queue.
		 * restarting the write would likely result in
		 * an error spinloop, so let it be done harmlessly
		 * by the syncer.
		 */
		bp->b_flags &= ~(B_DONE);
		simple_unlock(&bp->b_interlock);

		if (we->we_error == 0) {
			mutex_enter(&wl->wl_mtx);
			wl->wl_error_count++;
			mutex_exit(&wl->wl_mtx);
			cv_broadcast(&wl->wl_reclaimable_cv);
		}
		we->we_error = bp->b_error;
		bp->b_error = 0;
		brelse(bp);
		return;
#else
		/* For now, just mark the log permanently errored out */

		mutex_enter(&wl->wl_mtx);
		if (wl->wl_error_count == 0) {
			wl->wl_error_count++;
			cv_broadcast(&wl->wl_reclaimable_cv);
		}
		mutex_exit(&wl->wl_mtx);
#endif
	}

	/*
	 * Release the buffer here. wapbl_flush() may wait for the
	 * log to become empty and we better unbusy the buffer before
	 * wapbl_flush() returns.
	 */
	brelse(bp, 0);

	mutex_enter(&wl->wl_mtx);

	KASSERT(we->we_bufcount > 0);
	we->we_bufcount--;
#ifdef WAPBL_DEBUG_BUFBYTES
	KASSERT(we->we_unsynced_bufbytes >= bufsize);
	we->we_unsynced_bufbytes -= bufsize;
	KASSERT(wl->wl_unsynced_bufbytes >= bufsize);
	wl->wl_unsynced_bufbytes -= bufsize;
#endif

	/*
	 * If the current transaction can be reclaimed, start
	 * at the beginning and reclaim any consecutive reclaimable
	 * transactions.  If we successfully reclaim anything,
	 * then wakeup anyone waiting for the reclaim.
	 */
	if (we->we_bufcount == 0) {
		size_t delta = 0;
		int errcnt = 0;
#ifdef WAPBL_DEBUG_BUFBYTES
		KDASSERT(we->we_unsynced_bufbytes == 0);
#endif
		/*
		 * clear any posted error, since the buffer it came from
		 * has successfully flushed by now
		 */
		while ((we = SIMPLEQ_FIRST(&wl->wl_entries)) &&
		       (we->we_bufcount == 0)) {
			delta += we->we_reclaimable_bytes;
			if (we->we_error)
				errcnt++;
			SIMPLEQ_REMOVE_HEAD(&wl->wl_entries, we_entries);
			pool_put(&wapbl_entry_pool, we);
		}

		if (delta) {
			wl->wl_reclaimable_bytes += delta;
			KASSERT(wl->wl_error_count >= errcnt);
			wl->wl_error_count -= errcnt;
			cv_broadcast(&wl->wl_reclaimable_cv);
		}
	}

	mutex_exit(&wl->wl_mtx);
}
1433:
/*
 * wapbl_flush: write the current transaction to the log and start I/O
 * for its data buffers.
 *
 * Sequence: take wl_rwlock as writer (draining all open transactions),
 * run the file system's flush callback for queued deallocations, make
 * room in the circular log, write block/revocation/inode records, then
 * the commit header, and finally issue async writes for the metadata
 * buffers themselves (completion is tracked via wapbl_biodone on a
 * fresh wapbl_entry).  With waitfor != 0, additionally wait until the
 * on-disk log is completely empty before returning.
 *
 * => returns 0 or an I/O error; on error the flush-abort callback is
 *    invoked so the file system can undo pending deallocations
 */
int
wapbl_flush(struct wapbl *wl, int waitfor)
{
	struct buf *bp;
	struct wapbl_entry *we;
	off_t off;
	off_t head;
	off_t tail;
	size_t delta = 0;
	size_t flushsize;
	size_t reserved;
	int error = 0;

	/*
	 * Do a quick check to see if a full flush can be skipped
	 * This assumes that the flush callback does not need to be called
	 * unless there are other outstanding bufs.
	 */
	if (!waitfor) {
		size_t nbufs;
		mutex_enter(&wl->wl_mtx);	/* XXX need mutex here to
						   protect the KASSERTS */
		nbufs = wl->wl_bufcount;
		KASSERT((wl->wl_bufcount == 0) == (wl->wl_bufbytes == 0));
		KASSERT((wl->wl_bufcount == 0) == (wl->wl_bcount == 0));
		mutex_exit(&wl->wl_mtx);
		if (nbufs == 0)
			return 0;
	}

	/*
	 * XXX we may consider using LK_UPGRADE here
	 * if we want to call flush from inside a transaction
	 */
	rw_enter(&wl->wl_rwlock, RW_WRITER);
	/* Let the file system act on the queued deallocations first. */
	wl->wl_flush(wl->wl_mount, wl->wl_deallocblks, wl->wl_dealloclens,
	    wl->wl_dealloccnt);

	/*
	 * Now that we are fully locked and flushed,
	 * do another check for nothing to do.
	 */
	if (wl->wl_bufcount == 0) {
		goto out;
	}

#if 0
	WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
	    ("wapbl_flush thread %d.%d flushing entries with "
	      "bufcount=%zu bufbytes=%zu\n",
	    curproc->p_pid, curlwp->l_lid, wl->wl_bufcount,
	    wl->wl_bufbytes));
#endif

	/* Calculate amount of space needed to flush */
	flushsize = wapbl_transaction_len(wl);
	if (wapbl_verbose_commit) {
		struct timespec ts;
		getnanotime(&ts);
		printf("%s: %lld.%09ld this transaction = %zu bytes\n",
		    __func__, (long long)ts.tv_sec,
		    (long)ts.tv_nsec, flushsize);
	}

	if (flushsize > (wl->wl_circ_size - wl->wl_reserved_bytes)) {
		/*
		 * XXX this could be handled more gracefully, perhaps place
		 * only a partial transaction in the log and allow the
		 * remaining to flush without the protection of the journal.
		 */
		panic("wapbl_flush: current transaction too big to flush\n");
	}

	/* Reclaim enough log space for this transaction (may sleep). */
	error = wapbl_truncate(wl, flushsize, 0);
	if (error)
		goto out2;

	off = wl->wl_head;
	KASSERT((off == 0) || ((off >= wl->wl_circ_off) &&
	              (off < wl->wl_circ_off + wl->wl_circ_size)));
	error = wapbl_write_blocks(wl, &off);
	if (error)
		goto out2;
	error = wapbl_write_revocations(wl, &off);
	if (error)
		goto out2;
	error = wapbl_write_inodes(wl, &off);
	if (error)
		goto out2;

	/* Keep room reserved to re-log registered inodes later. */
	reserved = 0;
	if (wl->wl_inohashcnt)
		reserved = wapbl_transaction_inodes_len(wl);

	head = wl->wl_head;
	tail = wl->wl_tail;

	wapbl_advance_head(wl->wl_circ_size, wl->wl_circ_off, flushsize,
	    &head, &tail);
#ifdef WAPBL_DEBUG
	if (head != off) {
		panic("lost head! head=%"PRIdMAX" tail=%" PRIdMAX
		      " off=%"PRIdMAX" flush=%zu\n",
		      (intmax_t)head, (intmax_t)tail, (intmax_t)off,
		      flushsize);
	}
#else
	KASSERT(head == off);
#endif

	/* Opportunistically move the tail forward if we can */
	if (!wapbl_lazy_truncate) {
		mutex_enter(&wl->wl_mtx);
		delta = wl->wl_reclaimable_bytes;
		mutex_exit(&wl->wl_mtx);
		wapbl_advance_tail(wl->wl_circ_size, wl->wl_circ_off, delta,
		    &head, &tail);
	}

	error = wapbl_write_commit(wl, head, tail);
	if (error)
		goto out2;

	we = pool_get(&wapbl_entry_pool, PR_WAITOK);

#ifdef WAPBL_DEBUG_BUFBYTES
	WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
		("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
		 " unsynced=%zu"
		 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
		 "inodes=%d\n",
		 curproc->p_pid, curlwp->l_lid, flushsize, delta,
		 wapbl_space_used(wl->wl_circ_size, head, tail),
		 wl->wl_unsynced_bufbytes, wl->wl_bufcount,
		 wl->wl_bufbytes, wl->wl_bcount, wl->wl_dealloccnt,
		 wl->wl_inohashcnt));
#else
	WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
		("wapbl_flush: thread %d.%d head+=%zu tail+=%zu used=%zu"
		 "\n\tbufcount=%zu bufbytes=%zu bcount=%zu deallocs=%d "
		 "inodes=%d\n",
		 curproc->p_pid, curlwp->l_lid, flushsize, delta,
		 wapbl_space_used(wl->wl_circ_size, head, tail),
		 wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
		 wl->wl_dealloccnt, wl->wl_inohashcnt));
#endif


	/*
	 * Publish the new log pointers and queue the in-flight entry;
	 * bufcache_lock is needed below for bbusy()/bremfree().
	 */
	mutex_enter(&bufcache_lock);
	mutex_enter(&wl->wl_mtx);

	wl->wl_reserved_bytes = reserved;
	wl->wl_head = head;
	wl->wl_tail = tail;
	KASSERT(wl->wl_reclaimable_bytes >= delta);
	wl->wl_reclaimable_bytes -= delta;
	wl->wl_dealloccnt = 0;
#ifdef WAPBL_DEBUG_BUFBYTES
	wl->wl_unsynced_bufbytes += wl->wl_bufbytes;
#endif

	we->we_wapbl = wl;
	we->we_bufcount = wl->wl_bufcount;
#ifdef WAPBL_DEBUG_BUFBYTES
	we->we_unsynced_bufbytes = wl->wl_bufbytes;
#endif
	we->we_reclaimable_bytes = flushsize;
	we->we_error = 0;
	SIMPLEQ_INSERT_TAIL(&wl->wl_entries, we, we_entries);

	/*
	 * this flushes bufs in reverse order than they were queued
	 * it shouldn't matter, but if we care we could use TAILQ instead.
	 * XXX Note they will get put on the lru queue when they flush
	 * so we might actually want to change this to preserve order.
	 */
	while ((bp = LIST_FIRST(&wl->wl_bufs)) != NULL) {
		if (bbusy(bp, 0, 0, &wl->wl_mtx)) {
			/* bbusy dropped wl_mtx while sleeping; retry. */
			continue;
		}
		bp->b_iodone = wapbl_biodone;
		bp->b_private = we;
		bremfree(bp);
		wapbl_remove_buf_locked(wl, bp);
		mutex_exit(&wl->wl_mtx);
		mutex_exit(&bufcache_lock);
		bawrite(bp);
		mutex_enter(&bufcache_lock);
		mutex_enter(&wl->wl_mtx);
	}
	mutex_exit(&wl->wl_mtx);
	mutex_exit(&bufcache_lock);

#if 0
	WAPBL_PRINTF(WAPBL_PRINT_FLUSH,
	    ("wapbl_flush thread %d.%d done flushing entries...\n",
	    curproc->p_pid, curlwp->l_lid));
#endif

 out:

	/*
	 * If the waitfor flag is set, don't return until everything is
	 * fully flushed and the on disk log is empty.
	 */
	if (waitfor) {
		error = wapbl_truncate(wl, wl->wl_circ_size -
			wl->wl_reserved_bytes, wapbl_lazy_truncate);
	}

 out2:
	if (error) {
		wl->wl_flush_abort(wl->wl_mount, wl->wl_deallocblks,
		    wl->wl_dealloclens, wl->wl_dealloccnt);
	}

#ifdef WAPBL_DEBUG_PRINT
	if (error) {
		pid_t pid = -1;
		lwpid_t lid = -1;
		if (curproc)
			pid = curproc->p_pid;
		if (curlwp)
			lid = curlwp->l_lid;
		mutex_enter(&wl->wl_mtx);
#ifdef WAPBL_DEBUG_BUFBYTES
		WAPBL_PRINTF(WAPBL_PRINT_ERROR,
		    ("wapbl_flush: thread %d.%d aborted flush: "
		    "error = %d\n"
		    "\tbufcount=%zu bufbytes=%zu bcount=%zu "
		    "deallocs=%d inodes=%d\n"
		    "\terrcnt = %d, reclaimable=%zu reserved=%zu "
		    "unsynced=%zu\n",
		    pid, lid, error, wl->wl_bufcount,
		    wl->wl_bufbytes, wl->wl_bcount,
		    wl->wl_dealloccnt, wl->wl_inohashcnt,
		    wl->wl_error_count, wl->wl_reclaimable_bytes,
		    wl->wl_reserved_bytes, wl->wl_unsynced_bufbytes));
		SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
			WAPBL_PRINTF(WAPBL_PRINT_ERROR,
			    ("\tentry: bufcount = %zu, reclaimable = %zu, "
			     "error = %d, unsynced = %zu\n",
			     we->we_bufcount, we->we_reclaimable_bytes,
			     we->we_error, we->we_unsynced_bufbytes));
		}
#else
		WAPBL_PRINTF(WAPBL_PRINT_ERROR,
		    ("wapbl_flush: thread %d.%d aborted flush: "
		     "error = %d\n"
		     "\tbufcount=%zu bufbytes=%zu bcount=%zu "
		     "deallocs=%d inodes=%d\n"
		     "\terrcnt = %d, reclaimable=%zu reserved=%zu\n",
		     pid, lid, error, wl->wl_bufcount,
		     wl->wl_bufbytes, wl->wl_bcount,
		     wl->wl_dealloccnt, wl->wl_inohashcnt,
		     wl->wl_error_count, wl->wl_reclaimable_bytes,
		     wl->wl_reserved_bytes));
		SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
			WAPBL_PRINTF(WAPBL_PRINT_ERROR,
			    ("\tentry: bufcount = %zu, reclaimable = %zu, "
			     "error = %d\n", we->we_bufcount,
			     we->we_reclaimable_bytes, we->we_error));
		}
#endif
		mutex_exit(&wl->wl_mtx);
	}
#endif

	rw_exit(&wl->wl_rwlock);
	return error;
}
1708:
1709: /****************************************************************/
1710:
/*
 * wapbl_jlock_assert: assert the journal lock is held (either mode).
 */
void
wapbl_jlock_assert(struct wapbl *wl)
{

	KASSERT(rw_lock_held(&wl->wl_rwlock));
}
1717:
/*
 * wapbl_junlock_assert: assert the journal lock is not write-held by
 * this thread (a reader hold cannot be detected and is not checked).
 */
void
wapbl_junlock_assert(struct wapbl *wl)
{

	KASSERT(!rw_write_held(&wl->wl_rwlock));
}
1724:
1725: /****************************************************************/
1726:
/* locks missing */
/*
 * wapbl_print: dump the state of a log for debugging (typically from
 * ddb via wapbl_dump).  With full != 0, also lists every transaction
 * buffer, queued deallocation and registered inode.
 *
 * => pr: printf-like output function (e.g. printf or db_printf)
 * NOTE(review): reads shared state without wl_mtx -- debugger use only.
 */
void
wapbl_print(struct wapbl *wl,
		int full,
		void (*pr)(const char *, ...))
{
	struct buf *bp;
	struct wapbl_entry *we;
	(*pr)("wapbl %p", wl);
	(*pr)("\nlogvp = %p, devvp = %p, logpbn = %"PRId64"\n",
	      wl->wl_logvp, wl->wl_devvp, wl->wl_logpbn);
	(*pr)("circ = %zu, header = %zu, head = %"PRIdMAX" tail = %"PRIdMAX"\n",
	      wl->wl_circ_size, wl->wl_circ_off,
	      (intmax_t)wl->wl_head, (intmax_t)wl->wl_tail);
	(*pr)("fs_dev_bshift = %d, log_dev_bshift = %d\n",
	      wl->wl_log_dev_bshift, wl->wl_fs_dev_bshift);
#ifdef WAPBL_DEBUG_BUFBYTES
	(*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
	      "reserved = %zu errcnt = %d unsynced = %zu\n",
	      wl->wl_bufcount, wl->wl_bufbytes, wl->wl_bcount,
	      wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
	      wl->wl_error_count, wl->wl_unsynced_bufbytes);
#else
	(*pr)("bufcount = %zu, bufbytes = %zu bcount = %zu reclaimable = %zu "
	      "reserved = %zu errcnt = %d\n", wl->wl_bufcount, wl->wl_bufbytes,
	      wl->wl_bcount, wl->wl_reclaimable_bytes, wl->wl_reserved_bytes,
	      wl->wl_error_count);
#endif
	(*pr)("\tdealloccnt = %d, dealloclim = %d\n",
	      wl->wl_dealloccnt, wl->wl_dealloclim);
	(*pr)("\tinohashcnt = %d, inohashmask = 0x%08x\n",
	      wl->wl_inohashcnt, wl->wl_inohashmask);
	(*pr)("entries:\n");
	SIMPLEQ_FOREACH(we, &wl->wl_entries, we_entries) {
#ifdef WAPBL_DEBUG_BUFBYTES
		(*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d, "
		      "unsynced = %zu\n",
		      we->we_bufcount, we->we_reclaimable_bytes,
		      we->we_error, we->we_unsynced_bufbytes);
#else
		(*pr)("\tbufcount = %zu, reclaimable = %zu, error = %d\n",
		      we->we_bufcount, we->we_reclaimable_bytes, we->we_error);
#endif
	}
	if (full) {
		int cnt = 0;
		(*pr)("bufs =");
		LIST_FOREACH(bp, &wl->wl_bufs, b_wapbllist) {
			if (!LIST_NEXT(bp, b_wapbllist)) {
				(*pr)(" %p", bp);
			} else if ((++cnt % 6) == 0) {
				(*pr)(" %p,\n\t", bp);
			} else {
				(*pr)(" %p,", bp);
			}
		}
		(*pr)("\n");

		(*pr)("dealloced blks = ");
		{
			int i;
			cnt = 0;
			for (i = 0; i < wl->wl_dealloccnt; i++) {
				(*pr)(" %"PRId64":%d,",
				      wl->wl_deallocblks[i],
				      wl->wl_dealloclens[i]);
				if ((++cnt % 4) == 0) {
					(*pr)("\n\t");
				}
			}
		}
		(*pr)("\n");

		(*pr)("registered inodes = ");
		{
			int i;
			cnt = 0;
			for (i = 0; i <= wl->wl_inohashmask; i++) {
				struct wapbl_ino_head *wih;
				struct wapbl_ino *wi;

				wih = &wl->wl_inohash[i];
				LIST_FOREACH(wi, wih, wi_hash) {
					if (wi->wi_ino == 0)
						continue;
					(*pr)(" %"PRIu64"/0%06"PRIo32",",
					    wi->wi_ino, wi->wi_mode);
					if ((++cnt % 4) == 0) {
						(*pr)("\n\t");
					}
				}
			}
			(*pr)("\n");
		}
	}
}
1823:
#if defined(WAPBL_DEBUG) || defined(DDB)
/*
 * wapbl_dump: convenience wrapper around wapbl_print() for debugger
 * use; with a NULL argument falls back to the globally recorded
 * wapbl_debug_wl (WAPBL_DEBUG kernels only).
 */
void
wapbl_dump(struct wapbl *wl)
{
#if defined(WAPBL_DEBUG)
	if (!wl)
		wl = wapbl_debug_wl;
#endif
	if (!wl)
		return;
	wapbl_print(wl, 1, printf);
}
#endif
1837:
1838: /****************************************************************/
1839:
/*
 * wapbl_register_deallocation: queue a freed block extent (blk, len)
 * so it is journaled with the current transaction.
 *
 * => caller must hold the journal lock (wapbl_begin)
 * => panics if the fixed-size deallocation table overflows (see XXX)
 */
void
wapbl_register_deallocation(struct wapbl *wl, daddr_t blk, int len)
{

	wapbl_jlock_assert(wl);

	mutex_enter(&wl->wl_mtx);
	/* XXX should eventually instead tie this into resource estimation */
	/*
	 * XXX this panic needs locking/mutex analysis and the
	 * ability to cope with the failure.
	 */
	/* XXX this XXX doesn't have enough XXX */
	if (__predict_false(wl->wl_dealloccnt >= wl->wl_dealloclim))
		panic("wapbl_register_deallocation: out of resources");

	wl->wl_deallocblks[wl->wl_dealloccnt] = blk;
	wl->wl_dealloclens[wl->wl_dealloccnt] = len;
	wl->wl_dealloccnt++;
	WAPBL_PRINTF(WAPBL_PRINT_ALLOC,
	    ("wapbl_register_deallocation: blk=%"PRId64" len=%d\n", blk, len));
	mutex_exit(&wl->wl_mtx);
}
1863:
1864: /****************************************************************/
1865:
/*
 * wapbl_inodetrk_init: set up this log's inode-tracking hash table.
 *
 * The backing wapbl_ino_pool is shared by all logs and created lazily
 * by the first initializer (refcounted via wapbl_ino_pool_refcount).
 */
static void
wapbl_inodetrk_init(struct wapbl *wl, u_int size)
{

	wl->wl_inohash = hashinit(size, HASH_LIST, true, &wl->wl_inohashmask);
	if (atomic_inc_uint_nv(&wapbl_ino_pool_refcount) == 1) {
		pool_init(&wapbl_ino_pool, sizeof(struct wapbl_ino), 0, 0, 0,
		    "wapblinopl", &pool_allocator_nointr, IPL_NONE);
	}
}
1876:
/*
 * wapbl_inodetrk_free: tear down the inode-tracking hash table; the
 * shared wapbl_ino_pool is destroyed when the last log releases it.
 */
static void
wapbl_inodetrk_free(struct wapbl *wl)
{

	/* XXX this KASSERT needs locking/mutex analysis */
	KASSERT(wl->wl_inohashcnt == 0);
	hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask);
	if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) {
		pool_destroy(&wapbl_ino_pool);
	}
}
1888:
1889: static struct wapbl_ino *
1890: wapbl_inodetrk_get(struct wapbl *wl, ino_t ino)
1891: {
1892: struct wapbl_ino_head *wih;
1893: struct wapbl_ino *wi;
1894:
1895: KASSERT(mutex_owned(&wl->wl_mtx));
1896:
1897: wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
1898: LIST_FOREACH(wi, wih, wi_hash) {
1899: if (ino == wi->wi_ino)
1900: return wi;
1901: }
1902: return 0;
1903: }
1904:
/*
 * wapbl_register_inode: record an allocated-but-unlinked inode (number
 * and mode) so it survives log replay.  Registering an already-tracked
 * inode is a no-op.
 *
 * The pool entry is allocated before taking wl_mtx because pool_get
 * with PR_WAITOK may sleep.
 */
void
wapbl_register_inode(struct wapbl *wl, ino_t ino, mode_t mode)
{
	struct wapbl_ino_head *wih;
	struct wapbl_ino *wi;

	wi = pool_get(&wapbl_ino_pool, PR_WAITOK);

	mutex_enter(&wl->wl_mtx);
	if (wapbl_inodetrk_get(wl, ino) == NULL) {
		wi->wi_ino = ino;
		wi->wi_mode = mode;
		wih = &wl->wl_inohash[ino & wl->wl_inohashmask];
		LIST_INSERT_HEAD(wih, wi, wi_hash);
		wl->wl_inohashcnt++;
		WAPBL_PRINTF(WAPBL_PRINT_INODE,
		    ("wapbl_register_inode: ino=%"PRId64"\n", ino));
		mutex_exit(&wl->wl_mtx);
	} else {
		/* Already tracked: return the unused pool entry. */
		mutex_exit(&wl->wl_mtx);
		pool_put(&wapbl_ino_pool, wi);
	}
}
1928:
/*
 * wapbl_unregister_inode: drop an inode from the tracking table, e.g.
 * once it has been fully freed or linked.  Unregistering an untracked
 * inode is a no-op.  The mode argument is unused.
 */
void
wapbl_unregister_inode(struct wapbl *wl, ino_t ino, mode_t mode)
{
	struct wapbl_ino *wi;

	mutex_enter(&wl->wl_mtx);
	wi = wapbl_inodetrk_get(wl, ino);
	if (wi) {
		WAPBL_PRINTF(WAPBL_PRINT_INODE,
		    ("wapbl_unregister_inode: ino=%"PRId64"\n", ino));
		KASSERT(wl->wl_inohashcnt > 0);
		wl->wl_inohashcnt--;
		LIST_REMOVE(wi, wi_hash);
		mutex_exit(&wl->wl_mtx);

		/* pool_put outside the mutex: it may take its own locks. */
		pool_put(&wapbl_ino_pool, wi);
	} else {
		mutex_exit(&wl->wl_mtx);
	}
}
1949:
1950: /****************************************************************/
1951:
/*
 * wapbl_transaction_inodes_len: bytes of log space needed to record
 * all currently registered inodes, in whole log-device blocks.
 *
 * At least one block is always counted so the reservation is never
 * zero.
 */
static inline size_t
wapbl_transaction_inodes_len(struct wapbl *wl)
{
	int blocklen = 1<<wl->wl_log_dev_bshift;
	int iph;

	/* Calculate number of inodes described in a inodelist header */
	iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
	    sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);

	KASSERT(iph > 0);

	return MAX(1, howmany(wl->wl_inohashcnt, iph)) * blocklen;
}
1966:
1967:
/* Calculate amount of space a transaction will take on disk */
/*
 * Sum of: raw buffer data bytes (wl_bcount), one blocklist header
 * block per bph buffers, one header block per bph queued
 * deallocations, and the registered-inode records.
 */
static size_t
wapbl_transaction_len(struct wapbl *wl)
{
	int blocklen = 1<<wl->wl_log_dev_bshift;
	size_t len;
	int bph;

	/* Calculate number of blocks described in a blocklist header */
	bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
	    sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);

	KASSERT(bph > 0);

	len = wl->wl_bcount;
	len += howmany(wl->wl_bufcount, bph) * blocklen;
	len += howmany(wl->wl_dealloccnt, bph) * blocklen;
	len += wapbl_transaction_inodes_len(wl);

	return len;
}
1989:
/*
 * wapbl_cache_sync: issue DIOCCACHESYNC to the log device so previously
 * written blocks reach stable storage.
 *
 * => msg: tag printed in timing output when wapbl_verbose_commit >= 2
 * => no-op (returns 0) when wapbl_flush_disk_cache is disabled
 * => returns the ioctl error, which callers may choose to ignore
 */
static int
wapbl_cache_sync(struct wapbl *wl, const char *msg)
{
	const bool verbose = wapbl_verbose_commit >= 2;
	struct bintime start_time;
	int force = 1;
	int error;

	if (!wapbl_flush_disk_cache) {
		return 0;
	}
	if (verbose) {
		bintime(&start_time);
	}
	error = VOP_IOCTL(wl->wl_devvp, DIOCCACHESYNC, &force,
	    FWRITE, FSCRED);
	if (error) {
		WAPBL_PRINTF(WAPBL_PRINT_ERROR,
		    ("wapbl_cache_sync: DIOCCACHESYNC on dev 0x%x "
		    "returned %d\n", wl->wl_devvp->v_rdev, error));
	}
	if (verbose) {
		struct bintime d;
		struct timespec ts;

		bintime(&d);
		bintime_sub(&d, &start_time);
		bintime2timespec(&d, &ts);
		printf("wapbl_cache_sync: %s: dev 0x%jx %ju.%09lu\n",
		    msg, (uintmax_t)wl->wl_devvp->v_rdev,
		    (uintmax_t)ts.tv_sec, ts.tv_nsec);
	}
	return error;
}
2027:
/*
 * Perform commit operation
 *
 * Note that generation number incrementation needs to
 * be protected against racing with other invocations
 * of wapbl_write_commit. This is ok since this routine
 * is only invoked from wapbl_flush
 */
static int
wapbl_write_commit(struct wapbl *wl, off_t head, off_t tail)
{
	struct wapbl_wc_header *wc = wl->wl_wc_header;
	struct timespec ts;
	int error;
	daddr_t pbn;

	/* Push any buffered log writes out to the device first. */
	error = wapbl_buffered_flush(wl);
	if (error)
		return error;
	/*
	 * flush disk cache to ensure that blocks we've written are actually
	 * written to the stable storage before the commit header.
	 *
	 * XXX Calc checksum here, instead we do this for now
	 *
	 * NOTE(review): the wapbl_cache_sync() result is deliberately
	 * ignored here and below -- failures are logged inside
	 * wapbl_cache_sync() and the sync is best effort.
	 */
	wapbl_cache_sync(wl, "1");

	wc->wc_head = head;
	wc->wc_tail = tail;
	wc->wc_checksum = 0;
	wc->wc_version = 1;
	getnanotime(&ts);
	wc->wc_time = ts.tv_sec;
	wc->wc_timensec = ts.tv_nsec;

	WAPBL_PRINTF(WAPBL_PRINT_WRITE,
	    ("wapbl_write_commit: head = %"PRIdMAX "tail = %"PRIdMAX"\n",
	    (intmax_t)head, (intmax_t)tail));

	/*
	 * write the commit header.
	 *
	 * XXX if generation will rollover, then first zero
	 * over second commit header before trying to write both headers.
	 */

	/* Alternate between the two commit-header slots by generation. */
	pbn = wl->wl_logpbn + (wc->wc_generation % 2);
#ifdef _KERNEL
	pbn = btodb(pbn << wc->wc_log_dev_bshift);
#endif
	error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
	if (error)
		return error;
	error = wapbl_buffered_flush(wl);
	if (error)
		return error;

	/*
	 * flush disk cache to ensure that the commit header is actually
	 * written before meta data blocks.
	 */
	wapbl_cache_sync(wl, "2");

	/*
	 * If the generation number was zero, write it out a second time.
	 * This handles initialization and generation number rollover
	 */
	if (wc->wc_generation++ == 0) {
		error = wapbl_write_commit(wl, head, tail);
		/*
		 * This panic should be able to be removed if we do the
		 * zero'ing mentioned above, and we are certain to roll
		 * back generation number on failure.
		 */
		if (error)
			panic("wapbl_write_commit: error writing duplicate "
			    "log header: %d\n", error);
	}
	return 0;
}
2108:
/*
 * Write the locked buffers on wl_bufs to the log as WAPBL_WC_BLOCKS
 * records: for each group of up to bph buffers, first a blocklist
 * header describing them, then the buffer data itself, padded out to
 * a whole number of log blocks.  Returns new offset value via *offp.
 */
static int
wapbl_write_blocks(struct wapbl *wl, off_t *offp)
{
	struct wapbl_wc_blocklist *wc =
	    (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
	int blocklen = 1<<wl->wl_log_dev_bshift;
	int bph;
	struct buf *bp;
	off_t off = *offp;
	int error;
	size_t padding;

	KASSERT(rw_write_held(&wl->wl_rwlock));

	/* Number of block records that fit in one blocklist header. */
	bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
	    sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);

	bp = LIST_FIRST(&wl->wl_bufs);

	while (bp) {
		int cnt;
		struct buf *obp = bp;	/* first buf covered by this header */

		KASSERT(bp->b_flags & B_LOCKED);

		wc->wc_type = WAPBL_WC_BLOCKS;
		wc->wc_len = blocklen;
		wc->wc_blkcount = 0;
		/* First pass: fill the header with up to bph descriptors. */
		while (bp && (wc->wc_blkcount < bph)) {
			/*
			 * Make sure all the physical block numbers are up to
			 * date. If this is not always true on a given
			 * filesystem, then VOP_BMAP must be called. We
			 * could call VOP_BMAP here, or else in the filesystem
			 * specific flush callback, although neither of those
			 * solutions allow us to take the vnode lock. If a
			 * filesystem requires that we must take the vnode lock
			 * to call VOP_BMAP, then we can probably do it in
			 * bwrite when the vnode lock should already be held
			 * by the invoking code.
			 */
			KASSERT((bp->b_vp->v_type == VBLK) ||
			    (bp->b_blkno != bp->b_lblkno));
			KASSERT(bp->b_blkno > 0);

			wc->wc_blocks[wc->wc_blkcount].wc_daddr = bp->b_blkno;
			wc->wc_blocks[wc->wc_blkcount].wc_dlen = bp->b_bcount;
			wc->wc_len += bp->b_bcount;
			wc->wc_blkcount++;
			bp = LIST_NEXT(bp, b_wapbllist);
		}
		/* Pad the record out to a whole number of log blocks. */
		if (wc->wc_len % blocklen != 0) {
			padding = blocklen - wc->wc_len % blocklen;
			wc->wc_len += padding;
		} else {
			padding = 0;
		}

		WAPBL_PRINTF(WAPBL_PRINT_WRITE,
		    ("wapbl_write_blocks: len = %u (padding %zu) off = %"PRIdMAX"\n",
		    wc->wc_len, padding, (intmax_t)off));

		/* Write the header... */
		error = wapbl_circ_write(wl, wc, blocklen, &off);
		if (error)
			return error;
		/* ...then second pass over the same bufs: the data. */
		bp = obp;
		cnt = 0;
		while (bp && (cnt++ < bph)) {
			error = wapbl_circ_write(wl, bp->b_data,
			    bp->b_bcount, &off);
			if (error)
				return error;
			bp = LIST_NEXT(bp, b_wapbllist);
		}
		if (padding) {
			void *zero;

			zero = wapbl_alloc(padding);
			memset(zero, 0, padding);
			error = wapbl_circ_write(wl, zero, padding, &off);
			wapbl_free(zero, padding);
			if (error)
				return error;
		}
	}
	*offp = off;
	return 0;
}
2198:
2199: static int
2200: wapbl_write_revocations(struct wapbl *wl, off_t *offp)
2201: {
2202: struct wapbl_wc_blocklist *wc =
2203: (struct wapbl_wc_blocklist *)wl->wl_wc_scratch;
2204: int i;
2205: int blocklen = 1<<wl->wl_log_dev_bshift;
2206: int bph;
2207: off_t off = *offp;
2208: int error;
2209:
2210: if (wl->wl_dealloccnt == 0)
2211: return 0;
2212:
2213: bph = (blocklen - offsetof(struct wapbl_wc_blocklist, wc_blocks)) /
2214: sizeof(((struct wapbl_wc_blocklist *)0)->wc_blocks[0]);
2215:
2216: i = 0;
2217: while (i < wl->wl_dealloccnt) {
2218: wc->wc_type = WAPBL_WC_REVOCATIONS;
2219: wc->wc_len = blocklen;
2220: wc->wc_blkcount = 0;
2221: while ((i < wl->wl_dealloccnt) && (wc->wc_blkcount < bph)) {
2222: wc->wc_blocks[wc->wc_blkcount].wc_daddr =
2223: wl->wl_deallocblks[i];
2224: wc->wc_blocks[wc->wc_blkcount].wc_dlen =
2225: wl->wl_dealloclens[i];
2226: wc->wc_blkcount++;
2227: i++;
2228: }
2229: WAPBL_PRINTF(WAPBL_PRINT_WRITE,
2230: ("wapbl_write_revocations: len = %u off = %"PRIdMAX"\n",
2231: wc->wc_len, (intmax_t)off));
2232: error = wapbl_circ_write(wl, wc, blocklen, &off);
2233: if (error)
2234: return error;
2235: }
2236: *offp = off;
2237: return 0;
2238: }
2239:
/*
 * Write the tracked inode table to the log at *offp as one or more
 * WAPBL_WC_INODES records.  The first record has wc_clear set so that
 * replay discards previously recorded inodes; note the do/while means
 * one (empty, clearing) record is written even when no inodes are
 * tracked.  On success *offp is advanced past the records written.
 */
static int
wapbl_write_inodes(struct wapbl *wl, off_t *offp)
{
	struct wapbl_wc_inodelist *wc =
	    (struct wapbl_wc_inodelist *)wl->wl_wc_scratch;
	int i;
	int blocklen = 1 << wl->wl_log_dev_bshift;
	off_t off = *offp;
	int error;

	struct wapbl_ino_head *wih;
	struct wapbl_ino *wi;
	int iph;

	/* Number of inode records that fit in one inodelist block. */
	iph = (blocklen - offsetof(struct wapbl_wc_inodelist, wc_inodes)) /
	    sizeof(((struct wapbl_wc_inodelist *)0)->wc_inodes[0]);

	i = 0;
	wih = &wl->wl_inohash[0];
	wi = 0;
	do {
		wc->wc_type = WAPBL_WC_INODES;
		wc->wc_len = blocklen;
		wc->wc_inocnt = 0;
		wc->wc_clear = (i == 0);
		while ((i < wl->wl_inohashcnt) && (wc->wc_inocnt < iph)) {
			/* Skip ahead to the next non-empty hash chain. */
			while (!wi) {
				KASSERT((wih - &wl->wl_inohash[0])
				    <= wl->wl_inohashmask);
				wi = LIST_FIRST(wih++);
			}
			wc->wc_inodes[wc->wc_inocnt].wc_inumber = wi->wi_ino;
			wc->wc_inodes[wc->wc_inocnt].wc_imode = wi->wi_mode;
			wc->wc_inocnt++;
			i++;
			wi = LIST_NEXT(wi, wi_hash);
		}
		WAPBL_PRINTF(WAPBL_PRINT_WRITE,
		    ("wapbl_write_inodes: len = %u off = %"PRIdMAX"\n",
		    wc->wc_len, (intmax_t)off));
		error = wapbl_circ_write(wl, wc, blocklen, &off);
		if (error)
			return error;
	} while (i < wl->wl_inohashcnt);

	*offp = off;
	return 0;
}
2288:
2289: #endif /* _KERNEL */
2290:
2291: /****************************************************************/
2292:
/*
 * Replay hash entry: maps a filesystem block number to the location of
 * its most recent copy in the log.
 */
struct wapbl_blk {
	LIST_ENTRY(wapbl_blk) wb_hash;	/* hash chain linkage */
	daddr_t wb_blk;			/* block number on the fs device */
	off_t wb_off; /* Offset of this block in the log */
};
#define	WAPBL_BLKPOOL_MIN 83	/* lower bound on replay hash size */
2299:
/*
 * Initialize the replay block hash with room for at least 'size'
 * entries (never fewer than WAPBL_BLKPOOL_MIN buckets).
 */
static void
wapbl_blkhash_init(struct wapbl_replay *wr, u_int size)
{
	if (size < WAPBL_BLKPOOL_MIN)
		size = WAPBL_BLKPOOL_MIN;
	KASSERT(wr->wr_blkhash == 0);
#ifdef _KERNEL
	wr->wr_blkhash = hashinit(size, HASH_LIST, true, &wr->wr_blkhashmask);
#else /* ! _KERNEL */
	/* Manually implement hashinit */
	{
		unsigned long i, hashsize;
		/* Round the requested size up to a power of two. */
		for (hashsize = 1; hashsize < size; hashsize <<= 1)
			continue;
		wr->wr_blkhash = wapbl_alloc(hashsize * sizeof(*wr->wr_blkhash));
		for (i = 0; i < hashsize; i++)
			LIST_INIT(&wr->wr_blkhash[i]);
		wr->wr_blkhashmask = hashsize - 1;
	}
#endif /* ! _KERNEL */
}
2321:
/* Release the replay block hash; all entries must have been removed. */
static void
wapbl_blkhash_free(struct wapbl_replay *wr)
{
	KASSERT(wr->wr_blkhashcnt == 0);
#ifdef _KERNEL
	hashdone(wr->wr_blkhash, HASH_LIST, wr->wr_blkhashmask);
#else /* ! _KERNEL */
	wapbl_free(wr->wr_blkhash,
	    (wr->wr_blkhashmask + 1) * sizeof(*wr->wr_blkhash));
#endif /* ! _KERNEL */
}
2333:
2334: static struct wapbl_blk *
2335: wapbl_blkhash_get(struct wapbl_replay *wr, daddr_t blk)
2336: {
2337: struct wapbl_blk_head *wbh;
2338: struct wapbl_blk *wb;
2339: wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2340: LIST_FOREACH(wb, wbh, wb_hash) {
2341: if (blk == wb->wb_blk)
2342: return wb;
2343: }
2344: return 0;
2345: }
2346:
2347: static void
2348: wapbl_blkhash_ins(struct wapbl_replay *wr, daddr_t blk, off_t off)
2349: {
2350: struct wapbl_blk_head *wbh;
2351: struct wapbl_blk *wb;
2352: wb = wapbl_blkhash_get(wr, blk);
2353: if (wb) {
2354: KASSERT(wb->wb_blk == blk);
2355: wb->wb_off = off;
2356: } else {
1.51 para 2357: wb = wapbl_alloc(sizeof(*wb));
1.2 simonb 2358: wb->wb_blk = blk;
2359: wb->wb_off = off;
2360: wbh = &wr->wr_blkhash[blk & wr->wr_blkhashmask];
2361: LIST_INSERT_HEAD(wbh, wb, wb_hash);
2362: wr->wr_blkhashcnt++;
2363: }
2364: }
2365:
2366: static void
2367: wapbl_blkhash_rem(struct wapbl_replay *wr, daddr_t blk)
2368: {
2369: struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
2370: if (wb) {
2371: KASSERT(wr->wr_blkhashcnt > 0);
2372: wr->wr_blkhashcnt--;
2373: LIST_REMOVE(wb, wb_hash);
1.18 yamt 2374: wapbl_free(wb, sizeof(*wb));
1.2 simonb 2375: }
2376: }
2377:
2378: static void
2379: wapbl_blkhash_clear(struct wapbl_replay *wr)
2380: {
1.25 lukem 2381: unsigned long i;
1.2 simonb 2382: for (i = 0; i <= wr->wr_blkhashmask; i++) {
2383: struct wapbl_blk *wb;
2384:
2385: while ((wb = LIST_FIRST(&wr->wr_blkhash[i]))) {
2386: KASSERT(wr->wr_blkhashcnt > 0);
2387: wr->wr_blkhashcnt--;
2388: LIST_REMOVE(wb, wb_hash);
1.18 yamt 2389: wapbl_free(wb, sizeof(*wb));
1.2 simonb 2390: }
2391: }
2392: KASSERT(wr->wr_blkhashcnt == 0);
2393: }
2394:
2395: /****************************************************************/
2396:
/*
 * Read 'len' bytes from the circular log area into 'data', starting
 * at *offp and wrapping from the end of the area back to its start.
 * On success *offp is advanced past the bytes read (wrapped if
 * necessary).
 */
static int
wapbl_circ_read(struct wapbl_replay *wr, void *data, size_t len, off_t *offp)
{
	size_t slen;
	off_t off = *offp;
	int error;
	daddr_t pbn;

	/* len must be a whole number of log device blocks */
	KASSERT(((len >> wr->wr_log_dev_bshift) <<
	    wr->wr_log_dev_bshift) == len);

	if (off < wr->wr_circ_off)
		off = wr->wr_circ_off;
	slen = wr->wr_circ_off + wr->wr_circ_size - off;
	if (slen < len) {
		/* Wraps: read the piece up to the end of the area first. */
		pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
#ifdef _KERNEL
		pbn = btodb(pbn << wr->wr_log_dev_bshift);
#endif
		error = wapbl_read(data, slen, wr->wr_devvp, pbn);
		if (error)
			return error;
		data = (uint8_t *)data + slen;
		len -= slen;
		off = wr->wr_circ_off;
	}
	/* Read the (remaining) contiguous piece. */
	pbn = wr->wr_logpbn + (off >> wr->wr_log_dev_bshift);
#ifdef _KERNEL
	pbn = btodb(pbn << wr->wr_log_dev_bshift);
#endif
	error = wapbl_read(data, len, wr->wr_devvp, pbn);
	if (error)
		return error;
	off += len;
	if (off >= wr->wr_circ_off + wr->wr_circ_size)
		off = wr->wr_circ_off;
	*offp = off;
	return 0;
}
2436:
2437: static void
2438: wapbl_circ_advance(struct wapbl_replay *wr, size_t len, off_t *offp)
2439: {
2440: size_t slen;
2441: off_t off = *offp;
2442:
1.14 joerg 2443: KASSERT(((len >> wr->wr_log_dev_bshift) <<
2444: wr->wr_log_dev_bshift) == len);
1.2 simonb 2445:
1.14 joerg 2446: if (off < wr->wr_circ_off)
2447: off = wr->wr_circ_off;
2448: slen = wr->wr_circ_off + wr->wr_circ_size - off;
1.2 simonb 2449: if (slen < len) {
2450: len -= slen;
1.14 joerg 2451: off = wr->wr_circ_off;
1.2 simonb 2452: }
2453: off += len;
1.14 joerg 2454: if (off >= wr->wr_circ_off + wr->wr_circ_size)
2455: off = wr->wr_circ_off;
1.2 simonb 2456: *offp = off;
2457: }
2458:
2459: /****************************************************************/
2460:
/*
 * Open a log for replay: read and validate the commit headers and
 * scan the log to build the in-memory set of blocks to be replayed.
 * On success a new replay handle is returned via *wrp; it must later
 * be shut down with wapbl_replay_stop()/wapbl_replay_free().
 */
int
wapbl_replay_start(struct wapbl_replay **wrp, struct vnode *vp,
	daddr_t off, size_t count, size_t blksize)
{
	struct wapbl_replay *wr;
	int error;
	struct vnode *devvp;
	daddr_t logpbn;
	uint8_t *scratch;
	struct wapbl_wc_header *wch;
	struct wapbl_wc_header *wch2;
	/* Use this until we read the actual log header */
	int log_dev_bshift = ilog2(blksize);
	size_t used;
	daddr_t pbn;

	WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
	    ("wapbl_replay_start: vp=%p off=%"PRId64 " count=%zu blksize=%zu\n",
	    vp, off, count, blksize));

	if (off < 0)
		return EINVAL;

	if (blksize < DEV_BSIZE)
		return EINVAL;
	if (blksize % DEV_BSIZE)
		return EINVAL;

#ifdef _KERNEL
#if 0
	/* XXX vp->v_size isn't reliably set for VBLK devices,
	 * especially root. However, we might still want to verify
	 * that the full load is readable */
	if ((off + count) * blksize > vp->v_size)
		return EINVAL;
#endif
	if ((error = VOP_BMAP(vp, off, &devvp, &logpbn, 0)) != 0) {
		return error;
	}
#else /* ! _KERNEL */
	devvp = vp;
	logpbn = off;
#endif /* ! _KERNEL */

	scratch = wapbl_alloc(MAXBSIZE);

	/* Read both commit-header slots in a single read. */
	pbn = logpbn;
#ifdef _KERNEL
	pbn = btodb(pbn << log_dev_bshift);
#endif
	error = wapbl_read(scratch, 2<<log_dev_bshift, devvp, pbn);
	if (error)
		goto errout;

	wch = (struct wapbl_wc_header *)scratch;
	wch2 =
	    (struct wapbl_wc_header *)(scratch + (1<<log_dev_bshift));
	/* XXX verify checksums and magic numbers */
	if (wch->wc_type != WAPBL_WC_HEADER) {
		printf("Unrecognized wapbl magic: 0x%08x\n", wch->wc_type);
		error = EFTYPE;
		goto errout;
	}

	/* Prefer whichever header slot holds the newer generation. */
	if (wch2->wc_generation > wch->wc_generation)
		wch = wch2;

	wr = wapbl_calloc(1, sizeof(*wr));

	wr->wr_logvp = vp;
	wr->wr_devvp = devvp;
	wr->wr_logpbn = logpbn;

	/* scratch ownership passes to wr; freed by wapbl_replay_stop(). */
	wr->wr_scratch = scratch;

	wr->wr_log_dev_bshift = wch->wc_log_dev_bshift;
	wr->wr_fs_dev_bshift = wch->wc_fs_dev_bshift;
	wr->wr_circ_off = wch->wc_circ_off;
	wr->wr_circ_size = wch->wc_circ_size;
	wr->wr_generation = wch->wc_generation;

	used = wapbl_space_used(wch->wc_circ_size, wch->wc_head, wch->wc_tail);

	WAPBL_PRINTF(WAPBL_PRINT_REPLAY,
	    ("wapbl_replay: head=%"PRId64" tail=%"PRId64" off=%"PRId64
	    " len=%"PRId64" used=%zu\n",
	    wch->wc_head, wch->wc_tail, wch->wc_circ_off,
	    wch->wc_circ_size, used));

	wapbl_blkhash_init(wr, (used >> wch->wc_fs_dev_bshift));

	/* Scan the log and populate the replay block hash. */
	error = wapbl_replay_process(wr, wch->wc_head, wch->wc_tail);
	if (error) {
		wapbl_replay_stop(wr);
		wapbl_replay_free(wr);
		return error;
	}

	*wrp = wr;
	return 0;

 errout:
	wapbl_free(scratch, MAXBSIZE);
	return error;
}
2566:
2567: void
2568: wapbl_replay_stop(struct wapbl_replay *wr)
2569: {
2570:
1.4 joerg 2571: if (!wapbl_replay_isopen(wr))
2572: return;
2573:
1.2 simonb 2574: WAPBL_PRINTF(WAPBL_PRINT_REPLAY, ("wapbl_replay_stop called\n"));
2575:
1.18 yamt 2576: wapbl_free(wr->wr_scratch, MAXBSIZE);
2577: wr->wr_scratch = NULL;
1.2 simonb 2578:
1.18 yamt 2579: wr->wr_logvp = NULL;
1.2 simonb 2580:
2581: wapbl_blkhash_clear(wr);
2582: wapbl_blkhash_free(wr);
2583: }
2584:
2585: void
2586: wapbl_replay_free(struct wapbl_replay *wr)
2587: {
2588:
2589: KDASSERT(!wapbl_replay_isopen(wr));
2590:
2591: if (wr->wr_inodes)
1.18 yamt 2592: wapbl_free(wr->wr_inodes,
2593: wr->wr_inodescnt * sizeof(wr->wr_inodes[0]));
2594: wapbl_free(wr, sizeof(*wr));
1.2 simonb 2595: }
2596:
#ifdef _KERNEL
/*
 * Out-of-line wrapper around wapbl_replay_isopen() -- presumably for
 * callers that need a real function symbol rather than the inline/
 * macro form; TODO confirm against wapbl.h.
 */
int
wapbl_replay_isopen1(struct wapbl_replay *wr)
{

	return wapbl_replay_isopen(wr);
}
#endif
1.2 simonb 2605:
1.61.2.1! skrll 2606: /*
! 2607: * calculate the disk address for the i'th block in the wc_blockblist
! 2608: * offset by j blocks of size blen.
! 2609: *
! 2610: * wc_daddr is always a kernel disk address in DEV_BSIZE units that
! 2611: * was written to the journal.
! 2612: *
! 2613: * The kernel needs that address plus the offset in DEV_BSIZE units.
! 2614: *
! 2615: * Userland needs that address plus the offset in blen units.
! 2616: *
! 2617: */
! 2618: static daddr_t
! 2619: wapbl_block_daddr(struct wapbl_wc_blocklist *wc, int i, int j, int blen)
! 2620: {
! 2621: daddr_t pbn;
! 2622:
! 2623: #ifdef _KERNEL
! 2624: pbn = wc->wc_blocks[i].wc_daddr + btodb(j * blen);
! 2625: #else
! 2626: pbn = dbtob(wc->wc_blocks[i].wc_daddr) / blen + j;
! 2627: #endif
! 2628:
! 2629: return pbn;
! 2630: }
! 2631:
1.10 joerg 2632: static void
2633: wapbl_replay_process_blocks(struct wapbl_replay *wr, off_t *offp)
2634: {
2635: struct wapbl_wc_blocklist *wc =
2636: (struct wapbl_wc_blocklist *)wr->wr_scratch;
1.14 joerg 2637: int fsblklen = 1 << wr->wr_fs_dev_bshift;
1.10 joerg 2638: int i, j, n;
2639:
2640: for (i = 0; i < wc->wc_blkcount; i++) {
2641: /*
2642: * Enter each physical block into the hashtable independently.
2643: */
1.14 joerg 2644: n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
1.10 joerg 2645: for (j = 0; j < n; j++) {
1.61.2.1! skrll 2646: wapbl_blkhash_ins(wr, wapbl_block_daddr(wc, i, j, fsblklen),
1.10 joerg 2647: *offp);
2648: wapbl_circ_advance(wr, fsblklen, offp);
2649: }
2650: }
2651: }
2652:
2653: static void
2654: wapbl_replay_process_revocations(struct wapbl_replay *wr)
2655: {
2656: struct wapbl_wc_blocklist *wc =
2657: (struct wapbl_wc_blocklist *)wr->wr_scratch;
1.34 mlelstv 2658: int fsblklen = 1 << wr->wr_fs_dev_bshift;
1.10 joerg 2659: int i, j, n;
2660:
2661: for (i = 0; i < wc->wc_blkcount; i++) {
2662: /*
2663: * Remove any blocks found from the hashtable.
2664: */
1.14 joerg 2665: n = wc->wc_blocks[i].wc_dlen >> wr->wr_fs_dev_bshift;
1.10 joerg 2666: for (j = 0; j < n; j++)
1.61.2.1! skrll 2667: wapbl_blkhash_rem(wr, wapbl_block_daddr(wc, i, j, fsblklen));
1.10 joerg 2668: }
2669: }
2670:
/*
 * Merge a WAPBL_WC_INODES record into wr->wr_inodes.  'oldoff' is the
 * log offset of the record itself and 'newoff' the offset just past
 * it; they are saved so the record's location is known not to be
 * overwritable.  A record with wc_clear set resets the table first.
 */
static void
wapbl_replay_process_inodes(struct wapbl_replay *wr, off_t oldoff, off_t newoff)
{
	struct wapbl_wc_inodelist *wc =
	    (struct wapbl_wc_inodelist *)wr->wr_scratch;
	void *new_inodes;
	/* size of the table before this record is applied */
	const size_t oldsize = wr->wr_inodescnt * sizeof(wr->wr_inodes[0]);

	KASSERT(sizeof(wr->wr_inodes[0]) == sizeof(wc->wc_inodes[0]));

	/*
	 * Keep track of where we found this so location won't be
	 * overwritten.
	 */
	if (wc->wc_clear) {
		/* The record resets the table: drop what we collected. */
		wr->wr_inodestail = oldoff;
		wr->wr_inodescnt = 0;
		if (wr->wr_inodes != NULL) {
			wapbl_free(wr->wr_inodes, oldsize);
			wr->wr_inodes = NULL;
		}
	}
	wr->wr_inodeshead = newoff;
	if (wc->wc_inocnt == 0)
		return;

	/* Grow the array by hand and append the new entries. */
	new_inodes = wapbl_alloc((wr->wr_inodescnt + wc->wc_inocnt) *
	    sizeof(wr->wr_inodes[0]));
	if (wr->wr_inodes != NULL) {
		memcpy(new_inodes, wr->wr_inodes, oldsize);
		wapbl_free(wr->wr_inodes, oldsize);
	}
	wr->wr_inodes = new_inodes;
	memcpy(&wr->wr_inodes[wr->wr_inodescnt], wc->wc_inodes,
	    wc->wc_inocnt * sizeof(wr->wr_inodes[0]));
	wr->wr_inodescnt += wc->wc_inocnt;
}
2708:
/*
 * Scan the log from 'tail' to 'head', dispatching each record to the
 * appropriate handler to build the replay state.  Returns EFTYPE on
 * an unrecognized or inconsistent record; the block hash is cleared
 * on failure.
 */
static int
wapbl_replay_process(struct wapbl_replay *wr, off_t head, off_t tail)
{
	off_t off;
	int error;

	int logblklen = 1 << wr->wr_log_dev_bshift;

	wapbl_blkhash_clear(wr);

	off = tail;
	while (off != head) {
		struct wapbl_wc_null *wcn;
		off_t saveoff = off;	/* start of this record */
		error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
		if (error)
			goto errout;
		wcn = (struct wapbl_wc_null *)wr->wr_scratch;
		switch (wcn->wc_type) {
		case WAPBL_WC_BLOCKS:
			wapbl_replay_process_blocks(wr, &off);
			break;

		case WAPBL_WC_REVOCATIONS:
			wapbl_replay_process_revocations(wr);
			break;

		case WAPBL_WC_INODES:
			wapbl_replay_process_inodes(wr, saveoff, off);
			break;

		default:
			printf("Unrecognized wapbl type: 0x%08x\n",
			    wcn->wc_type);
			error = EFTYPE;
			goto errout;
		}
		/*
		 * Cross-check: advancing from the record start by its own
		 * wc_len must land exactly where the handler left off.
		 */
		wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
		if (off != saveoff) {
			printf("wapbl_replay: corrupted records\n");
			error = EFTYPE;
			goto errout;
		}
	}
	return 0;

 errout:
	wapbl_blkhash_clear(wr);
	return error;
}
2759:
#if 0
/*
 * NOTE(review): dead code, compiled out.  It references 'wch', which
 * is not in scope in this function, so it would not compile if simply
 * re-enabled.  Kept for reference: it compares each logged block with
 * the corresponding on-disk block and reports mismatches.
 */
int
wapbl_replay_verify(struct wapbl_replay *wr, struct vnode *fsdevvp)
{
	off_t off;
	int mismatchcnt = 0;
	int logblklen = 1 << wr->wr_log_dev_bshift;
	int fsblklen = 1 << wr->wr_fs_dev_bshift;
	void *scratch1 = wapbl_alloc(MAXBSIZE);
	void *scratch2 = wapbl_alloc(MAXBSIZE);
	int error = 0;

	KDASSERT(wapbl_replay_isopen(wr));

	off = wch->wc_tail;
	while (off != wch->wc_head) {
		struct wapbl_wc_null *wcn;
#ifdef DEBUG
		off_t saveoff = off;
#endif
		error = wapbl_circ_read(wr, wr->wr_scratch, logblklen, &off);
		if (error)
			goto out;
		wcn = (struct wapbl_wc_null *)wr->wr_scratch;
		switch (wcn->wc_type) {
		case WAPBL_WC_BLOCKS:
		{
			struct wapbl_wc_blocklist *wc =
			    (struct wapbl_wc_blocklist *)wr->wr_scratch;
			int i;
			for (i = 0; i < wc->wc_blkcount; i++) {
				int foundcnt = 0;
				int dirtycnt = 0;
				int j, n;
				/*
				 * Check each physical block into the
				 * hashtable independently
				 */
				n = wc->wc_blocks[i].wc_dlen >>
				    wch->wc_fs_dev_bshift;
				for (j = 0; j < n; j++) {
					struct wapbl_blk *wb =
					   wapbl_blkhash_get(wr,
					   wapbl_block_daddr(wc, i, j, fsblklen));
					if (wb && (wb->wb_off == off)) {
						foundcnt++;
						error =
						    wapbl_circ_read(wr,
						    scratch1, fsblklen,
						    &off);
						if (error)
							goto out;
						error =
						    wapbl_read(scratch2,
						    fsblklen, fsdevvp,
						    wb->wb_blk);
						if (error)
							goto out;
						if (memcmp(scratch1,
							   scratch2,
							   fsblklen)) {
							printf(
		"wapbl_verify: mismatch block %"PRId64" at off %"PRIdMAX"\n",
		wb->wb_blk, (intmax_t)off);
							dirtycnt++;
							mismatchcnt++;
						}
					} else {
						wapbl_circ_advance(wr,
						    fsblklen, &off);
					}
				}
#if 0
				/*
				 * If all of the blocks in an entry
				 * are clean, then remove all of its
				 * blocks from the hashtable since they
				 * never will need replay.
				 */
				if ((foundcnt != 0) &&
				    (dirtycnt == 0)) {
					off = saveoff;
					wapbl_circ_advance(wr,
					    logblklen, &off);
					for (j = 0; j < n; j++) {
						struct wapbl_blk *wb =
						   wapbl_blkhash_get(wr,
						   wapbl_block_daddr(wc, i, j, fsblklen));
						if (wb &&
						  (wb->wb_off == off)) {
							wapbl_blkhash_rem(wr, wb->wb_blk);
						}
						wapbl_circ_advance(wr,
						    fsblklen, &off);
					}
				}
#endif
			}
		}
			break;
		case WAPBL_WC_REVOCATIONS:
		case WAPBL_WC_INODES:
			break;
		default:
			KASSERT(0);
		}
#ifdef DEBUG
		wapbl_circ_advance(wr, wcn->wc_len, &saveoff);
		KASSERT(off == saveoff);
#endif
	}
 out:
	wapbl_free(scratch1, MAXBSIZE);
	wapbl_free(scratch2, MAXBSIZE);
	if (!error && mismatchcnt)
		error = EFTYPE;
	return error;
}
#endif
2879:
2880: int
2881: wapbl_replay_write(struct wapbl_replay *wr, struct vnode *fsdevvp)
2882: {
1.9 joerg 2883: struct wapbl_blk *wb;
2884: size_t i;
1.2 simonb 2885: off_t off;
1.9 joerg 2886: void *scratch;
1.2 simonb 2887: int error = 0;
1.14 joerg 2888: int fsblklen = 1 << wr->wr_fs_dev_bshift;
1.2 simonb 2889:
2890: KDASSERT(wapbl_replay_isopen(wr));
2891:
1.51 para 2892: scratch = wapbl_alloc(MAXBSIZE);
1.2 simonb 2893:
1.37 drochner 2894: for (i = 0; i <= wr->wr_blkhashmask; ++i) {
1.9 joerg 2895: LIST_FOREACH(wb, &wr->wr_blkhash[i], wb_hash) {
2896: off = wb->wb_off;
2897: error = wapbl_circ_read(wr, scratch, fsblklen, &off);
2898: if (error)
2899: break;
2900: error = wapbl_write(scratch, fsblklen, fsdevvp,
2901: wb->wb_blk);
2902: if (error)
2903: break;
1.2 simonb 2904: }
2905: }
1.9 joerg 2906:
1.18 yamt 2907: wapbl_free(scratch, MAXBSIZE);
1.2 simonb 2908: return error;
2909: }
2910:
2911: int
1.6 joerg 2912: wapbl_replay_can_read(struct wapbl_replay *wr, daddr_t blk, long len)
2913: {
1.14 joerg 2914: int fsblklen = 1 << wr->wr_fs_dev_bshift;
1.6 joerg 2915:
2916: KDASSERT(wapbl_replay_isopen(wr));
2917: KASSERT((len % fsblklen) == 0);
2918:
2919: while (len != 0) {
2920: struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
2921: if (wb)
2922: return 1;
2923: len -= fsblklen;
2924: }
2925: return 0;
2926: }
2927:
2928: int
1.2 simonb 2929: wapbl_replay_read(struct wapbl_replay *wr, void *data, daddr_t blk, long len)
2930: {
1.14 joerg 2931: int fsblklen = 1 << wr->wr_fs_dev_bshift;
1.2 simonb 2932:
2933: KDASSERT(wapbl_replay_isopen(wr));
2934:
2935: KASSERT((len % fsblklen) == 0);
2936:
2937: while (len != 0) {
2938: struct wapbl_blk *wb = wapbl_blkhash_get(wr, blk);
2939: if (wb) {
2940: off_t off = wb->wb_off;
2941: int error;
2942: error = wapbl_circ_read(wr, data, fsblklen, &off);
2943: if (error)
2944: return error;
2945: }
2946: data = (uint8_t *)data + fsblklen;
2947: len -= fsblklen;
2948: blk++;
2949: }
2950: return 0;
2951: }
1.35 pooka 2952:
1.36 pooka 2953: #ifdef _KERNEL
1.35 pooka 2954: /*
1.61 snj 2955: * This is not really a module now, but maybe on its way to
1.35 pooka 2956: * being one some day.
2957: */
2958: MODULE(MODULE_CLASS_VFS, wapbl, NULL);
2959:
2960: static int
2961: wapbl_modcmd(modcmd_t cmd, void *arg)
2962: {
2963:
2964: switch (cmd) {
2965: case MODULE_CMD_INIT:
1.39 christos 2966: wapbl_init();
1.35 pooka 2967: return 0;
2968: case MODULE_CMD_FINI:
1.39 christos 2969: #ifdef notyet
2970: return wapbl_fini(true);
2971: #endif
1.35 pooka 2972: return EOPNOTSUPP;
2973: default:
2974: return ENOTTY;
2975: }
2976: }
1.36 pooka 2977: #endif /* _KERNEL */
CVSweb <webmaster@jp.NetBSD.org>