Annotation of src/sys/kern/vfs_subr.c, Revision 1.457
1.457 ! hannken 1: /* $NetBSD: vfs_subr.c,v 1.456 2017/01/11 09:07:57 hannken Exp $ */
1.74 thorpej 2:
3: /*-
1.315 ad 4: * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.446 hannken 9: * NASA Ames Research Center, by Charles M. Hannum, by Andrew Doran,
10: * by Marshall Kirk McKusick and Greg Ganger at the University of Michigan.
1.74 thorpej 11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: *
21: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31: * POSSIBILITY OF SUCH DAMAGE.
32: */
1.32 cgd 33:
1.29 cgd 34: /*
1.30 mycroft 35: * Copyright (c) 1989, 1993
36: * The Regents of the University of California. All rights reserved.
1.29 cgd 37: * (c) UNIX System Laboratories, Inc.
38: * All or some portions of this file are derived from material licensed
39: * to the University of California by American Telephone and Telegraph
40: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
41: * the permission of UNIX System Laboratories, Inc.
42: *
43: * Redistribution and use in source and binary forms, with or without
44: * modification, are permitted provided that the following conditions
45: * are met:
46: * 1. Redistributions of source code must retain the above copyright
47: * notice, this list of conditions and the following disclaimer.
48: * 2. Redistributions in binary form must reproduce the above copyright
49: * notice, this list of conditions and the following disclaimer in the
50: * documentation and/or other materials provided with the distribution.
1.204 agc 51: * 3. Neither the name of the University nor the names of its contributors
1.29 cgd 52: * may be used to endorse or promote products derived from this software
53: * without specific prior written permission.
54: *
55: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65: * SUCH DAMAGE.
66: *
1.32 cgd 67: * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
1.29 cgd 68: */
69:
1.162 lukem 70: #include <sys/cdefs.h>
1.457 ! hannken 71: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.456 2017/01/11 09:07:57 hannken Exp $");
1.78 mrg 72:
1.448 pooka 73: #ifdef _KERNEL_OPT
1.125 chs 74: #include "opt_ddb.h"
1.95 thorpej 75: #include "opt_compat_netbsd.h"
1.97 christos 76: #include "opt_compat_43.h"
1.448 pooka 77: #endif
1.29 cgd 78:
79: #include <sys/param.h>
1.30 mycroft 80: #include <sys/systm.h>
1.363 pooka 81: #include <sys/conf.h>
1.400 pooka 82: #include <sys/dirent.h>
1.421 rmind 83: #include <sys/filedesc.h>
1.138 bouyer 84: #include <sys/kernel.h>
1.29 cgd 85: #include <sys/mount.h>
1.450 hannken 86: #include <sys/vnode_impl.h>
1.30 mycroft 87: #include <sys/stat.h>
1.421 rmind 88: #include <sys/sysctl.h>
1.29 cgd 89: #include <sys/namei.h>
90: #include <sys/buf.h>
91: #include <sys/errno.h>
1.366 yamt 92: #include <sys/kmem.h>
1.51 christos 93: #include <sys/syscallargs.h>
1.266 elad 94: #include <sys/kauth.h>
1.401 pgoyette 95: #include <sys/module.h>
1.50 christos 96:
1.380 elad 97: #include <miscfs/genfs/genfs.h>
1.30 mycroft 98: #include <miscfs/specfs/specdev.h>
1.125 chs 99: #include <uvm/uvm_ddb.h>
1.129 mrg 100:
/*
 * Sixteen-entry table of vnode types.
 * NOTE(review): judging by the name, this maps the file-type bits of a
 * mode word to an enum vtype; the lookup macro lives outside this file,
 * so confirm the indexing against <sys/vnode.h>.
 */
1.353 pooka 101: const enum vtype iftovt_tab[16] = {
102: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
103: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
104: };
/*
 * Nine-entry table of S_IF* file-type constants.
 * NOTE(review): presumably indexed by enum vtype (the inverse of
 * iftovt_tab); the lookup macro is not visible here -- confirm.
 */
105: const int vttoif_tab[9] = {
106: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
107: S_IFSOCK, S_IFIFO, S_IFMT,
108: };
109:
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) links bp at the head of buffer list dp through its
 * b_vnbufs entry.
 *
 * bufremvn(bp) unlinks bp and then stores NOLIST in its next pointer to
 * mark it as being on no list; brelvp() and reassignbuf() test for that
 * marker before unlinking.
 *
 * bufremvn() is wrapped in do { } while (0) so that "bufremvn(bp);"
 * expands to exactly one statement and remains safe as the body of an
 * unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/*CONSTCOND*/ 0)
118:
119: int doforce = 1; /* 1 => permit forcible unmounting */
120: int prtactive = 0; /* 1 => print out reclaim of active vnodes */
121:
/* Mount that bdevvp() and cdevvp() hand to vcache_new(); defined elsewhere. */
1.447 hannken 122: extern struct mount *dead_rootmount;
123:
1.353 pooka 124: /*
1.89 kleink 125: * Local declarations.
126: */
1.276 hannken 127:
1.446 hannken 128: static void vn_initialize_syncerd(void);
1.51 christos 129:
/*
 * Bring up the vnode management layer.
 *
 * The syncer bookkeeping is set up first, followed by the mount
 * subsystem and then the vnode subsystem.
 */
void
vntblinit(void)
{

	vn_initialize_syncerd();
	vfs_mount_sysinit();
	vfs_vnode_sysinit();
}
141:
142: /*
1.421 rmind 143: * Flush out and invalidate all buffers associated with a vnode.
144: * Called with the underlying vnode locked, which should prevent new dirty
145: * buffers from being queued.
 *
 * flags:    when V_SAVE is set, the page flush additionally uses
 *           PGO_CLEANIT | PGO_RECLAIM and VOP_FSYNC() is run before the
 *           buffers are released; without it the page flush uses only
 *           PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO and VOP_FSYNC() is skipped.
 * cred:     credentials handed to VOP_FSYNC().
 * l:        not referenced in the body.
 * catch_p, slptimeo: passed through to bbusy() when claiming a buffer.
 *
 * Returns 0 on success, or the first error reported by VOP_PUTPAGES(),
 * VOP_FSYNC() or bbusy().  EPASSTHROUGH from bbusy() is never returned;
 * it restarts the scan at the dirty list.
1.353 pooka 146: */
1.421 rmind 147: int
148: vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
1.445 matt 149: bool catch_p, int slptimeo)
1.353 pooka 150: {
1.421 rmind 151: struct buf *bp, *nbp;
152: int error;
153: int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
154: (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
1.353 pooka 155:
1.421 rmind 156: /* XXXUBC this doesn't look at flags or slp* */
/*
 * NOTE(review): the interlock taken here is not released anywhere in
 * this function; presumably VOP_PUTPAGES() drops it -- confirm.
 */
1.423 rmind 157: mutex_enter(vp->v_interlock);
1.421 rmind 158: error = VOP_PUTPAGES(vp, 0, 0, flushflags);
159: if (error) {
160: return error;
1.353 pooka 161: }
162:
1.421 rmind 163: if (flags & V_SAVE) {
164: error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
165: if (error)
166: return (error);
/* After a successful synchronous fsync no dirty buffer may remain. */
167: KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
1.353 pooka 168: }
169:
1.421 rmind 170: mutex_enter(&bufcache_lock);
/* An EPASSTHROUGH result from bbusy() in either loop rescans from here. */
171: restart:
172: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1.426 yamt 173: KASSERT(bp->b_vp == vp);
/* Fetch the successor first: brelsel() below is given BC_INVAL for bp. */
1.421 rmind 174: nbp = LIST_NEXT(bp, b_vnbufs);
1.445 matt 175: error = bbusy(bp, catch_p, slptimeo, NULL);
1.421 rmind 176: if (error != 0) {
177: if (error == EPASSTHROUGH)
178: goto restart;
179: mutex_exit(&bufcache_lock);
180: return (error);
181: }
182: brelsel(bp, BC_INVAL | BC_VFLUSH);
1.353 pooka 183: }
184:
1.421 rmind 185: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
1.426 yamt 186: KASSERT(bp->b_vp == vp);
1.421 rmind 187: nbp = LIST_NEXT(bp, b_vnbufs);
1.445 matt 188: error = bbusy(bp, catch_p, slptimeo, NULL);
1.421 rmind 189: if (error != 0) {
190: if (error == EPASSTHROUGH)
191: goto restart;
192: mutex_exit(&bufcache_lock);
193: return (error);
194: }
1.309 ad 195: /*
1.421 rmind 196: * XXX Since there are no node locks for NFS, I believe
197: * there is a slight chance that a delayed write will
198: * occur while sleeping just above, so check for it.
1.309 ad 199: */
1.421 rmind 200: if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
201: #ifdef DEBUG
202: printf("buffer still DELWRI\n");
203: #endif
/* Write the buffer with bufcache_lock dropped, then rescan both lists. */
204: bp->b_cflags |= BC_BUSY | BC_VFLUSH;
205: mutex_exit(&bufcache_lock);
1.424 hannken 206: VOP_BWRITE(bp->b_vp, bp);
1.421 rmind 207: mutex_enter(&bufcache_lock);
208: goto restart;
1.309 ad 209: }
1.421 rmind 210: brelsel(bp, BC_INVAL | BC_VFLUSH);
1.202 yamt 211: }
212:
1.421 rmind 213: #ifdef DIAGNOSTIC
214: if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
215: panic("vinvalbuf: flush failed, vp %p", vp);
216: #endif
1.202 yamt 217:
1.421 rmind 218: mutex_exit(&bufcache_lock);
1.309 ad 219:
1.421 rmind 220: return (0);
1.202 yamt 221: }
222:
1.327 ad 223: /*
1.421 rmind 224: * Destroy any in core blocks past the truncation length.
225: * Called with the underlying vnode locked, which should prevent new dirty
226: * buffers from being queued.
 *
 * lbn:      first logical block to discard; buffers whose b_lblkno is
 *           below lbn are left alone.
 * catch_p, slptimeo: passed through to bbusy() when claiming a buffer.
 *
 * Returns 0 on success, or the error from VOP_PUTPAGES() or bbusy().
 * EPASSTHROUGH from bbusy() is never returned; it restarts the scan at
 * the dirty list.
1.29 cgd 227: */
1.50 christos 228: int
1.445 matt 229: vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch_p, int slptimeo)
1.29 cgd 230: {
1.421 rmind 231: struct buf *bp, *nbp;
232: int error;
233: voff_t off;
1.29 cgd 234:
/* Byte offset of block lbn (shifted by mnt_fs_bshift), rounded up to a page. */
1.421 rmind 235: off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
/*
 * NOTE(review): the interlock taken here is not released anywhere in
 * this function; presumably VOP_PUTPAGES() drops it -- confirm.
 */
1.423 rmind 236: mutex_enter(vp->v_interlock);
1.421 rmind 237: error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
238: if (error) {
239: return error;
240: }
1.338 ad 241:
1.421 rmind 242: mutex_enter(&bufcache_lock);
/* An EPASSTHROUGH result from bbusy() in either loop rescans from here. */
243: restart:
244: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1.426 yamt 245: KASSERT(bp->b_vp == vp);
1.421 rmind 246: nbp = LIST_NEXT(bp, b_vnbufs);
247: if (bp->b_lblkno < lbn)
248: continue;
1.445 matt 249: error = bbusy(bp, catch_p, slptimeo, NULL);
1.421 rmind 250: if (error != 0) {
251: if (error == EPASSTHROUGH)
252: goto restart;
253: mutex_exit(&bufcache_lock);
254: return (error);
1.339 ad 255: }
1.421 rmind 256: brelsel(bp, BC_INVAL | BC_VFLUSH);
1.339 ad 257: }
1.421 rmind 258:
259: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
1.426 yamt 260: KASSERT(bp->b_vp == vp);
1.421 rmind 261: nbp = LIST_NEXT(bp, b_vnbufs);
262: if (bp->b_lblkno < lbn)
263: continue;
1.445 matt 264: error = bbusy(bp, catch_p, slptimeo, NULL);
1.421 rmind 265: if (error != 0) {
266: if (error == EPASSTHROUGH)
267: goto restart;
268: mutex_exit(&bufcache_lock);
269: return (error);
1.338 ad 270: }
1.421 rmind 271: brelsel(bp, BC_INVAL | BC_VFLUSH);
1.327 ad 272: }
1.421 rmind 273: mutex_exit(&bufcache_lock);
274:
275: return (0);
1.29 cgd 276: }
277:
278: /*
1.421 rmind 279: * Flush all dirty buffers from a vnode.
280: * Called with the underlying vnode locked, which should prevent new dirty
281: * buffers from being queued.
 *
 * flags: FSYNC_WAIT selects synchronous operation (PGO_SYNCIO for the
 *        page flush, plus a wait for v_numoutput to reach zero at the
 *        end); FSYNC_LAZY adds PGO_LAZY to the page flush.
 *
 * Returns 0, or the error from bwrite().  The result of VOP_PUTPAGES()
 * is discarded.
1.29 cgd 282: */
1.422 hannken 283: int
1.434 chs 284: vflushbuf(struct vnode *vp, int flags)
1.29 cgd 285: {
1.421 rmind 286: struct buf *bp, *nbp;
1.434 chs 287: int error, pflags;
288: bool dirty, sync;
1.29 cgd 289:
1.434 chs 290: sync = (flags & FSYNC_WAIT) != 0;
291: pflags = PGO_CLEANIT | PGO_ALLPAGES |
292: (sync ? PGO_SYNCIO : 0) |
293: ((flags & FSYNC_LAZY) ? PGO_LAZY : 0);
/*
 * NOTE(review): the interlock taken here has no matching exit before
 * the loop; presumably VOP_PUTPAGES() drops it -- confirm.
 */
1.423 rmind 294: mutex_enter(vp->v_interlock);
1.434 chs 295: (void) VOP_PUTPAGES(vp, 0, 0, pflags);
296:
1.421 rmind 297: loop:
298: mutex_enter(&bufcache_lock);
299: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1.426 yamt 300: KASSERT(bp->b_vp == vp);
1.421 rmind 301: nbp = LIST_NEXT(bp, b_vnbufs);
/* Skip buffers that are already marked busy. */
302: if ((bp->b_cflags & BC_BUSY))
303: continue;
304: if ((bp->b_oflags & BO_DELWRI) == 0)
305: panic("vflushbuf: not dirty, bp %p", bp);
306: bp->b_cflags |= BC_BUSY | BC_VFLUSH;
307: mutex_exit(&bufcache_lock);
308: /*
309: * Wait for I/O associated with indirect blocks to complete,
310: * since there is no way to quickly wait for them below.
311: */
/*
 * NOTE(review): the KASSERT at the top of the loop states
 * bp->b_vp == vp, so this test looks always true and the bwrite()
 * branch unreachable while that holds -- confirm intent.
 */
1.434 chs 312: if (bp->b_vp == vp || !sync)
1.421 rmind 313: (void) bawrite(bp);
1.422 hannken 314: else {
315: error = bwrite(bp);
316: if (error)
317: return error;
318: }
/* bufcache_lock was dropped for the write, so rescan from the list head. */
1.421 rmind 319: goto loop;
1.344 ad 320: }
1.421 rmind 321: mutex_exit(&bufcache_lock);
322:
1.434 chs 323: if (!sync)
1.422 hannken 324: return 0;
1.421 rmind 325:
/* Synchronous case: wait until no writes are outstanding on the vnode. */
1.423 rmind 326: mutex_enter(vp->v_interlock);
1.421 rmind 327: while (vp->v_numoutput != 0)
1.423 rmind 328: cv_wait(&vp->v_cv, vp->v_interlock);
1.421 rmind 329: dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
1.423 rmind 330: mutex_exit(vp->v_interlock);
1.421 rmind 331:
/* Buffers dirtied in the meantime are reported and flushed in another pass. */
332: if (dirty) {
333: vprint("vflushbuf: dirty", vp);
334: goto loop;
1.339 ad 335: }
1.422 hannken 336:
337: return 0;
1.29 cgd 338: }
339:
1.421 rmind 340: /*
341: * Create a vnode for a block device.
342: * Used for root filesystem and swap areas.
343: * Also used for memory file system special devices.
344: */
345: int
346: bdevvp(dev_t dev, vnode_t **vpp)
1.376 dyoung 347: {
1.447 hannken 348: struct vattr va;
349:
350: vattr_null(&va);
351: va.va_type = VBLK;
352: va.va_rdev = dev;
1.376 dyoung 353:
1.447 hannken 354: return vcache_new(dead_rootmount, NULL, &va, NOCRED, vpp);
1.376 dyoung 355: }
356:
1.29 cgd 357: /*
1.421 rmind 358: * Create a vnode for a character device.
359: * Used for kernfs and some console handling.
1.29 cgd 360: */
1.50 christos 361: int
1.421 rmind 362: cdevvp(dev_t dev, vnode_t **vpp)
1.29 cgd 363: {
1.447 hannken 364: struct vattr va;
1.30 mycroft 365:
1.447 hannken 366: vattr_null(&va);
367: va.va_type = VCHR;
368: va.va_rdev = dev;
369:
370: return vcache_new(dead_rootmount, NULL, &va, NOCRED, vpp);
1.29 cgd 371: }
372:
373: /*
1.421 rmind 374: * Associate a buffer with a vnode. There must already be a hold on
375: * the vnode.
1.29 cgd 376: */
1.309 ad 377: void
1.421 rmind 378: bgetvp(struct vnode *vp, struct buf *bp)
1.29 cgd 379: {
1.309 ad 380:
1.421 rmind 381: KASSERT(bp->b_vp == NULL);
382: KASSERT(bp->b_objlock == &buffer_lock);
1.423 rmind 383: KASSERT(mutex_owned(vp->v_interlock));
1.421 rmind 384: KASSERT(mutex_owned(&bufcache_lock));
385: KASSERT((bp->b_cflags & BC_BUSY) != 0);
386: KASSERT(!cv_has_waiters(&bp->b_done));
1.29 cgd 387:
1.421 rmind 388: vholdl(vp);
389: bp->b_vp = vp;
390: if (vp->v_type == VBLK || vp->v_type == VCHR)
391: bp->b_dev = vp->v_rdev;
392: else
393: bp->b_dev = NODEV;
1.309 ad 394:
395: /*
1.421 rmind 396: * Insert onto list for new vnode.
1.309 ad 397: */
1.421 rmind 398: bufinsvn(bp, &vp->v_cleanblkhd);
1.423 rmind 399: bp->b_objlock = vp->v_interlock;
1.298 pooka 400: }
401:
1.398 haad 402: /*
1.421 rmind 403: * Disassociate a buffer from a vnode.
1.398 haad 404: */
405: void
1.421 rmind 406: brelvp(struct buf *bp)
1.398 haad 407: {
1.421 rmind 408: struct vnode *vp = bp->b_vp;
1.398 haad 409:
1.421 rmind 410: KASSERT(vp != NULL);
1.423 rmind 411: KASSERT(bp->b_objlock == vp->v_interlock);
412: KASSERT(mutex_owned(vp->v_interlock));
1.421 rmind 413: KASSERT(mutex_owned(&bufcache_lock));
414: KASSERT((bp->b_cflags & BC_BUSY) != 0);
415: KASSERT(!cv_has_waiters(&bp->b_done));
1.398 haad 416:
1.421 rmind 417: /*
418: * Delete from old vnode list, if on one.
419: */
420: if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
421: bufremvn(bp);
1.398 haad 422:
1.421 rmind 423: if (vp->v_uobj.uo_npages == 0 && (vp->v_iflag & VI_ONWORKLST) &&
424: LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
425: vp->v_iflag &= ~VI_WRMAPDIRTY;
426: vn_syncer_remove_from_worklist(vp);
1.309 ad 427: }
1.29 cgd 428:
1.421 rmind 429: bp->b_objlock = &buffer_lock;
430: bp->b_vp = NULL;
431: holdrelel(vp);
1.29 cgd 432: }
433:
434: /*
1.421 rmind 435: * Reassign a buffer from one vnode list to another.
436: * The list reassignment must be within the same vnode.
437: * Used to assign file specific control information
438: * (indirect blocks) to the list to which they belong.
1.29 cgd 439: */
1.30 mycroft 440: void
1.421 rmind 441: reassignbuf(struct buf *bp, struct vnode *vp)
1.29 cgd 442: {
1.421 rmind 443: struct buflists *listheadp;
444: int delayx;
1.29 cgd 445:
1.421 rmind 446: KASSERT(mutex_owned(&bufcache_lock));
1.423 rmind 447: KASSERT(bp->b_objlock == vp->v_interlock);
448: KASSERT(mutex_owned(vp->v_interlock));
1.421 rmind 449: KASSERT((bp->b_cflags & BC_BUSY) != 0);
1.351 ad 450:
451: /*
1.421 rmind 452: * Delete from old vnode list, if on one.
1.351 ad 453: */
1.421 rmind 454: if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
455: bufremvn(bp);
1.309 ad 456:
1.273 reinoud 457: /*
1.421 rmind 458: * If dirty, put on list of dirty buffers;
459: * otherwise insert onto list of clean buffers.
1.273 reinoud 460: */
1.421 rmind 461: if ((bp->b_oflags & BO_DELWRI) == 0) {
462: listheadp = &vp->v_cleanblkhd;
463: if (vp->v_uobj.uo_npages == 0 &&
464: (vp->v_iflag & VI_ONWORKLST) &&
465: LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
466: vp->v_iflag &= ~VI_WRMAPDIRTY;
467: vn_syncer_remove_from_worklist(vp);
1.315 ad 468: }
1.421 rmind 469: } else {
470: listheadp = &vp->v_dirtyblkhd;
471: if ((vp->v_iflag & VI_ONWORKLST) == 0) {
472: switch (vp->v_type) {
473: case VDIR:
474: delayx = dirdelay;
475: break;
476: case VBLK:
1.438 hannken 477: if (spec_node_getmountedfs(vp) != NULL) {
1.421 rmind 478: delayx = metadelay;
479: break;
480: }
481: /* fall through */
482: default:
483: delayx = filedelay;
484: break;
1.29 cgd 485: }
1.421 rmind 486: if (!vp->v_mount ||
487: (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
488: vn_syncer_add_to_worklist(vp, delayx);
1.231 mycroft 489: }
1.166 chs 490: }
1.421 rmind 491: bufinsvn(bp, listheadp);
1.29 cgd 492: }
493:
494: /*
1.409 hannken 495: * Lookup a vnode by device number and return it referenced.
1.29 cgd 496: */
1.50 christos 497: int
1.309 ad 498: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29 cgd 499: {
500:
1.436 hannken 501: return (spec_node_lookup_by_dev(type, dev, vpp) == 0);
1.96 thorpej 502: }
503:
504: /*
505: * Revoke all the vnodes corresponding to the specified minor number
506: * range (endpoints inclusive) of the specified major.
507: */
508: void
1.247 thorpej 509: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96 thorpej 510: {
1.436 hannken 511: vnode_t *vp;
1.316 ad 512: dev_t dev;
1.96 thorpej 513: int mn;
514:
1.316 ad 515: for (mn = minl; mn <= minh; mn++) {
516: dev = makedev(maj, mn);
1.436 hannken 517: while (spec_node_lookup_by_dev(type, dev, &vp) == 0) {
518: VOP_REVOKE(vp, REVOKEALL);
519: vrele(vp);
1.316 ad 520: }
521: }
1.29 cgd 522: }
523:
524: /*
1.446 hannken 525: * The filesystem synchronizer mechanism - syncer.
526: *
527: * It is useful to delay writes of file data and filesystem metadata for
528: * a certain amount of time so that quickly created and deleted files need
529: * not waste disk bandwidth being created and removed. To implement this,
530: * vnodes are appended to a "workitem" queue.
531: *
532: * Most pending metadata should not wait for more than ten seconds. Thus, block
533: * devices with a mounted filesystem are delayed only about a half the time that file
534: * data is delayed. Similarly, directory updates are more critical, so are
535: * only delayed about a third the time that file data is delayed.
536: *
537: * There are SYNCER_MAXDELAY queues that are processed in a round-robin
538: * manner at a rate of one each second (driven off the filesystem syncer
539: * thread). The syncer_delayno variable indicates the next queue that is
540: * to be processed. Items that need to be processed soon are placed in
541: * this queue:
542: *
543: * syncer_workitem_pending[syncer_delayno]
544: *
545: * A delay of e.g. fifteen seconds is done by placing the request fifteen
546: * entries later in the queue:
547: *
548: * syncer_workitem_pending[(syncer_delayno + 15) % syncer_last]
549: *
550: * Flag VI_ONWORKLST indicates that vnode is added into the queue.
551: */
552:
/* Compile-time default for syncer_maxdelay; also sizes the queue array. */
553: #define SYNCER_MAXDELAY 32
554:
/* One syncer work queue: a tail queue of vnode_impl entries. */
1.455 hannken 555: typedef TAILQ_HEAD(synclist, vnode_impl) synclist_t;
1.446 hannken 556:
557: static void vn_syncer_add1(struct vnode *, int);
558: static void sysctl_vfs_syncfs_setup(struct sysctllog **);
559:
560: /*
561: * Defines and variables for the syncer process.
562: */
563: int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
564: time_t syncdelay = 30; /* max time to delay syncing data */
565: time_t filedelay = 30; /* time to delay syncing files */
566: time_t dirdelay = 15; /* time to delay syncing directories */
567: time_t metadelay = 10; /* time to delay syncing metadata */
568: time_t lockdelay = 1; /* time to delay if locking fails */
569:
570: kmutex_t syncer_mutex; /* used to freeze syncer, long term */
571: static kmutex_t syncer_data_lock; /* short term lock on data structs */
572:
/* Index of the queue sched_sync() processes next. */
573: static int syncer_delayno = 0;
/* Number of queues; set to SYNCER_MAXDELAY + 2 by vn_initialize_syncerd(). */
574: static long syncer_last;
/* Array of syncer_last work queues, allocated by vn_initialize_syncerd(). */
575: static synclist_t * syncer_workitem_pending;
576:
577: static void
578: vn_initialize_syncerd(void)
579: {
580: int i;
581:
582: syncer_last = SYNCER_MAXDELAY + 2;
583:
584: sysctl_vfs_syncfs_setup(NULL);
585:
586: syncer_workitem_pending =
587: kmem_alloc(syncer_last * sizeof (struct synclist), KM_SLEEP);
588:
589: for (i = 0; i < syncer_last; i++)
590: TAILQ_INIT(&syncer_workitem_pending[i]);
591:
592: mutex_init(&syncer_mutex, MUTEX_DEFAULT, IPL_NONE);
593: mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE);
594: }
595:
596: /*
597: * Return delay factor appropriate for the given file system. For
598: * WAPBL we use the sync vnode to burst out metadata updates: sync
599: * those file systems more frequently.
600: */
601: static inline int
602: sync_delay(struct mount *mp)
603: {
604:
605: return mp->mnt_wapbl != NULL ? metadelay : syncdelay;
606: }
607:
608: /*
609: * Compute the next slot index from delay.
610: */
611: static inline int
612: sync_delay_slot(int delayx)
613: {
614:
615: if (delayx > syncer_maxdelay - 2)
616: delayx = syncer_maxdelay - 2;
617: return (syncer_delayno + delayx) % syncer_last;
618: }
619:
620: /*
621: * Add an item to the syncer work queue.
622: */
623: static void
624: vn_syncer_add1(struct vnode *vp, int delayx)
625: {
626: synclist_t *slp;
1.455 hannken 627: vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
1.446 hannken 628:
629: KASSERT(mutex_owned(&syncer_data_lock));
630:
631: if (vp->v_iflag & VI_ONWORKLST) {
632: /*
633: * Remove in order to adjust the position of the vnode.
634: * Note: called from sched_sync(), which will not hold
635: * interlock, therefore we cannot modify v_iflag here.
636: */
1.455 hannken 637: slp = &syncer_workitem_pending[vip->vi_synclist_slot];
638: TAILQ_REMOVE(slp, vip, vi_synclist);
1.446 hannken 639: } else {
640: KASSERT(mutex_owned(vp->v_interlock));
641: vp->v_iflag |= VI_ONWORKLST;
642: }
643:
1.455 hannken 644: vip->vi_synclist_slot = sync_delay_slot(delayx);
1.446 hannken 645:
1.455 hannken 646: slp = &syncer_workitem_pending[vip->vi_synclist_slot];
647: TAILQ_INSERT_TAIL(slp, vip, vi_synclist);
1.446 hannken 648: }
649:
650: void
651: vn_syncer_add_to_worklist(struct vnode *vp, int delayx)
652: {
653:
654: KASSERT(mutex_owned(vp->v_interlock));
655:
656: mutex_enter(&syncer_data_lock);
657: vn_syncer_add1(vp, delayx);
658: mutex_exit(&syncer_data_lock);
659: }
660:
661: /*
662: * Remove an item from the syncer work queue.
663: */
664: void
665: vn_syncer_remove_from_worklist(struct vnode *vp)
666: {
667: synclist_t *slp;
1.455 hannken 668: vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
1.446 hannken 669:
670: KASSERT(mutex_owned(vp->v_interlock));
671:
672: mutex_enter(&syncer_data_lock);
673: if (vp->v_iflag & VI_ONWORKLST) {
674: vp->v_iflag &= ~VI_ONWORKLST;
1.455 hannken 675: slp = &syncer_workitem_pending[vip->vi_synclist_slot];
676: TAILQ_REMOVE(slp, vip, vi_synclist);
1.446 hannken 677: }
678: mutex_exit(&syncer_data_lock);
679: }
680:
681: /*
682: * Add this mount point to the syncer.
683: */
684: void
685: vfs_syncer_add_to_worklist(struct mount *mp)
686: {
687: static int start, incr, next;
688: int vdelay;
689:
690: KASSERT(mutex_owned(&mp->mnt_updating));
691: KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) == 0);
692:
693: /*
694: * We attempt to scatter the mount points on the list
695: * so that they will go off at evenly distributed times
696: * even if all the filesystems are mounted at once.
697: */
698:
699: next += incr;
700: if (next == 0 || next > syncer_maxdelay) {
701: start /= 2;
702: incr /= 2;
703: if (start == 0) {
704: start = syncer_maxdelay / 2;
705: incr = syncer_maxdelay;
706: }
707: next = start;
708: }
709: mp->mnt_iflag |= IMNT_ONWORKLIST;
710: vdelay = sync_delay(mp);
711: mp->mnt_synclist_slot = vdelay > 0 ? next % vdelay : 0;
712: }
713:
714: /*
715: * Remove the mount point from the syncer.
716: */
717: void
718: vfs_syncer_remove_from_worklist(struct mount *mp)
719: {
720:
721: KASSERT(mutex_owned(&mp->mnt_updating));
722: KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) != 0);
723:
724: mp->mnt_iflag &= ~IMNT_ONWORKLIST;
725: }
726:
727: /*
728: * Try lazy sync, return true on success.
729: */
730: static bool
731: lazy_sync_vnode(struct vnode *vp)
732: {
733: bool synced;
734:
735: KASSERT(mutex_owned(&syncer_data_lock));
736:
737: synced = false;
738: /* We are locking in the wrong direction. */
739: if (mutex_tryenter(vp->v_interlock)) {
740: mutex_exit(&syncer_data_lock);
1.453 hannken 741: if (vcache_tryvget(vp) == 0) {
1.446 hannken 742: if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
743: synced = true;
744: (void) VOP_FSYNC(vp, curlwp->l_cred,
745: FSYNC_LAZY, 0, 0);
746: vput(vp);
747: } else
748: vrele(vp);
749: }
750: mutex_enter(&syncer_data_lock);
751: }
752: return synced;
753: }
754:
755: /*
756: * System filesystem synchronizer daemon.
 *
 * Loops forever; "arg" is not referenced.  Each pass, made with
 * syncer_mutex held:
 *   1. lazily syncs every mount on the worklist whose slot equals
 *      syncer_delayno,
 *   2. takes the work queue at syncer_delayno and advances the index,
 *   3. lazily syncs each vnode on that queue and requeues those that
 *      are still at its head,
 *   4. pauses if the pass finished within the same second it started.
757: */
758: void
759: sched_sync(void *arg)
760: {
761: synclist_t *slp;
762: struct vnode *vp;
763: struct mount *mp, *nmp;
764: time_t starttime;
765: bool synced;
766:
767: for (;;) {
768: mutex_enter(&syncer_mutex);
769:
770: starttime = time_second;
771:
772: /*
773: * Sync mounts whose dirty time has expired.
774: */
775: mutex_enter(&mountlist_lock);
776: for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
777: if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0 ||
778: mp->mnt_synclist_slot != syncer_delayno) {
779: nmp = TAILQ_NEXT(mp, mnt_list);
780: continue;
781: }
/* Reschedule the mount before attempting to sync it. */
782: mp->mnt_synclist_slot = sync_delay_slot(sync_delay(mp));
/*
 * NOTE(review): nmp is not assigned on this path, so vfs_busy() and
 * vfs_unbusy() presumably store the next mount through &nmp -- confirm.
 */
783: if (vfs_busy(mp, &nmp))
784: continue;
785: VFS_SYNC(mp, MNT_LAZY, curlwp->l_cred);
786: vfs_unbusy(mp, false, &nmp);
787: }
788: mutex_exit(&mountlist_lock);
789:
790: mutex_enter(&syncer_data_lock);
791:
792: /*
793: * Push files whose dirty time has expired.
794: */
/* Take the current queue, then advance the index, wrapping at syncer_last. */
795: slp = &syncer_workitem_pending[syncer_delayno];
796: syncer_delayno += 1;
797: if (syncer_delayno >= syncer_last)
798: syncer_delayno = 0;
799:
1.455 hannken 800: while ((vp = VIMPL_TO_VNODE(TAILQ_FIRST(slp))) != NULL) {
/* lazy_sync_vnode() may drop and retake syncer_data_lock. */
1.446 hannken 801: synced = lazy_sync_vnode(vp);
802:
803: /*
804: * XXX The vnode may have been recycled, in which
805: * case it may have a new identity.
806: */
1.455 hannken 807: if (VIMPL_TO_VNODE(TAILQ_FIRST(slp)) == vp) {
1.446 hannken 808: /*
809: * Put us back on the worklist. The worklist
810: * routine will remove us from our current
811: * position and then add us back in at a later
812: * position.
813: *
814: * Try again sooner rather than later if
815: * we were unable to lock the vnode. Lock
816: * failure should not prevent us from doing
817: * the sync "soon".
818: *
819: * If we locked it yet arrive here, it's
820: * likely that lazy sync is in progress and
821: * so the vnode still has dirty metadata.
822: * syncdelay is mainly to get this vnode out
823: * of the way so we do not consider it again
824: * "soon" in this loop, so the delay time is
825: * not critical as long as it is not "soon".
826: * While write-back strategy is the file
827: * system's domain, we expect write-back to
828: * occur no later than syncdelay seconds
829: * into the future.
830: */
831: vn_syncer_add1(vp,
832: synced ? syncdelay : lockdelay);
833: }
834: }
835: mutex_exit(&syncer_mutex);
836:
837: /*
838: * If it has taken us less than a second to process the
839: * current work, then wait. Otherwise start right over
840: * again. We can still lose time if any single round
841: * takes more than two seconds, but it does not really
842: * matter as we are just trying to generally pace the
843: * filesystem activity.
844: */
845: if (time_second == starttime) {
/*
 * NOTE(review): kpause() is handed syncer_data_lock; presumably it
 * releases the lock while sleeping for up to hz ticks -- confirm.
 */
846: kpause("syncer", false, hz, &syncer_data_lock);
847: }
848: mutex_exit(&syncer_data_lock);
849: }
850: }
851:
852: static void
853: sysctl_vfs_syncfs_setup(struct sysctllog **clog)
854: {
855: const struct sysctlnode *rnode, *cnode;
856:
857: sysctl_createv(clog, 0, NULL, &rnode,
858: CTLFLAG_PERMANENT,
859: CTLTYPE_NODE, "sync",
860: SYSCTL_DESCR("syncer options"),
861: NULL, 0, NULL, 0,
862: CTL_VFS, CTL_CREATE, CTL_EOL);
863:
864: sysctl_createv(clog, 0, &rnode, &cnode,
865: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
866: CTLTYPE_QUAD, "delay",
867: SYSCTL_DESCR("max time to delay syncing data"),
868: NULL, 0, &syncdelay, 0,
869: CTL_CREATE, CTL_EOL);
870:
871: sysctl_createv(clog, 0, &rnode, &cnode,
872: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
873: CTLTYPE_QUAD, "filedelay",
874: SYSCTL_DESCR("time to delay syncing files"),
875: NULL, 0, &filedelay, 0,
876: CTL_CREATE, CTL_EOL);
877:
878: sysctl_createv(clog, 0, &rnode, &cnode,
879: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
880: CTLTYPE_QUAD, "dirdelay",
881: SYSCTL_DESCR("time to delay syncing directories"),
882: NULL, 0, &dirdelay, 0,
883: CTL_CREATE, CTL_EOL);
884:
885: sysctl_createv(clog, 0, &rnode, &cnode,
886: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
887: CTLTYPE_QUAD, "metadelay",
888: SYSCTL_DESCR("time to delay syncing metadata"),
889: NULL, 0, &metadelay, 0,
890: CTL_CREATE, CTL_EOL);
891: }
892:
893: /*
1.220 lukem 894: * sysctl helper routine to return list of supported fstypes
 *
 * The node is read-only: a new value yields EPERM, and any trailing
 * name components yield EINVAL.
 *
 * With no output buffer only the space needed is computed
 * (strlen(name) + 1 per entry).  Otherwise the names on vfs_list are
 * copied out as a single space-separated string.  When the remaining
 * space cannot hold the next entry the loop stops and the list is
 * truncated; error is left at 0 in that case.
 *
 * On return *oldlenp holds the computed size.  Returns 0 or the error
 * from copyout().
895: */
1.358 pooka 896: int
1.220 lukem 897: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
898: {
1.291 christos 899: char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220 lukem 900: char *where = oldp;
901: struct vfsops *v;
902: size_t needed, left, slen;
903: int error, first;
904:
905: if (newp != NULL)
906: return (EPERM);
907: if (namelen != 0)
908: return (EINVAL);
909:
910: first = 1;
911: error = 0;
912: needed = 0;
913: left = *oldlenp;
914:
1.311 ad 915: sysctl_unlock();
1.302 ad 916: mutex_enter(&vfs_list_lock);
1.220 lukem 917: LIST_FOREACH(v, &vfs_list, vfs_list) {
918: if (where == NULL)
919: needed += strlen(v->vfs_name) + 1;
920: else {
/* Build "name" for the first entry and " name" for every later one. */
1.245 christos 921: memset(bf, 0, sizeof(bf));
1.220 lukem 922: if (first) {
1.245 christos 923: strncpy(bf, v->vfs_name, sizeof(bf));
1.220 lukem 924: first = 0;
925: } else {
1.245 christos 926: bf[0] = ' ';
927: strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220 lukem 928: }
/* strncpy() does not terminate a name that fills the buffer. */
1.245 christos 929: bf[sizeof(bf)-1] = '\0';
930: slen = strlen(bf);
1.220 lukem 931: if (left < slen + 1)
932: break;
/*
 * vfs_list_lock is dropped around copyout().
 * NOTE(review): the reference count bump presumably keeps v on the
 * list meanwhile -- confirm.
 */
1.302 ad 933: v->vfs_refcount++;
934: mutex_exit(&vfs_list_lock);
1.354 pooka 935: /* +1 to copy out the trailing NUL byte */
1.245 christos 936: error = copyout(bf, where, slen + 1);
1.302 ad 937: mutex_enter(&vfs_list_lock);
938: v->vfs_refcount--;
1.220 lukem 939: if (error)
940: break;
/* Advance by slen only, so the next entry overwrites this NUL. */
941: where += slen;
942: needed += slen;
943: left -= slen;
944: }
945: }
1.302 ad 946: mutex_exit(&vfs_list_lock);
1.311 ad 947: sysctl_relock();
1.220 lukem 948: *oldlenp = needed;
949: return (error);
950: }
951:
1.421 rmind 952: int kinfo_vdebug = 1;
953: int kinfo_vgetfailed;
1.212 atatat 954:
/* Extra vnode records added to the size estimate in sysctl_kern_vnode(). */
1.421 rmind 955: #define KINFO_VNODESLOP 10
1.384 jmcneill 956:
957: /*
1.421 rmind 958: * Dump vnode list (via sysctl).
959: * Copyout address of vnode followed by vnode.
 *
 * The node is read-only: a new value yields EPERM, and any trailing
 * name components yield EOPNOTSUPP.
 *
 * With no output buffer, *oldlenp is set to an estimate of
 * (numvnodes + KINFO_VNODESLOP) records and 0 is returned.  Otherwise
 * each vnode of every mount that can be busied is written as a
 * (vnode_t *, vnode_t) pair.
 *
 * Returns 0 with *oldlenp set to the bytes written; ENOMEM, also with
 * *oldlenp set to the bytes written so far, when the buffer is too
 * small; or the error from copyout(), in which case *oldlenp is left
 * untouched.
1.58 thorpej 960: */
961: int
1.421 rmind 962: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.58 thorpej 963: {
1.421 rmind 964: char *where = oldp;
965: size_t *sizep = oldlenp;
966: struct mount *mp, *nmp;
1.443 hannken 967: vnode_t *vp, vbuf;
968: struct vnode_iterator *marker;
1.421 rmind 969: char *bp = where;
970: char *ewhere;
971: int error;
1.58 thorpej 972:
1.421 rmind 973: if (namelen != 0)
974: return (EOPNOTSUPP);
975: if (newp != NULL)
976: return (EPERM);
1.58 thorpej 977:
1.421 rmind 978: #define VPTRSZ sizeof(vnode_t *)
979: #define VNODESZ sizeof(vnode_t)
980: if (where == NULL) {
981: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
982: return (0);
1.58 thorpej 983: }
1.421 rmind 984: ewhere = where + *sizep;
1.58 thorpej 985:
1.421 rmind 986: sysctl_unlock();
987: mutex_enter(&mountlist_lock);
1.441 christos 988: for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
/*
 * NOTE(review): nmp is only ever set through vfs_busy()/vfs_unbusy(),
 * and the early returns below do not release mountlist_lock;
 * presumably vfs_busy() drops it on success and vfs_unbusy(&nmp)
 * retakes it -- confirm.
 */
1.421 rmind 989: if (vfs_busy(mp, &nmp)) {
1.58 thorpej 990: continue;
991: }
1.443 hannken 992: vfs_vnode_iterator_init(mp, &marker);
1.444 christos 993: while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) {
/* Stop with ENOMEM when one more record would not fit. */
1.421 rmind 994: if (bp + VPTRSZ + VNODESZ > ewhere) {
1.443 hannken 995: vrele(vp);
996: vfs_vnode_iterator_destroy(marker);
1.421 rmind 997: vfs_unbusy(mp, false, NULL);
998: sysctl_relock();
999: *sizep = bp - where;
1000: return (ENOMEM);
1001: }
/* Snapshot the vnode into vbuf, then copy out its address and the snapshot. */
1002: memcpy(&vbuf, vp, VNODESZ);
1003: if ((error = copyout(&vp, bp, VPTRSZ)) ||
1004: (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.443 hannken 1005: vrele(vp);
1006: vfs_vnode_iterator_destroy(marker);
1.421 rmind 1007: vfs_unbusy(mp, false, NULL);
1008: sysctl_relock();
1009: return (error);
1010: }
1.443 hannken 1011: vrele(vp);
1.421 rmind 1012: bp += VPTRSZ + VNODESZ;
1.353 pooka 1013: }
1.443 hannken 1014: vfs_vnode_iterator_destroy(marker);
1.421 rmind 1015: vfs_unbusy(mp, false, &nmp);
1.353 pooka 1016: }
1.421 rmind 1017: mutex_exit(&mountlist_lock);
1018: sysctl_relock();
1.326 ad 1019:
1.421 rmind 1020: *sizep = bp - where;
1021: return (0);
1.353 pooka 1022: }
1023:
1024: /*
1025: * Set vnode attributes to VNOVAL
1026: */
1027: void
1028: vattr_null(struct vattr *vap)
1029: {
1030:
1.393 pooka 1031: memset(vap, 0, sizeof(*vap));
1032:
1.353 pooka 1033: vap->va_type = VNON;
1034:
1035: /*
1036: * Assign individually so that it is safe even if size and
1037: * sign of each member are varied.
1038: */
1039: vap->va_mode = VNOVAL;
1040: vap->va_nlink = VNOVAL;
1041: vap->va_uid = VNOVAL;
1042: vap->va_gid = VNOVAL;
1043: vap->va_fsid = VNOVAL;
1044: vap->va_fileid = VNOVAL;
1045: vap->va_size = VNOVAL;
1046: vap->va_blocksize = VNOVAL;
1047: vap->va_atime.tv_sec =
1048: vap->va_mtime.tv_sec =
1049: vap->va_ctime.tv_sec =
1050: vap->va_birthtime.tv_sec = VNOVAL;
1051: vap->va_atime.tv_nsec =
1052: vap->va_mtime.tv_nsec =
1053: vap->va_ctime.tv_nsec =
1054: vap->va_birthtime.tv_nsec = VNOVAL;
1055: vap->va_gen = VNOVAL;
1056: vap->va_flags = VNOVAL;
1057: vap->va_rdev = VNOVAL;
1058: vap->va_bytes = VNOVAL;
1059: }
1060:
1.451 hannken 1061: /*
1062: * Vnode state to string.
1063: */
1064: const char *
1065: vstate_name(enum vnode_state state)
1066: {
1067:
1068: switch (state) {
1069: case VS_MARKER:
1070: return "MARKER";
1071: case VS_LOADING:
1072: return "LOADING";
1073: case VS_ACTIVE:
1074: return "ACTIVE";
1075: case VS_BLOCKED:
1076: return "BLOCKED";
1077: case VS_RECLAIMING:
1078: return "RECLAIMING";
1079: case VS_RECLAIMED:
1080: return "RECLAIMED";
1081: default:
1082: return "ILLEGAL";
1083: }
1084: }
1085:
1086: /*
1087: * Print a description of a vnode (common part).
1088: */
1089: static void
1090: vprint_common(struct vnode *vp, const char *prefix,
1091: void (*pr)(const char *, ...) __printflike(1, 2))
1092: {
1093: int n;
1094: char bf[96];
1095: const uint8_t *cp;
1.454 hannken 1096: vnode_impl_t *vip;
1.451 hannken 1097: const char * const vnode_tags[] = { VNODE_TAGS };
1098: const char * const vnode_types[] = { VNODE_TYPES };
1099: const char vnode_flagbits[] = VNODE_FLAGBITS;
1100:
1.353 pooka 1101: #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1102: #define ARRAY_PRINT(idx, arr) \
1.370 yamt 1103: ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1.353 pooka 1104:
1.454 hannken 1105: vip = VNODE_TO_VIMPL(vp);
1.451 hannken 1106:
1107: snprintb(bf, sizeof(bf),
1108: vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
1109:
1110: (*pr)("vnode %p flags %s\n", vp, bf);
1111: (*pr)("%stag %s(%d) type %s(%d) mount %p typedata %p\n", prefix,
1112: ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1113: ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1114: vp->v_mount, vp->v_mountedhere);
1115: (*pr)("%susecount %d writecount %d holdcount %d\n", prefix,
1116: vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1117: (*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n",
1118: prefix, vp->v_size, vp->v_writesize, vp->v_numoutput);
1.457 ! hannken 1119: (*pr)("%sdata %p lock %p\n", prefix, vp->v_data, &vip->vi_lock);
1.451 hannken 1120:
1.454 hannken 1121: (*pr)("%sstate %s key(%p %zd)", prefix, vstate_name(vip->vi_state),
1122: vip->vi_key.vk_mount, vip->vi_key.vk_key_len);
1123: n = vip->vi_key.vk_key_len;
1124: cp = vip->vi_key.vk_key;
1.451 hannken 1125: while (n-- > 0)
1126: (*pr)(" %02x", *cp++);
1127: (*pr)("\n");
1.454 hannken 1128: (*pr)("%slrulisthd %p\n", prefix, vip->vi_lrulisthd);
1.451 hannken 1129:
1130: #undef ARRAY_PRINT
1131: #undef ARRAY_SIZE
1132: }
1.353 pooka 1133:
1134: /*
1135: * Print out a description of a vnode.
1136: */
1137: void
1138: vprint(const char *label, struct vnode *vp)
1139: {
1140:
1141: if (label != NULL)
1142: printf("%s: ", label);
1.451 hannken 1143: vprint_common(vp, "\t", printf);
1.353 pooka 1144: if (vp->v_data != NULL) {
1145: printf("\t");
1146: VOP_PRINT(vp);
1147: }
1148: }
1149:
1.380 elad 1150: /* Deprecated. Kept for KPI compatibility. */
1.353 pooka 1151: int
1152: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
1153: mode_t acc_mode, kauth_cred_t cred)
1154: {
1155:
1.380 elad 1156: #ifdef DIAGNOSTIC
1.381 elad 1157: printf("vaccess: deprecated interface used.\n");
1.380 elad 1158: #endif /* DIAGNOSTIC */
1.353 pooka 1159:
1.437 plunky 1160: return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(acc_mode,
1.433 elad 1161: type, file_mode), NULL /* This may panic. */, NULL,
1162: genfs_can_access(type, file_mode, uid, gid, acc_mode, cred));
1.353 pooka 1163: }
1164:
1165: /*
1166: * Given a file system name, look up the vfsops for that
1167: * file system, or return NULL if file system isn't present
1168: * in the kernel.
1169: */
1170: struct vfsops *
1171: vfs_getopsbyname(const char *name)
1172: {
1173: struct vfsops *v;
1174:
1175: mutex_enter(&vfs_list_lock);
1176: LIST_FOREACH(v, &vfs_list, vfs_list) {
1177: if (strcmp(v->vfs_name, name) == 0)
1178: break;
1179: }
1180: if (v != NULL)
1181: v->vfs_refcount++;
1182: mutex_exit(&vfs_list_lock);
1183:
1184: return (v);
1185: }
1186:
1187: void
1188: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1189: {
1190: const struct statvfs *mbp;
1191:
1192: if (sbp == (mbp = &mp->mnt_stat))
1193: return;
1194:
1195: (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
1196: sbp->f_fsid = mbp->f_fsid;
1197: sbp->f_owner = mbp->f_owner;
1198: sbp->f_flag = mbp->f_flag;
1199: sbp->f_syncwrites = mbp->f_syncwrites;
1200: sbp->f_asyncwrites = mbp->f_asyncwrites;
1201: sbp->f_syncreads = mbp->f_syncreads;
1202: sbp->f_asyncreads = mbp->f_asyncreads;
1203: (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1204: (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1205: sizeof(sbp->f_fstypename));
1206: (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1207: sizeof(sbp->f_mntonname));
1208: (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
1209: sizeof(sbp->f_mntfromname));
1210: sbp->f_namemax = mbp->f_namemax;
1211: }
1212:
1213: int
1214: set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
1215: const char *vfsname, struct mount *mp, struct lwp *l)
1216: {
1217: int error;
1218: size_t size;
1219: struct statvfs *sfs = &mp->mnt_stat;
1220: int (*fun)(const void *, void *, size_t, size_t *);
1221:
1222: (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
1223: sizeof(mp->mnt_stat.f_fstypename));
1224:
1225: if (onp) {
1226: struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1227: fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
1228: if (cwdi->cwdi_rdir != NULL) {
1229: size_t len;
1230: char *bp;
1231: char *path = PNBUF_GET();
1232:
1233: bp = path + MAXPATHLEN;
1234: *--bp = '\0';
1235: rw_enter(&cwdi->cwdi_lock, RW_READER);
1236: error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
1237: path, MAXPATHLEN / 2, 0, l);
1238: rw_exit(&cwdi->cwdi_lock);
1239: if (error) {
1240: PNBUF_PUT(path);
1241: return error;
1242: }
1243:
1244: len = strlen(bp);
1245: if (len > sizeof(sfs->f_mntonname) - 1)
1246: len = sizeof(sfs->f_mntonname) - 1;
1247: (void)strncpy(sfs->f_mntonname, bp, len);
1248: PNBUF_PUT(path);
1249:
1250: if (len < sizeof(sfs->f_mntonname) - 1) {
1251: error = (*fun)(onp, &sfs->f_mntonname[len],
1252: sizeof(sfs->f_mntonname) - len - 1, &size);
1253: if (error)
1254: return error;
1255: size += len;
1256: } else {
1257: size = len;
1258: }
1259: } else {
1260: error = (*fun)(onp, &sfs->f_mntonname,
1261: sizeof(sfs->f_mntonname) - 1, &size);
1262: if (error)
1263: return error;
1264: }
1265: (void)memset(sfs->f_mntonname + size, 0,
1266: sizeof(sfs->f_mntonname) - size);
1267: }
1268:
1269: if (fromp) {
1270: fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
1271: error = (*fun)(fromp, sfs->f_mntfromname,
1272: sizeof(sfs->f_mntfromname) - 1, &size);
1273: if (error)
1274: return error;
1275: (void)memset(sfs->f_mntfromname + size, 0,
1276: sizeof(sfs->f_mntfromname) - size);
1277: }
1278: return 0;
1279: }
1280:
/*
 * Store a timestamp in *ts; this is a plain call to nanotime().
 */
void
vfs_timestamp(struct timespec *ts)
{

	nanotime(ts);
}
1287:
/* Recorded root fs time, if known. */
time_t rootfstime;

/*
 * Record t as the root file system time.
 */
void
setrootfstime(time_t t)
{

	rootfstime = t;
}
1294:
1.439 rmind 1295: static const uint8_t vttodt_tab[ ] = {
1296: [VNON] = DT_UNKNOWN,
1297: [VREG] = DT_REG,
1298: [VDIR] = DT_DIR,
1299: [VBLK] = DT_BLK,
1300: [VCHR] = DT_CHR,
1301: [VLNK] = DT_LNK,
1302: [VSOCK] = DT_SOCK,
1303: [VFIFO] = DT_FIFO,
1304: [VBAD] = DT_UNKNOWN
1.400 pooka 1305: };
1306:
1307: uint8_t
1308: vtype2dt(enum vtype vt)
1309: {
1310:
1311: CTASSERT(VBAD == __arraycount(vttodt_tab) - 1);
1312: return vttodt_tab[vt];
1313: }
1314:
1.353 pooka 1315: int
1316: VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
1317: {
1318: int error;
1319:
1320: KERNEL_LOCK(1, NULL);
1321: error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
1322: KERNEL_UNLOCK_ONE(NULL);
1323:
1324: return error;
1325: }
1326:
1327: int
1328: VFS_START(struct mount *mp, int a)
1329: {
1330: int error;
1331:
1332: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1333: KERNEL_LOCK(1, NULL);
1334: }
1335: error = (*(mp->mnt_op->vfs_start))(mp, a);
1336: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1337: KERNEL_UNLOCK_ONE(NULL);
1338: }
1339:
1340: return error;
1341: }
1342:
1343: int
1344: VFS_UNMOUNT(struct mount *mp, int a)
1345: {
1346: int error;
1347:
1348: KERNEL_LOCK(1, NULL);
1349: error = (*(mp->mnt_op->vfs_unmount))(mp, a);
1350: KERNEL_UNLOCK_ONE(NULL);
1351:
1352: return error;
1353: }
1354:
1355: int
1356: VFS_ROOT(struct mount *mp, struct vnode **a)
1357: {
1358: int error;
1359:
1360: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1361: KERNEL_LOCK(1, NULL);
1362: }
1363: error = (*(mp->mnt_op->vfs_root))(mp, a);
1364: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1365: KERNEL_UNLOCK_ONE(NULL);
1366: }
1367:
1368: return error;
1369: }
1370:
1371: int
1.432 dholland 1372: VFS_QUOTACTL(struct mount *mp, struct quotactl_args *args)
1.353 pooka 1373: {
1374: int error;
1375:
1376: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1377: KERNEL_LOCK(1, NULL);
1378: }
1.431 dholland 1379: error = (*(mp->mnt_op->vfs_quotactl))(mp, args);
1.353 pooka 1380: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1381: KERNEL_UNLOCK_ONE(NULL);
1382: }
1383:
1384: return error;
1385: }
1386:
1387: int
1388: VFS_STATVFS(struct mount *mp, struct statvfs *a)
1389: {
1390: int error;
1391:
1392: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1393: KERNEL_LOCK(1, NULL);
1394: }
1395: error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
1396: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1397: KERNEL_UNLOCK_ONE(NULL);
1398: }
1399:
1400: return error;
1401: }
1402:
1403: int
1404: VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
1405: {
1406: int error;
1407:
1408: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1409: KERNEL_LOCK(1, NULL);
1410: }
1411: error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
1412: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1413: KERNEL_UNLOCK_ONE(NULL);
1414: }
1415:
1416: return error;
1417: }
1418:
1419: int
1420: VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
1421: {
1422: int error;
1423:
1424: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1425: KERNEL_LOCK(1, NULL);
1426: }
1427: error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
1428: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1429: KERNEL_UNLOCK_ONE(NULL);
1430: }
1431:
1432: return error;
1433: }
1434:
1435: int
1436: VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
1437: {
1438: int error;
1439:
1440: if ((vp->v_vflag & VV_MPSAFE) == 0) {
1441: KERNEL_LOCK(1, NULL);
1442: }
1443: error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
1444: if ((vp->v_vflag & VV_MPSAFE) == 0) {
1445: KERNEL_UNLOCK_ONE(NULL);
1446: }
1447:
1448: return error;
1449: }
1450:
1451: int
1452: VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
1453: {
1454: int error;
1455:
1456: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1457: KERNEL_LOCK(1, NULL);
1458: }
1459: error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
1460: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1461: KERNEL_UNLOCK_ONE(NULL);
1462: }
1463:
1464: return error;
1465: }
1466:
1467: int
1468: VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
1469: {
1470: int error;
1471:
1472: KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
1473: error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
1474: KERNEL_UNLOCK_ONE(NULL); /* XXX */
1475:
1476: return error;
1477: }
1478:
1479: int
1480: VFS_SUSPENDCTL(struct mount *mp, int a)
1481: {
1482: int error;
1483:
1484: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1485: KERNEL_LOCK(1, NULL);
1486: }
1487: error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
1488: if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1489: KERNEL_UNLOCK_ONE(NULL);
1490: }
1491:
1492: return error;
1493: }
1494:
1.378 pooka 1495: #if defined(DDB) || defined(DEBUGPRINT)
1.353 pooka 1496: static const char buf_flagbits[] = BUF_FLAGBITS;
1497:
1498: void
1499: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
1500: {
1501: char bf[1024];
1502:
1503: (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
1504: PRIx64 " dev 0x%x\n",
1505: bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
1506:
1.361 christos 1507: snprintb(bf, sizeof(bf),
1508: buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1.353 pooka 1509: (*pr)(" error %d flags 0x%s\n", bp->b_error, bf);
1510:
1511: (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1512: bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.369 ad 1513: (*pr)(" data %p saveaddr %p\n",
1514: bp->b_data, bp->b_saveaddr);
1.353 pooka 1515: (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
1516: }
1517:
1518: void
1519: vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
1520: {
1521:
1522: uvm_object_printit(&vp->v_uobj, full, pr);
1.451 hannken 1523: (*pr)("\n");
1524: vprint_common(vp, "", printf);
1.353 pooka 1525: if (full) {
1526: struct buf *bp;
1527:
1528: (*pr)("clean bufs:\n");
1529: LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
1530: (*pr)(" bp %p\n", bp);
1531: vfs_buf_print(bp, full, pr);
1532: }
1533:
1534: (*pr)("dirty bufs:\n");
1535: LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
1536: (*pr)(" bp %p\n", bp);
1537: vfs_buf_print(bp, full, pr);
1538: }
1539: }
1540: }
1541:
1542: void
1543: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1544: {
1545: char sbuf[256];
1546:
1.446 hannken 1547: (*pr)("vnodecovered = %p data = %p\n",
1548: mp->mnt_vnodecovered,mp->mnt_data);
1.353 pooka 1549:
1550: (*pr)("fs_bshift %d dev_bshift = %d\n",
1551: mp->mnt_fs_bshift,mp->mnt_dev_bshift);
1552:
1.361 christos 1553: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1.353 pooka 1554: (*pr)("flag = %s\n", sbuf);
1555:
1.361 christos 1556: snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1.353 pooka 1557: (*pr)("iflag = %s\n", sbuf);
1558:
1559: (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
1560: &mp->mnt_unmounting, &mp->mnt_updating);
1561:
1562: (*pr)("statvfs cache:\n");
1563: (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
1564: (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
1565: (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
1566:
1567: (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
1568: (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
1569: (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
1570: (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
1571:
1572: (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
1573: (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
1574: (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
1575: (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
1576:
1577: (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
1578: mp->mnt_stat.f_fsidx.__fsid_val[0],
1579: mp->mnt_stat.f_fsidx.__fsid_val[1]);
1580:
1581: (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
1582: (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
1583:
1.361 christos 1584: snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
1585:
1.353 pooka 1586: (*pr)("\tflag = %s\n",sbuf);
1587: (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
1588: (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
1589: (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
1590: (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
1591: (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
1592: (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
1593: (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
1594:
1595: {
1596: int cnt = 0;
1.456 hannken 1597: vnode_t *vp;
1598: vnode_impl_t *vip;
1.353 pooka 1599: (*pr)("locked vnodes =");
1.456 hannken 1600: TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1601: vp = VIMPL_TO_VNODE(vip);
1.353 pooka 1602: if (VOP_ISLOCKED(vp)) {
1603: if ((++cnt % 6) == 0) {
1604: (*pr)(" %p,\n\t", vp);
1605: } else {
1606: (*pr)(" %p,", vp);
1607: }
1608: }
1609: }
1610: (*pr)("\n");
1611: }
1612:
1613: if (full) {
1614: int cnt = 0;
1.456 hannken 1615: vnode_t *vp;
1616: vnode_impl_t *vip;
1.353 pooka 1617: (*pr)("all vnodes =");
1.456 hannken 1618: TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1619: vp = VIMPL_TO_VNODE(vip);
1620: if (!TAILQ_NEXT(vip, vi_mntvnodes)) {
1.353 pooka 1621: (*pr)(" %p", vp);
1622: } else if ((++cnt % 6) == 0) {
1623: (*pr)(" %p,\n\t", vp);
1624: } else {
1625: (*pr)(" %p,", vp);
1626: }
1627: }
1.456 hannken 1628: (*pr)("\n");
1.353 pooka 1629: }
1630: }
1.372 elad 1631:
1.385 elad 1632: /*
1.421 rmind 1633: * List all of the locked vnodes in the system.
1.385 elad 1634: */
1.421 rmind 1635: void printlockedvnodes(void);
1636:
1637: void
1638: printlockedvnodes(void)
1.385 elad 1639: {
1.421 rmind 1640: struct mount *mp, *nmp;
1.456 hannken 1641: vnode_t *vp;
1642: vnode_impl_t *vip;
1.385 elad 1643:
1.421 rmind 1644: printf("Locked vnodes\n");
1645: mutex_enter(&mountlist_lock);
1.441 christos 1646: for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1.421 rmind 1647: if (vfs_busy(mp, &nmp)) {
1648: continue;
1.385 elad 1649: }
1.456 hannken 1650: TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1651: vp = VIMPL_TO_VNODE(vip);
1.421 rmind 1652: if (VOP_ISLOCKED(vp))
1653: vprint(NULL, vp);
1.385 elad 1654: }
1.421 rmind 1655: mutex_enter(&mountlist_lock);
1656: vfs_unbusy(mp, false, &nmp);
1.385 elad 1657: }
1.421 rmind 1658: mutex_exit(&mountlist_lock);
1659: }
1.385 elad 1660:
1.421 rmind 1661: #endif /* DDB || DEBUGPRINT */
CVSweb <webmaster@jp.NetBSD.org>