Annotated CVS view: src/sys/kern/vfs_subr.c (NetBSD)

Annotation of src/sys/kern/vfs_subr.c, Revision 1.391

1.391   ! pooka       1: /*     $NetBSD: vfs_subr.c,v 1.390 2009/11/26 20:52:19 pooka Exp $     */
1.74      thorpej     2:
                      3: /*-
1.315     ad          4:  * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.302     ad          9:  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
1.74      thorpej    10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     21:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     22:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     23:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     24:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     25:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     26:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     27:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     28:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     29:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     30:  * POSSIBILITY OF SUCH DAMAGE.
                     31:  */
1.32      cgd        32:
1.29      cgd        33: /*
1.30      mycroft    34:  * Copyright (c) 1989, 1993
                     35:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        36:  * (c) UNIX System Laboratories, Inc.
                     37:  * All or some portions of this file are derived from material licensed
                     38:  * to the University of California by American Telephone and Telegraph
                     39:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     40:  * the permission of UNIX System Laboratories, Inc.
                     41:  *
                     42:  * Redistribution and use in source and binary forms, with or without
                     43:  * modification, are permitted provided that the following conditions
                     44:  * are met:
                     45:  * 1. Redistributions of source code must retain the above copyright
                     46:  *    notice, this list of conditions and the following disclaimer.
                     47:  * 2. Redistributions in binary form must reproduce the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer in the
                     49:  *    documentation and/or other materials provided with the distribution.
1.204     agc        50:  * 3. Neither the name of the University nor the names of its contributors
1.29      cgd        51:  *    may be used to endorse or promote products derived from this software
                     52:  *    without specific prior written permission.
                     53:  *
                     54:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     55:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     56:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     57:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     58:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     59:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     60:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     61:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     62:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     63:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     64:  * SUCH DAMAGE.
                     65:  *
1.32      cgd        66:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        67:  */
                     68:
                     69: /*
1.346     ad         70:  * Note on v_usecount and locking:
                     71:  *
 * At nearly all points where it is known that v_usecount could be zero,
 * the vnode interlock will be held.
                     74:  *
                     75:  * To change v_usecount away from zero, the interlock must be held.  To
                     76:  * change from a non-zero value to zero, again the interlock must be
                     77:  * held.
                     78:  *
1.379     yamt       79:  * There's a flag bit, VC_XLOCK, embedded in v_usecount.
                     80:  * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock
                     81:  * must be held.
                     82:  * To modify the VC_XLOCK bit, the interlock must be held.
                     83:  * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
                     84:  * VC_XLOCK bit is set.
                     85:  *
                     86:  * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
                     87:  * value to a non-zero value can safely be done using atomic operations,
                     88:  * without the interlock held.
                     89:  * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
                     90:  * value can be done using atomic operations, without the interlock held.
1.29      cgd        91:  */
1.162     lukem      92:
                     93: #include <sys/cdefs.h>
1.391   ! pooka      94: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.390 2009/11/26 20:52:19 pooka Exp $");
1.78      mrg        95:
1.125     chs        96: #include "opt_ddb.h"
1.95      thorpej    97: #include "opt_compat_netbsd.h"
1.97      christos   98: #include "opt_compat_43.h"
1.29      cgd        99:
                    100: #include <sys/param.h>
1.30      mycroft   101: #include <sys/systm.h>
1.363     pooka     102: #include <sys/conf.h>
1.29      cgd       103: #include <sys/proc.h>
1.138     bouyer    104: #include <sys/kernel.h>
1.29      cgd       105: #include <sys/mount.h>
1.46      mycroft   106: #include <sys/fcntl.h>
1.29      cgd       107: #include <sys/vnode.h>
1.30      mycroft   108: #include <sys/stat.h>
1.29      cgd       109: #include <sys/namei.h>
                    110: #include <sys/ucred.h>
                    111: #include <sys/buf.h>
                    112: #include <sys/errno.h>
1.366     yamt      113: #include <sys/kmem.h>
1.51      christos  114: #include <sys/syscallargs.h>
1.58      thorpej   115: #include <sys/device.h>
1.192     christos  116: #include <sys/filedesc.h>
1.266     elad      117: #include <sys/kauth.h>
1.307     ad        118: #include <sys/atomic.h>
1.309     ad        119: #include <sys/kthread.h>
1.355     simonb    120: #include <sys/wapbl.h>
1.50      christos  121:
1.380     elad      122: #include <miscfs/genfs/genfs.h>
1.30      mycroft   123: #include <miscfs/specfs/specdev.h>
1.113     fvdl      124: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   125:
1.125     chs       126: #include <uvm/uvm.h>
1.255     yamt      127: #include <uvm/uvm_readahead.h>
1.125     chs       128: #include <uvm/uvm_ddb.h>
1.129     mrg       129:
                    130: #include <sys/sysctl.h>
1.77      mrg       131:
/*
 * Conversion tables between the on-disk/stat file-type representation
 * and vnode types: iftovt_tab maps the 16 possible file-type nibbles
 * to an enum vtype, vttoif_tab maps an enum vtype back to S_IF* bits.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Indexed by enum vtype (VNON..VBAD); last entry is the S_IFMT mask. */
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
                    140:
/*
 * Insq/Remq for the vnode usage lists.
 */
/* Insert buffer bp at the head of vnode buffer list dp. */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
/* Remove bp from its vnode buffer list and mark its link as off-list. */
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
                    152:
1.309     ad        153: static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
                    154: static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
                    155: static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
                    156:
1.353     pooka     157: struct mntlist mountlist =                     /* mounted filesystem list */
                    158:     CIRCLEQ_HEAD_INITIALIZER(mountlist);
                    159:
                    160: u_int numvnodes;
                    161: static specificdata_domain_t mount_specificdata_domain;
                    162:
1.309     ad        163: static int vrele_pending;
1.351     ad        164: static int vrele_gen;
1.309     ad        165: static kmutex_t        vrele_lock;
                    166: static kcondvar_t vrele_cv;
                    167: static lwp_t *vrele_lwp;
1.113     fvdl      168:
1.382     dyoung    169: static uint64_t mountgen = 0;
                    170: static kmutex_t mountgen_lock;
                    171:
1.353     pooka     172: kmutex_t mountlist_lock;
                    173: kmutex_t mntid_lock;
                    174: kmutex_t mntvnode_lock;
                    175: kmutex_t vnode_free_list_lock;
                    176: kmutex_t vfs_list_lock;
                    177:
1.309     ad        178: static pool_cache_t vnode_cache;
1.186     thorpej   179:
1.89      kleink    180: /*
1.353     pooka     181:  * These define the root filesystem and device.
                    182:  */
                    183: struct vnode *rootvnode;
                    184: struct device *root_device;                    /* root device */
                    185:
                    186: /*
1.89      kleink    187:  * Local declarations.
                    188:  */
1.276     hannken   189:
1.309     ad        190: static void vrele_thread(void *);
                    191: static void insmntque(vnode_t *, struct mount *);
                    192: static int getdevvp(dev_t, vnode_t **, enum vtype);
1.364     yamt      193: static vnode_t *getcleanvnode(void);
1.309     ad        194: void vpanic(vnode_t *, const char *);
1.382     dyoung    195: static void vfs_shutdown1(struct lwp *);
1.309     ad        196:
1.353     pooka     197: #ifdef DEBUG
                    198: void printlockedvnodes(void);
                    199: #endif
                    200:
#ifdef DIAGNOSTIC
/*
 * vpanic: print the offending vnode and panic with the given message.
 * Diagnostic aid used when a vnode invariant is violated; compiled to
 * nothing in non-DIAGNOSTIC kernels.
 */
void
vpanic(vnode_t *vp, const char *msg)
{

	vprint(NULL, vp);
	panic("%s\n", msg);
}
#else
#define	vpanic(vp, msg)	/* nothing */
#endif
                    212:
/*
 * vn_init1: create the vnode pool cache and start the deferred-release
 * (vrele) kernel thread.  Called from vntblinit().
 */
void
vn_init1(void)
{

	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	/* Create deferred release thread. */
	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vrele_cv, "vrele");
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele"))
		panic("fork vrele");
}
1.51      christos  228:
/*
 * Initialize the vnode management data structures.
 *
 * Sets up the global VFS mutexes, creates the mount specificdata
 * domain, starts the filesystem syncer, and finishes vnode setup
 * via vn_init1().
 */
void
vntblinit(void)
{

	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();

	/* Initialize the filesystem syncer. */
	vn_initialize_syncerd();
	vn_init1();
}
                    249:
1.202     yamt      250: int
1.256     christos  251: vfs_drainvnodes(long target, struct lwp *l)
1.202     yamt      252: {
                    253:
                    254:        while (numvnodes > target) {
1.309     ad        255:                vnode_t *vp;
1.202     yamt      256:
1.309     ad        257:                mutex_enter(&vnode_free_list_lock);
                    258:                vp = getcleanvnode();
1.202     yamt      259:                if (vp == NULL)
                    260:                        return EBUSY; /* give up */
1.309     ad        261:                ungetnewvnode(vp);
1.202     yamt      262:        }
                    263:
                    264:        return 0;
                    265: }
                    266:
                    267: /*
1.353     pooka     268:  * Lookup a mount point by filesystem identifier.
                    269:  *
                    270:  * XXX Needs to add a reference to the mount point.
                    271:  */
                    272: struct mount *
                    273: vfs_getvfs(fsid_t *fsid)
                    274: {
                    275:        struct mount *mp;
                    276:
                    277:        mutex_enter(&mountlist_lock);
                    278:        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
                    279:                if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                    280:                    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
                    281:                        mutex_exit(&mountlist_lock);
                    282:                        return (mp);
                    283:                }
                    284:        }
                    285:        mutex_exit(&mountlist_lock);
                    286:        return ((struct mount *)0);
                    287: }
                    288:
/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_destroy(struct mount *mp)
{

	/*
	 * atomic_dec_uint_nv() returns the post-decrement value; the
	 * cast to int makes the "> 0" comparison signed.
	 */
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	rw_destroy(&mp->mnt_unmounting);
	mutex_destroy(&mp->mnt_updating);
	mutex_destroy(&mp->mnt_renamelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	kmem_free(mp, sizeof(*mp));
}
                    314:
/*
 * getcleanvnode: grab a vnode from the free list (falling back to the
 * hold list) and clean it out for reuse.
 *
 * => Called with vnode_free_list_lock held; the lock is always
 *    released before returning.
 * => Returns a cleaned vnode with one reference, or NULL if no
 *    suitable vnode could be found.
 */
vnode_t *
getcleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));

retry:
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		if (vp->v_usecount != 0) {
			vpanic(vp, "free vnode isn't");
		}
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vpanic(vp, "clean vnode on freelist");
		}
		if (vp->v_freelisthd != listhd) {
			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
			vpanic(vp, "list head mismatch");
		}
		/* Skip vnodes whose interlock is contended. */
		if (!mutex_tryenter(&vp->v_interlock))
			continue;
		/*
		 * Our lwp might hold the underlying vnode
		 * locked, so don't try to reclaim a VI_LAYER
		 * node if it's locked.
		 */
		if ((vp->v_iflag & VI_XLOCK) == 0 &&
		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			break;
		}
		mutex_exit(&vp->v_interlock);
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			/* Free list exhausted: try the hold list. */
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return NULL;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	if (vp->v_usecount != 0) {
		/*
		 * was referenced again before we got the interlock
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		mutex_exit(&vp->v_interlock);
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before reusing it.  We need to add a reference
	 * before doing this.  If the vnode gains another reference while
	 * being cleaned out then we lose - retry.
	 */
	atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
	vclean(vp, DOCLOSE);
	KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
	atomic_add_int(&vp->v_usecount, -VC_XLOCK);
	if (vp->v_usecount == 1) {
		/* We're about to dirty it. */
		vp->v_iflag &= ~VI_CLEAN;
		mutex_exit(&vp->v_interlock);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vp->v_type = VNON;
	} else {
		/*
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		vrelel(vp, 0); /* releases vp->v_interlock */
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/* Post-clean sanity checks: the vnode must be fully dissociated. */
	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
		vpanic(vp, "cleaned vnode isn't");
	}
	if (vp->v_numoutput != 0) {
		vpanic(vp, "clean vnode has pending I/O's");
	}
	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
		vpanic(vp, "clean vnode on syncer list");
	}

	return vp;
}
                    426:
/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 * => If nextp != NULL, mountlist_lock is expected held on entry; it is
 *    released on success, and on failure *nextp is set to the next
 *    mount so the caller can continue walking the mountlist.
 */
int
vfs_busy(struct mount *mp, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* An in-progress unmount holds mnt_unmounting as writer. */
	if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return EBUSY;
	}
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		/* The file system has already been unmounted. */
		rw_exit(&mp->mnt_unmounting);
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return ENOENT;
	}
	if (nextp != NULL) {
		mutex_exit(&mountlist_lock);
	}
	atomic_inc_uint(&mp->mnt_refcnt);
	return 0;
}
                    461:
/*
 * Unbusy a busy filesystem.
 *
 * => If keepref is true, preserve reference added by vfs_busy().
 * => If nextp != NULL, acquire mountlist_lock and return the next
 *    mount on the list in *nextp (for mountlist iteration).
 */
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (nextp != NULL) {
		mutex_enter(&mountlist_lock);
	}
	rw_exit(&mp->mnt_unmounting);
	if (!keepref) {
		/* Drop the reference vfs_busy() took. */
		vfs_destroy(mp);
	}
	if (nextp != NULL) {
		KASSERT(mutex_owned(&mountlist_lock));
		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
	}
}
                    486:
1.376     dyoung    487: struct mount *
                    488: vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp)
                    489: {
                    490:        int error;
                    491:        struct mount *mp;
                    492:
                    493:        mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
                    494:        if (mp == NULL)
                    495:                return NULL;
                    496:
                    497:        mp->mnt_op = vfsops;
                    498:        mp->mnt_refcnt = 1;
                    499:        TAILQ_INIT(&mp->mnt_vnodelist);
                    500:        rw_init(&mp->mnt_unmounting);
                    501:        mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
                    502:        mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
                    503:        error = vfs_busy(mp, NULL);
                    504:        KASSERT(error == 0);
                    505:        mp->mnt_vnodecovered = vp;
                    506:        mount_initspecific(mp);
                    507:
1.382     dyoung    508:        mutex_enter(&mountgen_lock);
                    509:        mp->mnt_gen = mountgen++;
                    510:        mutex_exit(&mountgen_lock);
                    511:
1.376     dyoung    512:        return mp;
                    513: }
                    514:
1.29      cgd       515: /*
1.80      fvdl      516:  * Lookup a filesystem type, and if found allocate and initialize
                    517:  * a mount structure for it.
                    518:  *
                    519:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       520:  */
1.50      christos  521: int
1.247     thorpej   522: vfs_rootmountalloc(const char *fstypename, const char *devname,
                    523:     struct mount **mpp)
1.29      cgd       524: {
1.80      fvdl      525:        struct vfsops *vfsp = NULL;
                    526:        struct mount *mp;
1.29      cgd       527:
1.309     ad        528:        mutex_enter(&vfs_list_lock);
1.152     jdolecek  529:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291     christos  530:                if (!strncmp(vfsp->vfs_name, fstypename,
                    531:                    sizeof(mp->mnt_stat.f_fstypename)))
1.80      fvdl      532:                        break;
1.315     ad        533:        if (vfsp == NULL) {
                    534:                mutex_exit(&vfs_list_lock);
1.80      fvdl      535:                return (ENODEV);
1.315     ad        536:        }
1.309     ad        537:        vfsp->vfs_refcount++;
                    538:        mutex_exit(&vfs_list_lock);
                    539:
1.376     dyoung    540:        if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
1.327     ad        541:                return ENOMEM;
1.80      fvdl      542:        mp->mnt_flag = MNT_RDONLY;
1.291     christos  543:        (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
                    544:            sizeof(mp->mnt_stat.f_fstypename));
1.80      fvdl      545:        mp->mnt_stat.f_mntonname[0] = '/';
1.314     pooka     546:        mp->mnt_stat.f_mntonname[1] = '\0';
1.291     christos  547:        mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
                    548:            '\0';
                    549:        (void)copystr(devname, mp->mnt_stat.f_mntfromname,
                    550:            sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.80      fvdl      551:        *mpp = mp;
1.29      cgd       552:        return (0);
                    553: }
                    554:
1.30      mycroft   555: /*
                    556:  * Routines having to do with the management of the vnode table.
                    557:  */
1.217     junyoung  558: extern int (**dead_vnodeop_p)(void *);
1.30      mycroft   559:
1.29      cgd       560: /*
                    561:  * Return the next vnode from the free list.
                    562:  */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
	    vnode_t **vpp)
{
	struct uvm_object *uobj;
	static int toggle;
	vnode_t *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we're creating a
		 * vnode.  If unmount is in progress, this will
		 * fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	mutex_enter(&vnode_free_list_lock);

	/* Flip the allocate-vs-recycle preference; pin to "recycle"
	 * once we are far over the target. */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc) {
		/*
		 * Optimistically bump numvnodes before dropping the
		 * lock; undo it if the allocation fails.
		 */
		numvnodes++;
		mutex_exit(&vnode_free_list_lock);
		if ((vp = vnalloc(NULL)) == NULL) {
			mutex_enter(&vnode_free_list_lock);
			numvnodes--;
		} else
			vp->v_usecount = 1;
	}

	if (vp == NULL) {
		/* Allocation failed or was not attempted: recycle. */
		vp = getcleanvnode();
		if (vp == NULL) {
			if (mp != NULL) {
				vfs_unbusy(mp, false, NULL);
			}
			if (tryalloc) {
				/* We wanted a fresh vnode but the pool
				 * was exhausted; sleep and retry. */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				kpause("newvn", false, hz, NULL);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		/* Reset state left over from the recycled vnode. */
		vp->v_iflag = 0;
		vp->v_vflag = 0;
		vp->v_uflag = 0;
		vp->v_socket = NULL;
	}

	/* Either path must hand us a clean, singly-referenced vnode. */
	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));

	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (mp != NULL) {
		/* Propagate the mount's MP-safety flag to the vnode. */
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		vfs_unbusy(mp, true, NULL);
	}

	return (0);
}
                    675:
                    676: /*
                    677:  * This is really just the reverse of getnewvnode(). Needed for
                    678:  * VFS_VGET functions who may need to push back a vnode in case
                    679:  * of a locking race.
                    680:  */
void
ungetnewvnode(vnode_t *vp)
{

	/* Must be exactly as getnewvnode() returned it: one reference,
	 * no file system data, not on a freelist. */
	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(&vp->v_interlock);
	/* Mark it already-clean so release skips the cleaning step;
	 * NOTE(review): vrelel() appears to consume the interlock. */
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);
}
                    693:
                    694: /*
                    695:  * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
                    696:  * marker vnode and we are prepared to wait for the allocation.
                    697:  */
                    698: vnode_t *
1.310     pooka     699: vnalloc(struct mount *mp)
1.130     fvdl      700: {
1.309     ad        701:        vnode_t *vp;
                    702:
                    703:        vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
                    704:        if (vp == NULL) {
                    705:                return NULL;
                    706:        }
                    707:
                    708:        memset(vp, 0, sizeof(*vp));
                    709:        UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
                    710:        cv_init(&vp->v_cv, "vnode");
                    711:        /*
                    712:         * done by memset() above.
                    713:         *      LIST_INIT(&vp->v_nclist);
                    714:         *      LIST_INIT(&vp->v_dnclist);
                    715:         */
                    716:
                    717:        if (mp != NULL) {
                    718:                vp->v_mount = mp;
                    719:                vp->v_type = VBAD;
                    720:                vp->v_iflag = VI_MARKER;
                    721:        } else {
1.326     ad        722:                rw_init(&vp->v_lock.vl_lock);
1.309     ad        723:        }
                    724:
                    725:        return vp;
                    726: }
                    727:
                    728: /*
                    729:  * Free an unused, unreferenced vnode.
                    730:  */
                    731: void
1.310     pooka     732: vnfree(vnode_t *vp)
1.309     ad        733: {
                    734:
                    735:        KASSERT(vp->v_usecount == 0);
                    736:
                    737:        if ((vp->v_iflag & VI_MARKER) == 0) {
1.326     ad        738:                rw_destroy(&vp->v_lock.vl_lock);
1.309     ad        739:                mutex_enter(&vnode_free_list_lock);
                    740:                numvnodes--;
                    741:                mutex_exit(&vnode_free_list_lock);
                    742:        }
                    743:
                    744:        UVM_OBJ_DESTROY(&vp->v_uobj);
                    745:        cv_destroy(&vp->v_cv);
                    746:        pool_cache_put(vnode_cache, vp);
                    747: }
                    748:
                    749: /*
                    750:  * Remove a vnode from its freelist.
                    751:  */
                    752: static inline void
                    753: vremfree(vnode_t *vp)
                    754: {
                    755:
                    756:        KASSERT(mutex_owned(&vp->v_interlock));
1.350     ad        757:        KASSERT(vp->v_usecount == 0);
1.130     fvdl      758:
1.217     junyoung  759:        /*
1.309     ad        760:         * Note that the reference count must not change until
                    761:         * the vnode is removed.
1.130     fvdl      762:         */
1.309     ad        763:        mutex_enter(&vnode_free_list_lock);
                    764:        if (vp->v_holdcnt > 0) {
                    765:                KASSERT(vp->v_freelisthd == &vnode_hold_list);
                    766:        } else {
                    767:                KASSERT(vp->v_freelisthd == &vnode_free_list);
                    768:        }
                    769:        TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
                    770:        vp->v_freelisthd = NULL;
                    771:        mutex_exit(&vnode_free_list_lock);
1.29      cgd       772: }
                    773:
                    774: /*
                    775:  * Move a vnode from one mount queue to another.
                    776:  */
1.260     yamt      777: static void
1.309     ad        778: insmntque(vnode_t *vp, struct mount *mp)
1.29      cgd       779: {
1.327     ad        780:        struct mount *omp;
1.29      cgd       781:
1.103     sommerfe  782: #ifdef DIAGNOSTIC
                    783:        if ((mp != NULL) &&
1.207     dbj       784:            (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113     fvdl      785:            vp->v_tag != VT_VFS) {
1.103     sommerfe  786:                panic("insmntque into dying filesystem");
                    787:        }
                    788: #endif
1.217     junyoung  789:
1.309     ad        790:        mutex_enter(&mntvnode_lock);
1.29      cgd       791:        /*
                    792:         * Delete from old mount point vnode list, if on one.
                    793:         */
1.327     ad        794:        if ((omp = vp->v_mount) != NULL)
1.272     reinoud   795:                TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29      cgd       796:        /*
1.327     ad        797:         * Insert into list of vnodes for the new mount point, if
                    798:         * available.  The caller must take a reference on the mount
                    799:         * structure and donate to the vnode.
1.29      cgd       800:         */
1.279     pooka     801:        if ((vp->v_mount = mp) != NULL)
                    802:                TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.309     ad        803:        mutex_exit(&mntvnode_lock);
1.327     ad        804:
                    805:        if (omp != NULL) {
                    806:                /* Release reference to old mount. */
1.344     ad        807:                vfs_destroy(omp);
1.327     ad        808:        }
1.29      cgd       809: }
                    810:
                    811: /*
1.353     pooka     812:  * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
                    813:  * recycled.
                    814:  */
                    815: void
                    816: vwait(vnode_t *vp, int flags)
                    817: {
                    818:
                    819:        KASSERT(mutex_owned(&vp->v_interlock));
                    820:        KASSERT(vp->v_usecount != 0);
                    821:
                    822:        while ((vp->v_iflag & flags) != 0)
                    823:                cv_wait(&vp->v_cv, &vp->v_interlock);
                    824: }
                    825:
                    826: /*
                    827:  * Insert a marker vnode into a mount's vnode list, after the
                    828:  * specified vnode.  mntvnode_lock must be held.
                    829:  */
                    830: void
                    831: vmark(vnode_t *mvp, vnode_t *vp)
                    832: {
                    833:        struct mount *mp;
                    834:
                    835:        mp = mvp->v_mount;
                    836:
                    837:        KASSERT(mutex_owned(&mntvnode_lock));
                    838:        KASSERT((mvp->v_iflag & VI_MARKER) != 0);
                    839:        KASSERT(vp->v_mount == mp);
                    840:
                    841:        TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
                    842: }
                    843:
                    844: /*
                    845:  * Remove a marker vnode from a mount's vnode list, and return
                    846:  * a pointer to the next vnode in the list.  mntvnode_lock must
                    847:  * be held.
                    848:  */
                    849: vnode_t *
                    850: vunmark(vnode_t *mvp)
                    851: {
                    852:        vnode_t *vp;
                    853:        struct mount *mp;
                    854:
                    855:        mp = mvp->v_mount;
                    856:
                    857:        KASSERT(mutex_owned(&mntvnode_lock));
                    858:        KASSERT((mvp->v_iflag & VI_MARKER) != 0);
                    859:
                    860:        vp = TAILQ_NEXT(mvp, v_mntvnodes);
                    861:        TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
                    862:
                    863:        KASSERT(vp == NULL || vp->v_mount == mp);
                    864:
                    865:        return vp;
                    866: }
                    867:
                    868: /*
                    869:  * Update outstanding I/O count and do wakeup if requested.
                    870:  */
                    871: void
                    872: vwakeup(struct buf *bp)
                    873: {
                    874:        struct vnode *vp;
                    875:
                    876:        if ((vp = bp->b_vp) == NULL)
                    877:                return;
                    878:
                    879:        KASSERT(bp->b_objlock == &vp->v_interlock);
                    880:        KASSERT(mutex_owned(bp->b_objlock));
                    881:
                    882:        if (--vp->v_numoutput < 0)
                    883:                panic("vwakeup: neg numoutput, vp %p", vp);
                    884:        if (vp->v_numoutput == 0)
                    885:                cv_broadcast(&vp->v_cv);
                    886: }
                    887:
                    888: /*
                    889:  * Flush out and invalidate all buffers associated with a vnode.
                    890:  * Called with the underlying vnode locked, which should prevent new dirty
                    891:  * buffers from being queued.
                    892:  */
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
	  bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	/* NOTE(review): interlock is taken but never dropped here, so
	 * VOP_PUTPAGES presumably consumes it — confirm convention. */
	mutex_enter(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		/* Caller wants dirty data preserved: sync it out first. */
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
		if (error)
			return (error);
		KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
	}

	mutex_enter(&bufcache_lock);
restart:
	/* Invalidate remaining dirty buffers. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			/* EPASSTHROUGH: list may have changed while we
			 * slept; rescan from the top. */
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Invalidate the clean buffers. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			/* Write it out synchronously, then rescan. */
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			VOP_BWRITE(bp);
			mutex_enter(&bufcache_lock);
			goto restart;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	mutex_exit(&bufcache_lock);

	return (0);
}
                    966:
                    967: /*
                    968:  * Destroy any in core blocks past the truncation length.
                    969:  * Called with the underlying vnode locked, which should prevent new dirty
                    970:  * buffers from being queued.
                    971:  */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	voff_t off;

	/* Free all pages from the truncation point onward, rounding
	 * the byte offset up to a page boundary. */
	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	/* NOTE(review): interlock is taken but never dropped here, so
	 * VOP_PUTPAGES presumably consumes it — confirm convention. */
	mutex_enter(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	mutex_enter(&bufcache_lock);
restart:
	/* Invalidate dirty buffers at or past the truncation point. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			/* List may have changed while sleeping; rescan. */
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Likewise for the clean buffers. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return (error);
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}
	mutex_exit(&bufcache_lock);

	return (0);
}
                   1019:
                   1020: /*
                   1021:  * Flush all dirty buffers from a vnode.
                   1022:  * Called with the underlying vnode locked, which should prevent new dirty
                   1023:  * buffers from being queued.
                   1024:  */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	bool dirty;

	/* NOTE(review): interlock is taken but never dropped here, so
	 * VOP_PUTPAGES presumably consumes it — confirm convention. */
	mutex_enter(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	mutex_enter(&bufcache_lock);
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_cflags & BC_BUSY))
			continue;
		if ((bp->b_oflags & BO_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
		mutex_exit(&bufcache_lock);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* bufcache_lock was dropped: rescan the list from the top. */
		goto loop;
	}
	mutex_exit(&bufcache_lock);

	if (sync == 0)
		return;

	/* Synchronous flush: wait for all outstanding writes to drain. */
	mutex_enter(&vp->v_interlock);
	while (vp->v_numoutput != 0)
		cv_wait(&vp->v_cv, &vp->v_interlock);
	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
	mutex_exit(&vp->v_interlock);

	if (dirty) {
		/* New dirty buffers appeared while we slept; try again. */
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
                   1071:
                   1072: /*
1.29      cgd      1073:  * Create a vnode for a block device.
1.59      thorpej  1074:  * Used for root filesystem and swap areas.
1.29      cgd      1075:  * Also used for memory file system special devices.
                   1076:  */
1.50      christos 1077: int
1.309     ad       1078: bdevvp(dev_t dev, vnode_t **vpp)
1.29      cgd      1079: {
1.30      mycroft  1080:
                   1081:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd      1082: }
                   1083:
                   1084: /*
                   1085:  * Create a vnode for a character device.
                   1086:  * Used for kernfs and some console handling.
                   1087:  */
1.50      christos 1088: int
1.309     ad       1089: cdevvp(dev_t dev, vnode_t **vpp)
1.29      cgd      1090: {
1.30      mycroft  1091:
                   1092:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd      1093: }
                   1094:
                   1095: /*
1.353     pooka    1096:  * Associate a buffer with a vnode.  There must already be a hold on
                   1097:  * the vnode.
                   1098:  */
                   1099: void
                   1100: bgetvp(struct vnode *vp, struct buf *bp)
                   1101: {
                   1102:
                   1103:        KASSERT(bp->b_vp == NULL);
                   1104:        KASSERT(bp->b_objlock == &buffer_lock);
                   1105:        KASSERT(mutex_owned(&vp->v_interlock));
                   1106:        KASSERT(mutex_owned(&bufcache_lock));
                   1107:        KASSERT((bp->b_cflags & BC_BUSY) != 0);
                   1108:        KASSERT(!cv_has_waiters(&bp->b_done));
                   1109:
                   1110:        vholdl(vp);
                   1111:        bp->b_vp = vp;
                   1112:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                   1113:                bp->b_dev = vp->v_rdev;
                   1114:        else
                   1115:                bp->b_dev = NODEV;
                   1116:
                   1117:        /*
                   1118:         * Insert onto list for new vnode.
                   1119:         */
                   1120:        bufinsvn(bp, &vp->v_cleanblkhd);
                   1121:        bp->b_objlock = &vp->v_interlock;
                   1122: }
                   1123:
                   1124: /*
                   1125:  * Disassociate a buffer from a vnode.
                   1126:  */
void
brelvp(struct buf *bp)
{
	struct vnode *vp = bp->b_vp;

	KASSERT(vp != NULL);
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);
	KASSERT(!cv_has_waiters(&bp->b_done));

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/* If the vnode now has no pages and no dirty buffers, it no
	 * longer needs the syncer's attention. */
	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_iflag &= ~VI_WRMAPDIRTY;
		vn_syncer_remove_from_worklist(vp);
	}

	/* Hand the buffer back to the global lock and drop our hold. */
	bp->b_objlock = &buffer_lock;
	bp->b_vp = NULL;
	holdrelel(vp);
}
                   1155:
/*
 * Reassign a buffer from one vnode list to another.
 * The list reassignment must be within the same vnode.
 * Used to assign file specific control information
 * (indirect blocks) to the list to which they belong.
 *
 * => Called with bufcache_lock and the vnode's v_interlock held;
 *    the buffer must be busy (BC_BUSY).
 */
void
reassignbuf(struct buf *bp, struct vnode *vp)
{
	struct buflists *listheadp;
	int delayx;

	KASSERT(mutex_owned(&bufcache_lock));
	KASSERT(bp->b_objlock == &vp->v_interlock);
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((bp->b_cflags & BC_BUSY) != 0);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_oflags & BO_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		/*
		 * With no pages and no remaining dirty buffers, the
		 * vnode no longer needs the syncer's attention.
		 */
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_iflag & VI_ONWORKLST) == 0) {
			/* Writeback delay depends on the vnode type. */
			switch (vp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			/* Skip the syncer worklist for MNT_ASYNC mounts. */
			if (!vp->v_mount ||
			    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(vp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}
                   1215:
                   1216: /*
1.29      cgd      1217:  * Create a vnode for a device.
                   1218:  * Used by bdevvp (block device) for root file system etc.,
                   1219:  * and by cdevvp (character device) for console and kernfs.
                   1220:  */
1.260     yamt     1221: static int
1.309     ad       1222: getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
1.29      cgd      1223: {
1.309     ad       1224:        vnode_t *vp;
                   1225:        vnode_t *nvp;
1.29      cgd      1226:        int error;
                   1227:
1.80      fvdl     1228:        if (dev == NODEV) {
1.302     ad       1229:                *vpp = NULL;
1.29      cgd      1230:                return (0);
1.80      fvdl     1231:        }
1.50      christos 1232:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1233:        if (error) {
1.302     ad       1234:                *vpp = NULL;
1.29      cgd      1235:                return (error);
                   1236:        }
                   1237:        vp = nvp;
                   1238:        vp->v_type = type;
1.309     ad       1239:        vp->v_vflag |= VV_MPSAFE;
1.297     pooka    1240:        uvm_vnp_setsize(vp, 0);
1.318     ad       1241:        spec_node_init(vp, dev);
1.29      cgd      1242:        *vpp = vp;
                   1243:        return (0);
                   1244: }
                   1245:
/*
 * Try to gain a reference to a vnode, without acquiring its interlock.
 * The caller must hold a lock that will prevent the vnode from being
 * recycled or freed.
 *
 * => Returns true if a reference was gained; false if the caller
 *    must fall back to taking v_interlock (e.g. via vget()).
 */
bool
vtryget(vnode_t *vp)
{
	u_int use, next;

	/*
	 * If the vnode is being freed, don't make life any harder
	 * for vclean() by adding another reference without waiting.
	 * This is not strictly necessary, but we'll do it anyway.
	 */
	if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
		return false;
	}
	/* Lock-free increment: loop until the CAS lands unchanged. */
	for (use = vp->v_usecount;; use = next) {
		if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
			/* Need interlock held if first reference. */
			return false;
		}
		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}
                   1275:
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 *
 * => flags: LK_INTERLOCK if v_interlock is already held; LK_NOWAIT
 *    to fail with EBUSY instead of sleeping; any LK_TYPE_MASK bits
 *    are handed on to vn_lock().
 * => Returns 0 on success, EBUSY/ENOENT if the vnode is being
 *    cleaned or deactivated, or the vn_lock() error.
 */
int
vget(vnode_t *vp, int flags)
{
	int error;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((flags & LK_INTERLOCK) == 0)
		mutex_enter(&vp->v_interlock);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking if
	 * the VI_XLOCK or VI_FREEING flags are set.
	 */
	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_XLOCK | VI_FREEING);
		vrelel(vp, 0);
		return ENOENT;
	}

	if ((vp->v_iflag & VI_INACTNOW) != 0) {
		/*
		 * If it's being deactivated, wait for that to complete.
		 * Make sure to not return a clean vnode.
		 */
		 if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_INACTNOW);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}
	if (flags & LK_TYPE_MASK) {
		/* vn_lock() consumes the interlock (LK_INTERLOCK). */
		error = vn_lock(vp, flags | LK_INTERLOCK);
		if (error != 0) {
			vrele(vp);
		}
		return error;
	}
	mutex_exit(&vp->v_interlock);
	return 0;
}
                   1346:
/*
 * vput(), just unlock and vrele()
 *
 * => The caller holds the vnode lock and one reference; both are
 *    given up here.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp, 0);
	vrele(vp);
}
                   1359:
/*
 * Try to drop reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 *
 * => Lock-free: loops on compare-and-swap until the decrement lands.
 * => Returns false when v_usecount is exactly 1, so the caller can
 *    take the slow path (vrelel()) for the final release.
 */
static inline bool
vtryrele(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 1) {
			/* Last reference: caller must handle it. */
			return false;
		}
		/* Count (sans VC_XLOCK bit) must still exceed one. */
		KASSERT((use & VC_MASK) > 1);
		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}
                   1380:
/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 *
 * => Called with v_interlock held; the interlock is released before
 *    returning on every path.
 * => The vnode must not currently be on a freelist.
 * => flags is not examined by this routine as written.
 */
void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	/* A vnode using deadfs ops must be clean or mid-reclaim. */
	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		/*
		 * Tell any concurrent deactivation that a new
		 * reference appeared, so it should retry.
		 */
		vp->v_iflag |= VI_INACTREDO;
		mutex_exit(&vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vpanic(vp, "vrelel: bad ref count");
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
 retry:
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;
		vp->v_iflag |= VI_INACTNOW;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 */
		if (curlwp == uvm.pagedaemon_lwp) {
			/* The pagedaemon can't wait around; defer. */
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/* We have to try harder. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_RETRY);
			if (error != 0) {
				/* XXX */
				vpanic(vp, "vrele: unable to lock %p");
			}
			defer = false;
		} else if ((vp->v_iflag & VI_LAYER) != 0) {
			/*
			 * Acquiring the stack's lock in vclean() even
			 * for an honest vput/vrele is dangerous because
			 * our caller may hold other vnode locks; defer.
			 */
			defer = true;
		} else {
			/* If we can't acquire the lock, then defer. */
			vp->v_iflag &= ~VI_INACTREDO;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
			    LK_NOWAIT);
			if (error != 0) {
				defer = true;
				mutex_enter(&vp->v_interlock);
			} else {
				defer = false;
			}
		}

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			KASSERT(mutex_owned(&vp->v_interlock));
			KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
			vp->v_iflag &= ~VI_INACTNOW;
			vp->v_iflag |= VI_INACTPEND;
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			/* Wake threads in vget() waiting on VI_INACTNOW. */
			cv_broadcast(&vp->v_cv);
			mutex_exit(&vp->v_interlock);
			return;
		}

#ifdef DIAGNOSTIC
		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
			vprint("vrelel: missing VOP_CLOSE()", vp);
		}
#endif

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		mutex_enter(&vp->v_interlock);
		vp->v_iflag &= ~VI_INACTNOW;
		/* Wake threads in vget() waiting on VI_INACTNOW. */
		cv_broadcast(&vp->v_cv);
		if (!recycle) {
			if (vtryrele(vp)) {
				mutex_exit(&vp->v_interlock);
				return;
			}

			/*
			 * If we grew another reference while
			 * VOP_INACTIVE() was underway, retry.
			 */
			if ((vp->v_iflag & VI_INACTREDO) != 0) {
				goto retry;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp, DOCLOSE);
		}
		KASSERT(vp->v_usecount > 0);
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		mutex_exit(&vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(&vp->v_interlock);
		insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		mutex_exit(&vp->v_interlock);
	}
}
                   1572:
/*
 * Release a reference to a vnode.  Takes the lock-free fast path
 * (vtryrele()) unless this may be the last reference or the vnode
 * is being deactivated, in which case vrelel() does the real work.
 */
void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
		return;
	}
	mutex_enter(&vp->v_interlock);
	vrelel(vp, 0);
}
                   1585:
/*
 * Kernel thread that services deferred releases: drains vrele_list,
 * performing the vrelel() that the original caller could not do
 * safely.  Each queued vnode carries the reference donated to us.
 */
static void
vrele_thread(void *cookie)
{
	vnode_t *vp;

	for (;;) {
		mutex_enter(&vrele_lock);
		while (TAILQ_EMPTY(&vrele_list)) {
			/* Bump the generation so vflush() knows we
			 * have caught up, then nap for up to a second. */
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
		}
		vp = TAILQ_FIRST(&vrele_list);
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(&vp->v_interlock);
		KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
		vp->v_iflag &= ~VI_INACTPEND;
		vrelel(vp, 0);
	}
}
                   1613:
/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 *
 * => The first hold on an otherwise unreferenced vnode migrates it
 *    from vnode_free_list to vnode_hold_list.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
                   1634:
/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 *
 * => Dropping the last hold on an otherwise unreferenced vnode
 *    migrates it from vnode_hold_list back to vnode_free_list.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vpanic(vp, "holdrelel: holdcnt vp %p");
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
                   1660:
/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 *
 * => No interlock needed: the existing reference keeps v_usecount
 *    non-zero, so a lock-free increment suffices.
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}
                   1674:
                   1675: /*
                   1676:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1677:  *
1.183     yamt     1678:  * If FORCECLOSE is not specified, there should not be any active ones,
1.29      cgd      1679:  * return error if any are found (nb: this is a user error, not a
1.183     yamt     1680:  * system error). If FORCECLOSE is specified, detach any active vnodes
1.29      cgd      1681:  * that are found.
1.183     yamt     1682:  *
                   1683:  * If WRITECLOSE is set, only flush out regular file vnodes open for
                   1684:  * writing.
                   1685:  *
                   1686:  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29      cgd      1687:  */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
/* Debug-control table entry exposing busyprt (ctldebug mechanism). */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
1.29      cgd      1692:
1.334     ad       1693: static vnode_t *
                   1694: vflushnext(vnode_t *mvp, int *when)
                   1695: {
                   1696:
                   1697:        if (hardclock_ticks > *when) {
                   1698:                mutex_exit(&mntvnode_lock);
                   1699:                yield();
                   1700:                mutex_enter(&mntvnode_lock);
                   1701:                *when = hardclock_ticks + hz / 10;
                   1702:        }
                   1703:
                   1704:        return vunmark(mvp);
                   1705: }
                   1706:
1.50      christos 1707: int
1.309     ad       1708: vflush(struct mount *mp, vnode_t *skipvp, int flags)
1.29      cgd      1709: {
1.309     ad       1710:        vnode_t *vp, *mvp;
1.351     ad       1711:        int busy = 0, when = 0, gen;
                    1712:
                    1713:        /*
                    1714:         * First, flush out any vnode references from vrele_list.
                    1715:         */
                    1716:        mutex_enter(&vrele_lock);
                    1717:        gen = vrele_gen;
                          /*
                           * Wake the vrele worker and wait until the pending list
                           * drains; a generation change means another thread has
                           * already seen the list drained, so we can stop waiting.
                           */
1.352     pooka    1718:        while (vrele_pending && gen == vrele_gen) {
1.351     ad       1719:                cv_broadcast(&vrele_cv);
                    1720:                cv_wait(&vrele_cv, &vrele_lock);
1.352     pooka    1721:        }
1.351     ad       1722:        mutex_exit(&vrele_lock);
1.29      cgd      1723:
1.309     ad       1724:        /* Allocate a marker vnode. */
1.310     pooka    1725:        if ((mvp = vnalloc(mp)) == NULL)
1.309     ad       1726:                return (ENOMEM);
                    1727:
1.273     reinoud  1728:        /*
                    1729:         * NOTE: not using TAILQ_FOREACH here, since vgone() and
                    1730:         * vclean() are called within the loop and may modify the
                    1731:         * list; the marker vnode (mvp) tracks our position instead.
                    1732:         */
1.351     ad       1732:        mutex_enter(&mntvnode_lock);
1.334     ad       1733:        for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
                    1734:            vp = vflushnext(mvp, &when)) {
1.309     ad       1735:                vmark(mvp, vp);
                    1736:                if (vp->v_mount != mp || vismarker(vp))
                    1737:                        continue;
1.29      cgd      1738:                /*
                    1739:                 * Skip over a selected vnode.
                    1740:                 */
                    1741:                if (vp == skipvp)
                    1742:                        continue;
1.309     ad       1743:                mutex_enter(&vp->v_interlock);
1.29      cgd      1744:                /*
1.315     ad       1745:                 * Ignore clean but still referenced vnodes.
                    1746:                 */
                    1747:                if ((vp->v_iflag & VI_CLEAN) != 0) {
                    1748:                        mutex_exit(&vp->v_interlock);
                    1749:                        continue;
                    1750:                }
                    1751:                /*
1.309     ad       1752:                 * Skip over vnodes marked VV_SYSTEM.
1.29      cgd      1753:                 */
1.302     ad       1754:                if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1.309     ad       1755:                        mutex_exit(&vp->v_interlock);
1.29      cgd      1756:                        continue;
1.80      fvdl     1757:                }
1.29      cgd      1758:                /*
1.30      mycroft  1759:                 * If WRITECLOSE is set, only flush out regular file
                    1760:                 * vnodes open for writing.
                    1761:                 */
                    1762:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1763:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1.309     ad       1764:                        mutex_exit(&vp->v_interlock);
1.30      mycroft  1765:                        continue;
1.92      thorpej  1766:                }
1.30      mycroft  1767:                /*
1.29      cgd      1768:                 * With v_usecount == 0, all we need to do is clear
                    1769:                 * out the vnode data structures and we are done.
                    1770:                 */
                    1771:                if (vp->v_usecount == 0) {
1.309     ad       1772:                        mutex_exit(&mntvnode_lock);
                    1773:                        vremfree(vp);
                          /* Take the sole reference; vrelel() drops it below. */
1.350     ad       1774:                        vp->v_usecount = 1;
1.309     ad       1775:                        vclean(vp, DOCLOSE);
1.324     pooka    1776:                        vrelel(vp, 0);
1.309     ad       1777:                        mutex_enter(&mntvnode_lock);
1.29      cgd      1778:                        continue;
                    1779:                }
                    1780:                /*
1.30      mycroft  1781:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1782:                 * For block or character devices, revert to an
1.318     ad       1783:                 * anonymous device.  For all other files, just
                    1784:                 * kill them.
1.29      cgd      1785:                 */
                    1786:                if (flags & FORCECLOSE) {
1.309     ad       1787:                        mutex_exit(&mntvnode_lock);
                          /* Hold a reference so the vnode cannot be freed under us. */
1.346     ad       1788:                        atomic_inc_uint(&vp->v_usecount);
1.29      cgd      1789:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.309     ad       1790:                                vclean(vp, DOCLOSE);
1.324     pooka    1791:                                vrelel(vp, 0);
1.29      cgd      1792:                        } else {
1.309     ad       1793:                                vclean(vp, 0);
1.318     ad       1794:                                vp->v_op = spec_vnodeop_p; /* XXXSMP */
1.320     ad       1795:                                mutex_exit(&vp->v_interlock);
                    1796:                                /*
                    1797:                                 * The vnode isn't clean, but still resides
                    1798:                                 * on the mount list.  Remove it. XXX This
                    1799:                                 * is a bit dodgy.
                    1800:                                 */
                    1801:                                insmntque(vp, NULL);
                    1802:                                vrele(vp);
1.29      cgd      1803:                        }
1.309     ad       1804:                        mutex_enter(&mntvnode_lock);
1.29      cgd      1805:                        continue;
                    1806:                }
1.30      mycroft  1807: #ifdef DEBUG
1.29      cgd      1808:                if (busyprt)
                    1809:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1810: #endif
1.309     ad       1811:                mutex_exit(&vp->v_interlock);
                          /* Vnode stays in use: remember to report EBUSY. */
1.29      cgd      1812:                busy++;
                    1813:        }
1.309     ad       1814:        mutex_exit(&mntvnode_lock);
1.310     pooka    1815:        vnfree(mvp);
1.29      cgd      1816:        if (busy)
                    1817:                return (EBUSY);
                    1818:        return (0);
                    1819: }
                   1820:
                   1821: /*
                   1822:  * Disassociate the underlying file system from a vnode.
1.309     ad       1823:  *
                   1824:  * Must be called with the interlock held, and will return with it held.
1.29      cgd      1825:  */
1.309     ad       1826: void
                    1827: vclean(vnode_t *vp, int flags)
1.29      cgd      1828: {
1.309     ad       1829:        lwp_t *l = curlwp;
                    1830:        bool recycle, active;
1.318     ad       1831:        int error;
1.29      cgd      1832:
1.309     ad       1833:        KASSERT(mutex_owned(&vp->v_interlock));
                    1834:        KASSERT((vp->v_iflag & VI_MARKER) == 0);
                    1835:        KASSERT(vp->v_usecount != 0);
1.166     chs      1836:
1.309     ad       1837:        /* If cleaning is already in progress wait until done and return. */
                    1838:        if (vp->v_iflag & VI_XLOCK) {
                    1839:                vwait(vp, VI_XLOCK);
                    1840:                return;
                    1841:        }
1.166     chs      1842:
1.309     ad       1843:        /* If already clean, nothing to do. */
                    1844:        if ((vp->v_iflag & VI_CLEAN) != 0) {
                    1845:                return;
1.112     mycroft  1846:        }
1.87      pk       1847:
1.29      cgd      1848:        /*
1.309     ad       1849:         * Prevent the vnode from being recycled or brought into use
                    1850:         * while we clean it out.
1.29      cgd      1851:         */
1.302     ad       1852:        vp->v_iflag |= VI_XLOCK;
                          /*
                           * The pages lose their executable mapping status: move the
                           * page accounting from execpages over to filepages.
                           */
                    1853:        if (vp->v_iflag & VI_EXECMAP) {
1.307     ad       1854:                atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
                    1855:                atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1.147     chs      1856:        }
1.302     ad       1857:        vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
                          /* Any reference beyond our own means the vnode is active. */
1.309     ad       1858:        active = (vp->v_usecount > 1);
1.142     chs      1859:
1.309     ad       1860:        /* XXXAD should not lock vnode under layer */
                    1861:        VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1.80      fvdl     1862:
1.98      wrstuden 1863:        /*
1.142     chs      1864:         * Clean out any cached data associated with the vnode.
1.318     ad       1865:         * If purging an active vnode, it must be closed and
                    1866:         * deactivated before being reclaimed. Note that the
                    1867:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1868:         */
1.166     chs      1869:        if (flags & DOCLOSE) {
1.256     christos 1870:                error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.355     simonb   1871:                if (error != 0) {
                    1872:                        /* XXX, fix vn_start_write's grab of mp and use that. */
                    1873:
                          /*
                           * Saving the buffers failed: discard any wapbl journal
                           * attached to the vnode and retry, tossing the buffers.
                           */
                    1874:                        if (wapbl_vphaswapbl(vp))
                    1875:                                WAPBL_DISCARD(wapbl_vptomp(vp));
1.256     christos 1876:                        error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.355     simonb   1877:                }
1.211     dbj      1878:                KASSERT(error == 0);
1.302     ad       1879:                KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.318     ad       1880:                if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
                    1881:                         spec_node_revoke(vp);
1.231     mycroft  1882:                }
1.166     chs      1883:        }
1.29      cgd      1884:        if (active) {
1.309     ad       1885:                VOP_INACTIVE(vp, &recycle);
1.80      fvdl     1886:        } else {
                    1887:                /*
                    1888:                 * Any other processes trying to obtain this lock must first
1.302     ad       1889:                 * wait for VI_XLOCK to clear, then call the new lock operation.
1.80      fvdl     1890:                 */
                    1891:                VOP_UNLOCK(vp, 0);
1.29      cgd      1892:        }
1.142     chs      1893:
1.309     ad       1894:        /* Disassociate the underlying file system from the vnode. */
                    1895:        if (VOP_RECLAIM(vp)) {
                    1896:                vpanic(vp, "vclean: cannot reclaim");
1.87      pk       1897:        }
1.30      mycroft  1898:
1.169     chs      1899:        KASSERT(vp->v_uobj.uo_npages == 0);
                          /* Release the read-ahead context of a regular file, if any. */
1.255     yamt     1900:        if (vp->v_type == VREG && vp->v_ractx != NULL) {
                    1901:                uvm_ra_freectx(vp->v_ractx);
                    1902:                vp->v_ractx = NULL;
                    1903:        }
1.80      fvdl     1904:        cache_purge(vp);
                    1905:
1.309     ad       1906:        /* Done with purge, notify sleepers of the grim news. */
1.360     ad       1907:        mutex_enter(&vp->v_interlock);
1.30      mycroft  1908:        vp->v_op = dead_vnodeop_p;
                    1909:        vp->v_tag = VT_NON;
1.309     ad       1910:        vp->v_vnlock = &vp->v_lock;
1.332     ad       1911:        KNOTE(&vp->v_klist, NOTE_REVOKE);
1.312     ad       1912:        vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1.304     ad       1913:        vp->v_vflag &= ~VV_LOCKSWORK;
                          /* Only a DOCLOSE clean leaves the vnode marked fully clean. */
1.319     ad       1914:        if ((flags & DOCLOSE) != 0) {
1.318     ad       1915:                vp->v_iflag |= VI_CLEAN;
                    1916:        }
1.309     ad       1917:        cv_broadcast(&vp->v_cv);
                    1918:
                    1919:        KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1.29      cgd      1920: }
                   1921:
                   1922: /*
1.80      fvdl     1923:  * Recycle an unused vnode to the front of the free list.
                   1924:  * Release the passed interlock if the vnode will be recycled.
1.29      cgd      1925:  */
1.80      fvdl     1926: int
1.309     ad       1927: vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1.217     junyoung 1928: {
                   1929:
1.309     ad       1930:        KASSERT((vp->v_iflag & VI_MARKER) == 0);
                   1931:
                   1932:        mutex_enter(&vp->v_interlock);
                   1933:        if (vp->v_usecount != 0) {
                   1934:                mutex_exit(&vp->v_interlock);
                   1935:                return (0);
1.29      cgd      1936:        }
1.309     ad       1937:        if (inter_lkp)
                   1938:                mutex_exit(inter_lkp);
                   1939:        vremfree(vp);
1.350     ad       1940:        vp->v_usecount = 1;
1.309     ad       1941:        vclean(vp, DOCLOSE);
1.324     pooka    1942:        vrelel(vp, 0);
1.309     ad       1943:        return (1);
1.29      cgd      1944: }
                   1945:
                   1946: /*
1.309     ad       1947:  * Eliminate all activity associated with a vnode in preparation for
                   1948:  * reuse.  Drops a reference from the vnode.
1.29      cgd      1949:  */
                   1950: void
1.309     ad       1951: vgone(vnode_t *vp)
1.80      fvdl     1952: {
1.166     chs      1953:
1.309     ad       1954:        mutex_enter(&vp->v_interlock);
                   1955:        vclean(vp, DOCLOSE);
1.324     pooka    1956:        vrelel(vp, 0);
1.29      cgd      1957: }
                   1958:
                   1959: /*
                   1960:  * Lookup a vnode by device number.
                   1961:  */
1.50      christos 1962: int
1.309     ad       1963: vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1.29      cgd      1964: {
1.309     ad       1965:        vnode_t *vp;
1.80      fvdl     1966:        int rc = 0;
1.29      cgd      1967:
1.363     pooka    1968:        mutex_enter(&device_lock);
1.318     ad       1969:        for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1.29      cgd      1970:                if (dev != vp->v_rdev || type != vp->v_type)
                   1971:                        continue;
                   1972:                *vpp = vp;
1.80      fvdl     1973:                rc = 1;
                   1974:                break;
1.29      cgd      1975:        }
1.363     pooka    1976:        mutex_exit(&device_lock);
1.80      fvdl     1977:        return (rc);
1.96      thorpej  1978: }
                   1979:
                   1980: /*
                   1981:  * Revoke all the vnodes corresponding to the specified minor number
                   1982:  * range (endpoints inclusive) of the specified major.
                   1983:  */
                    1984: void
1.247     thorpej  1985: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96      thorpej  1986: {
1.316     ad       1987:        vnode_t *vp, **vpp;
                    1988:        dev_t dev;
1.96      thorpej  1989:        int mn;
                    1990:
1.274     mrg      1991:        vp = NULL;      /* XXX gcc */
                    1992:
1.363     pooka    1993:        mutex_enter(&device_lock);
1.316     ad       1994:        for (mn = minl; mn <= minh; mn++) {
                    1995:                dev = makedev(maj, mn);
1.318     ad       1996:                vpp = &specfs_hash[SPECHASH(dev)];
1.316     ad       1997:                for (vp = *vpp; vp != NULL;) {
                    1998:                        mutex_enter(&vp->v_interlock);
                          /* Skip vnodes already clean or for some other device. */
                    1999:                        if ((vp->v_iflag & VI_CLEAN) != 0 ||
                    2000:                            dev != vp->v_rdev || type != vp->v_type) {
                    2001:                                mutex_exit(&vp->v_interlock);
                    2002:                                vp = vp->v_specnext;
                    2003:                                continue;
                    2004:                        }
                          /*
                           * Drop device_lock across vget()/VOP_REVOKE(); the
                           * hash chain may change while it is released.
                           */
1.363     pooka    2005:                        mutex_exit(&device_lock);
1.316     ad       2006:                        if (vget(vp, LK_INTERLOCK) == 0) {
                    2007:                                VOP_REVOKE(vp, REVOKEALL);
                    2008:                                vrele(vp);
                    2009:                        }
1.363     pooka    2010:                        mutex_enter(&device_lock);
                          /* Restart from the chain head: it may have changed. */
1.316     ad       2011:                        vp = *vpp;
                    2012:                }
                    2013:        }
1.363     pooka    2014:        mutex_exit(&device_lock);
1.29      cgd      2015: }
                   2016:
                   2017: /*
                   2018:  * Calculate the total number of references to a special device.
                   2019:  */
1.30      mycroft  2020: int
1.309     ad       2021: vcount(vnode_t *vp)
1.29      cgd      2022: {
                   2023:        int count;
                   2024:
1.363     pooka    2025:        mutex_enter(&device_lock);
1.309     ad       2026:        mutex_enter(&vp->v_interlock);
1.318     ad       2027:        if (vp->v_specnode == NULL) {
1.309     ad       2028:                count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
                   2029:                mutex_exit(&vp->v_interlock);
1.363     pooka    2030:                mutex_exit(&device_lock);
1.309     ad       2031:                return (count);
                   2032:        }
                   2033:        mutex_exit(&vp->v_interlock);
1.318     ad       2034:        count = vp->v_specnode->sn_dev->sd_opencnt;
1.363     pooka    2035:        mutex_exit(&device_lock);
1.29      cgd      2036:        return (count);
                   2037: }
                   2038:
1.101     mrg      2039: /*
1.316     ad       2040:  * Eliminate all activity associated with the requested vnode
                   2041:  * and with all vnodes aliased to the requested vnode.
                   2042:  */
                    2043: void
                    2044: vrevoke(vnode_t *vp)
                    2045: {
                    2046:        vnode_t *vq, **vpp;
                    2047:        enum vtype type;
                    2048:        dev_t dev;
                    2049:
                    2050:        KASSERT(vp->v_usecount > 0);
                    2051:
                    2052:        mutex_enter(&vp->v_interlock);
                    2053:        if ((vp->v_iflag & VI_CLEAN) != 0) {
                    2054:                mutex_exit(&vp->v_interlock);
                    2055:                return;
1.368     enami    2056:        } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
                          /* Not a device: there are no aliases, revoke just this one. */
                    2057:                atomic_inc_uint(&vp->v_usecount);
                    2058:                vclean(vp, DOCLOSE);
                    2059:                vrelel(vp, 0);
                    2060:                return;
1.316     ad       2061:        } else {
                          /* Remember the device identity before dropping the interlock. */
                    2062:                dev = vp->v_rdev;
                    2063:                type = vp->v_type;
                    2064:                mutex_exit(&vp->v_interlock);
                    2065:        }
                    2066:
1.318     ad       2067:        vpp = &specfs_hash[SPECHASH(dev)];
1.363     pooka    2068:        mutex_enter(&device_lock);
1.316     ad       2069:        for (vq = *vpp; vq != NULL;) {
1.333     ad       2070:                /* If clean or being cleaned, then ignore it. */
                    2071:                mutex_enter(&vq->v_interlock);
                    2072:                if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
1.317     ad       2073:                    vq->v_rdev != dev || vq->v_type != type) {
1.333     ad       2074:                        mutex_exit(&vq->v_interlock);
1.316     ad       2075:                        vq = vq->v_specnext;
                    2076:                        continue;
                    2077:                }
1.363     pooka    2078:                mutex_exit(&device_lock);
                          /* Take a reference so the vnode survives vclean(). */
1.350     ad       2079:                if (vq->v_usecount == 0) {
1.317     ad       2080:                        vremfree(vq);
1.350     ad       2081:                        vq->v_usecount = 1;
                    2082:                } else {
                    2083:                        atomic_inc_uint(&vq->v_usecount);
1.316     ad       2084:                }
                    2085:                vclean(vq, DOCLOSE);
1.324     pooka    2086:                vrelel(vq, 0);
1.363     pooka    2087:                mutex_enter(&device_lock);
                          /* Chain may have changed while unlocked: restart from head. */
1.316     ad       2088:                vq = *vpp;
                    2089:        }
1.363     pooka    2090:        mutex_exit(&device_lock);
1.316     ad       2091: }
                   2092:
                   2093: /*
1.220     lukem    2094:  * sysctl helper routine to return list of supported fstypes
                   2095:  */
1.358     pooka    2096: int
1.220     lukem    2097: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
                   2098: {
1.291     christos 2099:        char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220     lukem    2100:        char *where = oldp;
                   2101:        struct vfsops *v;
                   2102:        size_t needed, left, slen;
                   2103:        int error, first;
                   2104:
                   2105:        if (newp != NULL)
                   2106:                return (EPERM);
                   2107:        if (namelen != 0)
                   2108:                return (EINVAL);
                   2109:
                   2110:        first = 1;
                   2111:        error = 0;
                   2112:        needed = 0;
                   2113:        left = *oldlenp;
                   2114:
1.311     ad       2115:        sysctl_unlock();
1.302     ad       2116:        mutex_enter(&vfs_list_lock);
1.220     lukem    2117:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   2118:                if (where == NULL)
                   2119:                        needed += strlen(v->vfs_name) + 1;
                   2120:                else {
1.245     christos 2121:                        memset(bf, 0, sizeof(bf));
1.220     lukem    2122:                        if (first) {
1.245     christos 2123:                                strncpy(bf, v->vfs_name, sizeof(bf));
1.220     lukem    2124:                                first = 0;
                   2125:                        } else {
1.245     christos 2126:                                bf[0] = ' ';
                   2127:                                strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220     lukem    2128:                        }
1.245     christos 2129:                        bf[sizeof(bf)-1] = '\0';
                   2130:                        slen = strlen(bf);
1.220     lukem    2131:                        if (left < slen + 1)
                   2132:                                break;
1.302     ad       2133:                        v->vfs_refcount++;
                   2134:                        mutex_exit(&vfs_list_lock);
1.354     pooka    2135:                        /* +1 to copy out the trailing NUL byte */
1.245     christos 2136:                        error = copyout(bf, where, slen + 1);
1.302     ad       2137:                        mutex_enter(&vfs_list_lock);
                   2138:                        v->vfs_refcount--;
1.220     lukem    2139:                        if (error)
                   2140:                                break;
                   2141:                        where += slen;
                   2142:                        needed += slen;
                   2143:                        left -= slen;
                   2144:                }
                   2145:        }
1.302     ad       2146:        mutex_exit(&vfs_list_lock);
1.311     ad       2147:        sysctl_relock();
1.220     lukem    2148:        *oldlenp = needed;
                   2149:        return (error);
                   2150: }
                   2151:
1.212     atatat   2152:
1.29      cgd      2153: int kinfo_vdebug = 1;
                   2154: int kinfo_vgetfailed;
                   2155: #define KINFO_VNODESLOP        10
                   2156: /*
                   2157:  * Dump vnode list (via sysctl).
                   2158:  * Copyout address of vnode followed by vnode.
                   2159:  */
                   2160: /* ARGSUSED */
1.50      christos 2161: int
1.212     atatat   2162: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29      cgd      2163: {
1.212     atatat   2164:        char *where = oldp;
                    2165:        size_t *sizep = oldlenp;
1.80      fvdl     2166:        struct mount *mp, *nmp;
1.311     ad       2167:        vnode_t *vp, *mvp, vbuf;
1.389     enami    2168:        char *bp = where;
1.29      cgd      2169:        char *ewhere;
                    2170:        int error;
1.212     atatat   2171:
                    2172:        if (namelen != 0)
                    2173:                return (EOPNOTSUPP);
                    2174:        if (newp != NULL)
                    2175:                return (EPERM);
1.29      cgd      2176:
1.309     ad       2177: #define VPTRSZ sizeof(vnode_t *)
                    2178: #define VNODESZ        sizeof(vnode_t)
                          /* Size probe: include slop for vnodes created meanwhile. */
1.29      cgd      2179:        if (where == NULL) {
                    2180:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                    2181:                return (0);
                    2182:        }
                    2183:        ewhere = where + *sizep;
1.80      fvdl     2184:
1.311     ad       2185:        sysctl_unlock();
1.302     ad       2186:        mutex_enter(&mountlist_lock);
1.177     matt     2187:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1.389     enami    2188:            mp = nmp) {
1.344     ad       2189:                if (vfs_busy(mp, &nmp)) {
1.29      cgd      2190:                        continue;
1.80      fvdl     2191:                }
1.309     ad       2192:                /* Allocate a marker vnode. */
1.375     elad     2193:                mvp = vnalloc(mp);
                    2194:                /* Should never fail for mp != NULL */
                    2195:                KASSERT(mvp != NULL);
1.309     ad       2196:                mutex_enter(&mntvnode_lock);
1.389     enami    2197:                for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp;
                    2198:                    vp = vunmark(mvp)) {
1.309     ad       2199:                        vmark(mvp, vp);
1.29      cgd      2200:                        /*
                    2201:                         * Check that the vp is still associated with
                    2202:                         * this filesystem.  RACE: could have been
                    2203:                         * recycled onto the same filesystem.
                    2204:                         */
1.309     ad       2205:                        if (vp->v_mount != mp || vismarker(vp))
                    2206:                                continue;
                          /* Out of user buffer: report partial size and ENOMEM. */
1.29      cgd      2207:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.309     ad       2208:                                (void)vunmark(mvp);
                    2209:                                mutex_exit(&mntvnode_lock);
1.310     pooka    2210:                                vnfree(mvp);
1.388     enami    2211:                                vfs_unbusy(mp, false, NULL);
1.311     ad       2212:                                sysctl_relock();
1.29      cgd      2213:                                *sizep = bp - where;
                    2214:                                return (ENOMEM);
                    2215:                        }
                          /*
                           * Snapshot the vnode while the lock is held, so the
                           * unlocked copyout() sees a consistent image.
                           */
1.311     ad       2216:                        memcpy(&vbuf, vp, VNODESZ);
1.309     ad       2217:                        mutex_exit(&mntvnode_lock);
1.367     enami    2218:                        if ((error = copyout(&vp, bp, VPTRSZ)) ||
1.389     enami    2219:                            (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1.309     ad       2220:                                mutex_enter(&mntvnode_lock);
                    2221:                                (void)vunmark(mvp);
                    2222:                                mutex_exit(&mntvnode_lock);
1.310     pooka    2223:                                vnfree(mvp);
1.388     enami    2224:                                vfs_unbusy(mp, false, NULL);
1.311     ad       2225:                                sysctl_relock();
1.29      cgd      2226:                                return (error);
1.309     ad       2227:                        }
1.29      cgd      2228:                        bp += VPTRSZ + VNODESZ;
1.309     ad       2229:                        mutex_enter(&mntvnode_lock);
1.29      cgd      2230:                }
1.309     ad       2231:                mutex_exit(&mntvnode_lock);
1.344     ad       2232:                vnfree(mvp);
1.339     ad       2233:                vfs_unbusy(mp, false, &nmp);
1.29      cgd      2234:        }
1.302     ad       2235:        mutex_exit(&mountlist_lock);
1.311     ad       2236:        sysctl_relock();
1.29      cgd      2237:
                    2238:        *sizep = bp - where;
                    2239:        return (0);
1.30      mycroft  2240: }
                   2241:
                   2242: /*
1.309     ad       2243:  * Remove clean vnodes from a mountpoint's vnode list.
                   2244:  */
                   2245: void
                   2246: vfs_scrubvnlist(struct mount *mp)
                   2247: {
                   2248:        vnode_t *vp, *nvp;
                   2249:
1.327     ad       2250:  retry:
1.309     ad       2251:        mutex_enter(&mntvnode_lock);
                   2252:        for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
                   2253:                nvp = TAILQ_NEXT(vp, v_mntvnodes);
                   2254:                mutex_enter(&vp->v_interlock);
1.315     ad       2255:                if ((vp->v_iflag & VI_CLEAN) != 0) {
1.309     ad       2256:                        TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.315     ad       2257:                        vp->v_mount = NULL;
1.327     ad       2258:                        mutex_exit(&mntvnode_lock);
                   2259:                        mutex_exit(&vp->v_interlock);
1.344     ad       2260:                        vfs_destroy(mp);
1.327     ad       2261:                        goto retry;
1.315     ad       2262:                }
1.309     ad       2263:                mutex_exit(&vp->v_interlock);
                   2264:        }
                   2265:        mutex_exit(&mntvnode_lock);
                   2266: }
                   2267:
/*
 * Check to see if a filesystem is mounted on a block device.
 * Returns 0 if not, EBUSY if it (or an alias of it) carries a mount,
 * or ENOTBLK if the vnode is not a block device.
 */
int
vfs_mountedon(vnode_t *vp)
{
	vnode_t *vq;
	int error = 0;

	if (vp->v_type != VBLK)
		return ENOTBLK;
	/* Fast path: this vnode itself has a mount point attached. */
	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	/*
	 * Walk the specfs hash chain: another vnode aliasing the same
	 * device (same v_rdev and type) may carry the mount.
	 */
	mutex_enter(&device_lock);
	for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
	    vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		if (vq->v_specmountpoint != NULL) {
			error = EBUSY;
			break;
		}
	}
	mutex_exit(&device_lock);
	return (error);
}
                   2294:
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 *
 * Forced and verbose; returns true if any file system was unmounted.
 */
bool
vfs_unmountall(struct lwp *l)
{
	printf("unmounting file systems...");
	return vfs_unmountall1(l, true, true);
}
                   2306:
1.382     dyoung   2307: static void
                   2308: vfs_unmount_print(struct mount *mp, const char *pfx)
                   2309: {
                   2310:        printf("%sunmounted %s on %s type %s\n", pfx,
                   2311:            mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
                   2312:            mp->mnt_stat.f_fstypename);
                   2313: }
                   2314:
/*
 * Forcibly unmount the file system with the highest mnt_gen
 * (presumably the most recently mounted one).  Returns true if the
 * unmount succeeded, false if there was nothing to unmount or the
 * unmount failed.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp, *nmp = NULL;
	int error;

	/* Select the mount with the largest generation number. */
	CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) {
		if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen)
			nmp = mp;
	}

	if (nmp == NULL)
		return false;

#ifdef DEBUG
	printf("\nforcefully unmounting %s (%s)...",
	    nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname);
#endif
	/*
	 * Take a reference for dounmount(); on failure we must drop it
	 * ourselves (the success path leaves it with dounmount()).
	 */
	atomic_inc_uint(&nmp->mnt_refcnt);
	if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(nmp, "forcefully ");
		return true;
	} else
		atomic_dec_uint(&nmp->mnt_refcnt);

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    nmp->mnt_stat.f_mntonname, error);
#endif

	return false;
}
                   2347:
/*
 * Unmount every file system on the mount list, newest first.
 * "force" passes MNT_FORCE to each unmount; "verbose" enables the
 * progress/failure messages.  Returns true if at least one file
 * system was successfully unmounted.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp, *nmp;
	bool any_error = false, progress = false;
	int error;

	/* Walk the mount list in reverse (most recent mount first). */
	for (mp = CIRCLEQ_LAST(&mountlist);
	     mp != (void *)&mountlist;
	     mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
#ifdef DEBUG
		printf("\nunmounting %p %s (%s)...",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * Take a reference for dounmount(); on failure we
		 * release it ourselves, on success it stays with
		 * dounmount().
		 */
		atomic_inc_uint(&mp->mnt_refcnt);
		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			atomic_dec_uint(&mp->mnt_refcnt);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose)
		printf(" done\n");
	if (any_error && verbose)
		printf("WARNING: some file systems would not unmount\n");
	return progress;
}
                   2383:
/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	struct lwp *l;

	/* XXX we're certainly not running in lwp0's context! */
	l = (curlwp == NULL) ? &lwp0 : curlwp;

	vfs_shutdown1(l);
}
                   2397:
/*
 * Sync all dirty buffers to disk as part of system shutdown:
 * stop user processes, mark the shutdown in progress, issue a
 * global sync and wait for it to drain.
 */
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}
                   2422:
/*
 * Shutdown helper: sync all disks, then (unless we panicked)
 * unmount the file systems.
 */
static void
vfs_shutdown1(struct lwp *l)
{

	vfs_sync_all(l);

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(l);
}
                   2443:
                   2444: /*
1.384     jmcneill 2445:  * Print a list of supported file system types (used by vfs_mountroot)
                   2446:  */
                   2447: static void
                   2448: vfs_print_fstypes(void)
                   2449: {
                   2450:        struct vfsops *v;
                   2451:        int cnt = 0;
                   2452:
                   2453:        mutex_enter(&vfs_list_lock);
                   2454:        LIST_FOREACH(v, &vfs_list, vfs_list)
                   2455:                ++cnt;
                   2456:        mutex_exit(&vfs_list_lock);
                   2457:
                   2458:        if (cnt == 0) {
                   2459:                printf("WARNING: No file system modules have been loaded.\n");
                   2460:                return;
                   2461:        }
                   2462:
                   2463:        printf("Supported file systems:");
                   2464:        mutex_enter(&vfs_list_lock);
                   2465:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   2466:                printf(" %s", v->vfs_name);
                   2467:        }
                   2468:        mutex_exit(&vfs_list_lock);
                   2469:        printf("\n");
                   2470: }
                   2471:
/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 *
 * Returns 0 on success with rootvnode set and cwd info for proc0
 * and initproc pointing at '/', or an errno on failure.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Validate rootdev against the class of the root device. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		/* Network root: there must be no block device number. */
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		/* Disk root: get and open a vnode for the device. */
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it.  Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			/* Drop the reference vfs_getopsbyname() took. */
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/*
		 * Hold a reference and drop the list lock across the
		 * mountroot callback, which may sleep.
		 */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	/* v == NULL means the loop exhausted the list without success. */
	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, undo the VOP_OPEN/bdevvp done for DV_DISK above. */
	if (error && device_class(root_device) == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		vrele(rootvp);
	}
	if (error == 0) {
		extern struct cwdinfo cwdi0;

		/* The root mount is first on the mount list. */
		CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
		CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
		 * reference it.
		 */
		error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		VREF(cwdi0.cwdi_cdir);
		/* VFS_ROOT returns the vnode locked; release the lock. */
		VOP_UNLOCK(rootvnode, 0);
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info.  All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		VREF(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
	}
	return (error);
}
1.326     ad       2599:
/*
 * Get a new unique fsid
 *
 * Fills in mp->mnt_stat.f_fsidx / f_fsid with a value not used by
 * any currently mounted file system.  Serialized by mntid_lock.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;	/* per-boot mount id counter */
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	/*
	 * Provisional fsid (minor 0) so mp carries a well-defined
	 * value while we search for a free one below.
	 */
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	/* Minor number 0 is reserved; start the counter at 1. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	/* Probe for collisions against all existing mounts. */
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}
1.326     ad       2629:
/*
 * Make a 'unique' number from a mount type name.
 * Simple shift-and-xor hash over the bytes of the name; the empty
 * string hashes to 0.
 */
long
makefstype(const char *type)
{
	const char *cp;
	long hash = 0;

	for (cp = type; *cp != '\0'; cp++)
		hash = (hash << 2) ^ *cp;
	return hash;
}
                   2644:
/*
 * Set vnode attributes to VNOVAL
 *
 * Marks every attribute "not specified" so that setattr-style
 * callers can fill in only the fields they intend to change.
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	/* va_vaflags is a flags word, not an attribute: clear it. */
	vap->va_vaflags = 0;
}
                   2680:
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
/* Bounds-checked table lookup: out-of-range indices print "UNKNOWN". */
#define ARRAY_PRINT(idx, arr) \
    ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

/* Name tables and flag-bit format string used by vprint(). */
const char * const vnode_tags[] = { VNODE_TAGS };
const char * const vnode_types[] = { VNODE_TYPES };
const char vnode_flagbits[] = VNODE_FLAGBITS;
                   2688:
/*
 * Print out a description of a vnode.
 * "label", if non-NULL, is printed as a prefix.
 */
void
vprint(const char *label, struct vnode *vp)
{
	struct vnlock *vl;
	char bf[96];
	int flag;

	/* Use v_vnlock when set, otherwise the vnode's own v_lock. */
	vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
	/* Merge the three flag words for one formatted bit dump. */
	flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
	snprintb(bf, sizeof(bf), vnode_flagbits, flag);

	if (label != NULL)
		printf("%s: ", label);
	printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
	    "usecount %d, writecount %d, holdcount %d\n"
	    "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
	    vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
	    vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
	if (vp->v_data != NULL) {
		/* Let the file system print its private data as well. */
		printf("\t");
		VOP_PRINT(vp);
	}
}
                   2717:
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	mutex_enter(&mountlist_lock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		/* Skip mounts we cannot busy (e.g. being unmounted). */
		if (vfs_busy(mp, &nmp)) {
			continue;
		}
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		/* Re-take the list lock before advancing via unbusy. */
		mutex_enter(&mountlist_lock);
		vfs_unbusy(mp, false, &nmp);
	}
	mutex_exit(&mountlist_lock);
}
#endif
                   2746:
/*
 * Deprecated. Kept for KPI compatibility.
 * Delegates straight to genfs_can_access(); new code should call
 * that directly.
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
    mode_t acc_mode, kauth_cred_t cred)
{

#ifdef DIAGNOSTIC
	printf("vaccess: deprecated interface used.\n");
#endif /* DIAGNOSTIC */

	return genfs_can_access(type, file_mode, uid, gid, acc_mode, cred);
}
                   2759:
                   2760: /*
                   2761:  * Given a file system name, look up the vfsops for that
                   2762:  * file system, or return NULL if file system isn't present
                   2763:  * in the kernel.
                   2764:  */
                   2765: struct vfsops *
                   2766: vfs_getopsbyname(const char *name)
                   2767: {
                   2768:        struct vfsops *v;
                   2769:
                   2770:        mutex_enter(&vfs_list_lock);
                   2771:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   2772:                if (strcmp(v->vfs_name, name) == 0)
                   2773:                        break;
                   2774:        }
                   2775:        if (v != NULL)
                   2776:                v->vfs_refcount++;
                   2777:        mutex_exit(&vfs_list_lock);
                   2778:
                   2779:        return (v);
                   2780: }
                   2781:
                   2782: void
                   2783: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
                   2784: {
                   2785:        const struct statvfs *mbp;
                   2786:
                   2787:        if (sbp == (mbp = &mp->mnt_stat))
                   2788:                return;
                   2789:
                   2790:        (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
                   2791:        sbp->f_fsid = mbp->f_fsid;
                   2792:        sbp->f_owner = mbp->f_owner;
                   2793:        sbp->f_flag = mbp->f_flag;
                   2794:        sbp->f_syncwrites = mbp->f_syncwrites;
                   2795:        sbp->f_asyncwrites = mbp->f_asyncwrites;
                   2796:        sbp->f_syncreads = mbp->f_syncreads;
                   2797:        sbp->f_asyncreads = mbp->f_asyncreads;
                   2798:        (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
                   2799:        (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
                   2800:            sizeof(sbp->f_fstypename));
                   2801:        (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
                   2802:            sizeof(sbp->f_mntonname));
                   2803:        (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
                   2804:            sizeof(sbp->f_mntfromname));
                   2805:        sbp->f_namemax = mbp->f_namemax;
                   2806: }
                   2807:
/*
 * Fill in the f_fstypename, f_mntonname and f_mntfromname fields of a
 * mount's statvfs.  "onp"/"fromp" may each come from kernel or user
 * space, selected by ukon/ukfrom (UIO_SYSSPACE vs. user).  When the
 * caller is chrooted, the mount-on name is prefixed with the path of
 * the process root so it is meaningful system-wide.
 * Returns 0 on success or an errno from the copy/path routines.
 */
int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    const char *vfsname, struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		/* Pick the copy routine for the source address space. */
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = PNBUF_GET();

			/*
			 * Chrooted: build the root-relative prefix.
			 * getcwd_common() writes the path backwards
			 * from the end of the buffer.
			 */
			bp = path + MAXPATHLEN;
			*--bp = '\0';
			rw_enter(&cwdi->cwdi_lock, RW_READER);
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			rw_exit(&cwdi->cwdi_lock);
			if (error) {
				PNBUF_PUT(path);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			/* Copy the (possibly truncated) prefix. */
			(void)strncpy(sfs->f_mntonname, bp, len);
			PNBUF_PUT(path);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append the caller-supplied path. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero the tail; this also guarantees NUL termination. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		/* Zero the tail; this also guarantees NUL termination. */
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
                   2875:
/*
 * Return the current time for file system timestamps (atime/mtime/ctime).
 * Currently simply full nanotime(9) resolution; a single place to change
 * should a cheaper or coarser clock ever be preferred.
 */
void
vfs_timestamp(struct timespec *ts)
{

	nanotime(ts);
}
                   2882:
time_t rootfstime;			/* recorded root fs time, if known */

/*
 * Record the root file system's notion of time, used to initialize the
 * system clock at boot when no better (e.g. battery-backed) source exists.
 */
void
setrootfstime(time_t t)
{
	rootfstime = t;
}
                   2889:
                   2890: /*
                   2891:  * Sham lock manager for vnodes.  This is a temporary measure.
                   2892:  */
                   2893: int
                   2894: vlockmgr(struct vnlock *vl, int flags)
                   2895: {
                   2896:
                   2897:        KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
                   2898:
                   2899:        switch (flags & LK_TYPE_MASK) {
                   2900:        case LK_SHARED:
                   2901:                if (rw_tryenter(&vl->vl_lock, RW_READER)) {
                   2902:                        return 0;
                   2903:                }
                   2904:                if ((flags & LK_NOWAIT) != 0) {
                   2905:                        return EBUSY;
                   2906:                }
                   2907:                rw_enter(&vl->vl_lock, RW_READER);
                   2908:                return 0;
                   2909:
                   2910:        case LK_EXCLUSIVE:
                   2911:                if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
                   2912:                        return 0;
                   2913:                }
                   2914:                if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
                   2915:                    rw_write_held(&vl->vl_lock)) {
                   2916:                        vl->vl_recursecnt++;
                   2917:                        return 0;
                   2918:                }
                   2919:                if ((flags & LK_NOWAIT) != 0) {
                   2920:                        return EBUSY;
                   2921:                }
                   2922:                rw_enter(&vl->vl_lock, RW_WRITER);
                   2923:                return 0;
1.326     ad       2924:
                   2925:        case LK_RELEASE:
                   2926:                if (vl->vl_recursecnt != 0) {
                   2927:                        KASSERT(rw_write_held(&vl->vl_lock));
                   2928:                        vl->vl_recursecnt--;
                   2929:                        return 0;
                   2930:                }
                   2931:                rw_exit(&vl->vl_lock);
                   2932:                return 0;
                   2933:
                   2934:        default:
                   2935:                panic("vlockmgr: flags %x", flags);
                   2936:        }
                   2937: }
                   2938:
                   2939: int
                   2940: vlockstatus(struct vnlock *vl)
                   2941: {
                   2942:
                   2943:        if (rw_write_held(&vl->vl_lock)) {
                   2944:                return LK_EXCLUSIVE;
                   2945:        }
                   2946:        if (rw_read_held(&vl->vl_lock)) {
                   2947:                return LK_SHARED;
                   2948:        }
                   2949:        return 0;
                   2950: }
1.353     pooka    2951:
                   2952: /*
                   2953:  * mount_specific_key_create --
                   2954:  *     Create a key for subsystem mount-specific data.
                   2955:  */
                   2956: int
                   2957: mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
                   2958: {
                   2959:
                   2960:        return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
                   2961: }
                   2962:
                   2963: /*
                   2964:  * mount_specific_key_delete --
                   2965:  *     Delete a key for subsystem mount-specific data.
                   2966:  */
                   2967: void
                   2968: mount_specific_key_delete(specificdata_key_t key)
                   2969: {
                   2970:
                   2971:        specificdata_key_delete(mount_specificdata_domain, key);
                   2972: }
                   2973:
                   2974: /*
                   2975:  * mount_initspecific --
                   2976:  *     Initialize a mount's specificdata container.
                   2977:  */
                   2978: void
                   2979: mount_initspecific(struct mount *mp)
                   2980: {
                   2981:        int error;
                   2982:
                   2983:        error = specificdata_init(mount_specificdata_domain,
                   2984:                                  &mp->mnt_specdataref);
                   2985:        KASSERT(error == 0);
                   2986: }
                   2987:
                   2988: /*
                   2989:  * mount_finispecific --
                   2990:  *     Finalize a mount's specificdata container.
                   2991:  */
                   2992: void
                   2993: mount_finispecific(struct mount *mp)
                   2994: {
                   2995:
                   2996:        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
                   2997: }
                   2998:
                   2999: /*
                   3000:  * mount_getspecific --
                   3001:  *     Return mount-specific data corresponding to the specified key.
                   3002:  */
                   3003: void *
                   3004: mount_getspecific(struct mount *mp, specificdata_key_t key)
                   3005: {
                   3006:
                   3007:        return (specificdata_getspecific(mount_specificdata_domain,
                   3008:                                         &mp->mnt_specdataref, key));
                   3009: }
                   3010:
                   3011: /*
                   3012:  * mount_setspecific --
                   3013:  *     Set mount-specific data corresponding to the specified key.
                   3014:  */
                   3015: void
                   3016: mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
                   3017: {
                   3018:
                   3019:        specificdata_setspecific(mount_specificdata_domain,
                   3020:                                 &mp->mnt_specdataref, key, data);
                   3021: }
                   3022:
                   3023: int
                   3024: VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
                   3025: {
                   3026:        int error;
                   3027:
                   3028:        KERNEL_LOCK(1, NULL);
                   3029:        error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
                   3030:        KERNEL_UNLOCK_ONE(NULL);
                   3031:
                   3032:        return error;
                   3033: }
                   3034:
                   3035: int
                   3036: VFS_START(struct mount *mp, int a)
                   3037: {
                   3038:        int error;
                   3039:
                   3040:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3041:                KERNEL_LOCK(1, NULL);
                   3042:        }
                   3043:        error = (*(mp->mnt_op->vfs_start))(mp, a);
                   3044:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3045:                KERNEL_UNLOCK_ONE(NULL);
                   3046:        }
                   3047:
                   3048:        return error;
                   3049: }
                   3050:
                   3051: int
                   3052: VFS_UNMOUNT(struct mount *mp, int a)
                   3053: {
                   3054:        int error;
                   3055:
                   3056:        KERNEL_LOCK(1, NULL);
                   3057:        error = (*(mp->mnt_op->vfs_unmount))(mp, a);
                   3058:        KERNEL_UNLOCK_ONE(NULL);
                   3059:
                   3060:        return error;
                   3061: }
                   3062:
                   3063: int
                   3064: VFS_ROOT(struct mount *mp, struct vnode **a)
                   3065: {
                   3066:        int error;
                   3067:
                   3068:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3069:                KERNEL_LOCK(1, NULL);
                   3070:        }
                   3071:        error = (*(mp->mnt_op->vfs_root))(mp, a);
                   3072:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3073:                KERNEL_UNLOCK_ONE(NULL);
                   3074:        }
                   3075:
                   3076:        return error;
                   3077: }
                   3078:
                   3079: int
                   3080: VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
                   3081: {
                   3082:        int error;
                   3083:
                   3084:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3085:                KERNEL_LOCK(1, NULL);
                   3086:        }
                   3087:        error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
                   3088:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3089:                KERNEL_UNLOCK_ONE(NULL);
                   3090:        }
                   3091:
                   3092:        return error;
                   3093: }
                   3094:
                   3095: int
                   3096: VFS_STATVFS(struct mount *mp, struct statvfs *a)
                   3097: {
                   3098:        int error;
                   3099:
                   3100:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3101:                KERNEL_LOCK(1, NULL);
                   3102:        }
                   3103:        error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
                   3104:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3105:                KERNEL_UNLOCK_ONE(NULL);
                   3106:        }
                   3107:
                   3108:        return error;
                   3109: }
                   3110:
                   3111: int
                   3112: VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
                   3113: {
                   3114:        int error;
                   3115:
                   3116:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3117:                KERNEL_LOCK(1, NULL);
                   3118:        }
                   3119:        error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
                   3120:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3121:                KERNEL_UNLOCK_ONE(NULL);
                   3122:        }
                   3123:
                   3124:        return error;
                   3125: }
                   3126:
                   3127: int
                   3128: VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
                   3129: {
                   3130:        int error;
                   3131:
                   3132:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3133:                KERNEL_LOCK(1, NULL);
                   3134:        }
                   3135:        error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
                   3136:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3137:                KERNEL_UNLOCK_ONE(NULL);
                   3138:        }
                   3139:
                   3140:        return error;
                   3141: }
                   3142:
                   3143: int
                   3144: VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
                   3145: {
                   3146:        int error;
                   3147:
                   3148:        if ((vp->v_vflag & VV_MPSAFE) == 0) {
                   3149:                KERNEL_LOCK(1, NULL);
                   3150:        }
                   3151:        error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
                   3152:        if ((vp->v_vflag & VV_MPSAFE) == 0) {
                   3153:                KERNEL_UNLOCK_ONE(NULL);
                   3154:        }
                   3155:
                   3156:        return error;
                   3157: }
                   3158:
                   3159: int
                   3160: VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
                   3161: {
                   3162:        int error;
                   3163:
                   3164:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3165:                KERNEL_LOCK(1, NULL);
                   3166:        }
                   3167:        error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
                   3168:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3169:                KERNEL_UNLOCK_ONE(NULL);
                   3170:        }
                   3171:
                   3172:        return error;
                   3173: }
                   3174:
                   3175: int
                   3176: VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
                   3177: {
                   3178:        int error;
                   3179:
                   3180:        KERNEL_LOCK(1, NULL);           /* XXXSMP check ffs */
                   3181:        error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
                   3182:        KERNEL_UNLOCK_ONE(NULL);        /* XXX */
                   3183:
                   3184:        return error;
                   3185: }
                   3186:
                   3187: int
                   3188: VFS_SUSPENDCTL(struct mount *mp, int a)
                   3189: {
                   3190:        int error;
                   3191:
                   3192:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3193:                KERNEL_LOCK(1, NULL);
                   3194:        }
                   3195:        error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
                   3196:        if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
                   3197:                KERNEL_UNLOCK_ONE(NULL);
                   3198:        }
                   3199:
                   3200:        return error;
                   3201: }
                   3202:
1.378     pooka    3203: #if defined(DDB) || defined(DEBUGPRINT)
1.353     pooka    3204: static const char buf_flagbits[] = BUF_FLAGBITS;
                   3205:
                   3206: void
                   3207: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
                   3208: {
                   3209:        char bf[1024];
                   3210:
                   3211:        (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
                   3212:            PRIx64 " dev 0x%x\n",
                   3213:            bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
                   3214:
1.361     christos 3215:        snprintb(bf, sizeof(bf),
                   3216:            buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1.353     pooka    3217:        (*pr)("  error %d flags 0x%s\n", bp->b_error, bf);
                   3218:
                   3219:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
                   3220:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.369     ad       3221:        (*pr)("  data %p saveaddr %p\n",
                   3222:                  bp->b_data, bp->b_saveaddr);
1.353     pooka    3223:        (*pr)("  iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
                   3224: }
                   3225:
                   3226:
/*
 * Dump the state of a vnode via the given printf-like routine, for
 * DDB / debug printing.  With "full" set, also walks and prints the
 * vnode's clean and dirty buffer lists.
 */
void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	/* Start with the embedded UVM object's own printout. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	snprintb(bf, sizeof(bf),
	    vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
	      vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);

	(*pr)("data %p writecount %ld holdcnt %ld\n",
	      vp->v_data, vp->v_writecount, vp->v_holdcnt);

	/*
	 * NOTE(review): the "typedata" field in this format prints
	 * v_mountedhere — the label looks historical; confirm before
	 * relying on it.
	 */
	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	      ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	      ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	      vp->v_mount, vp->v_mountedhere);

	(*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
                   3265:
                   3266: void
                   3267: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
                   3268: {
                   3269:        char sbuf[256];
                   3270:
                   3271:        (*pr)("vnodecovered = %p syncer = %p data = %p\n",
                   3272:                        mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
                   3273:
                   3274:        (*pr)("fs_bshift %d dev_bshift = %d\n",
                   3275:                        mp->mnt_fs_bshift,mp->mnt_dev_bshift);
                   3276:
1.361     christos 3277:        snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1.353     pooka    3278:        (*pr)("flag = %s\n", sbuf);
                   3279:
1.361     christos 3280:        snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1.353     pooka    3281:        (*pr)("iflag = %s\n", sbuf);
                   3282:
                   3283:        (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
                   3284:            &mp->mnt_unmounting, &mp->mnt_updating);
                   3285:
                   3286:        (*pr)("statvfs cache:\n");
                   3287:        (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
                   3288:        (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
                   3289:        (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
                   3290:
                   3291:        (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
                   3292:        (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
                   3293:        (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
                   3294:        (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
                   3295:
                   3296:        (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
                   3297:        (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
                   3298:        (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
                   3299:        (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
                   3300:
                   3301:        (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
                   3302:                        mp->mnt_stat.f_fsidx.__fsid_val[0],
                   3303:                        mp->mnt_stat.f_fsidx.__fsid_val[1]);
                   3304:
                   3305:        (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
                   3306:        (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
                   3307:
1.361     christos 3308:        snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
                   3309:
1.353     pooka    3310:        (*pr)("\tflag = %s\n",sbuf);
                   3311:        (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
                   3312:        (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
                   3313:        (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
                   3314:        (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
                   3315:        (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
                   3316:        (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
                   3317:        (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
                   3318:
                   3319:        {
                   3320:                int cnt = 0;
                   3321:                struct vnode *vp;
                   3322:                (*pr)("locked vnodes =");
                   3323:                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   3324:                        if (VOP_ISLOCKED(vp)) {
                   3325:                                if ((++cnt % 6) == 0) {
                   3326:                                        (*pr)(" %p,\n\t", vp);
                   3327:                                } else {
                   3328:                                        (*pr)(" %p,", vp);
                   3329:                                }
                   3330:                        }
                   3331:                }
                   3332:                (*pr)("\n");
                   3333:        }
                   3334:
                   3335:        if (full) {
                   3336:                int cnt = 0;
                   3337:                struct vnode *vp;
                   3338:                (*pr)("all vnodes =");
                   3339:                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   3340:                        if (!TAILQ_NEXT(vp, v_mntvnodes)) {
                   3341:                                (*pr)(" %p", vp);
                   3342:                        } else if ((++cnt % 6) == 0) {
                   3343:                                (*pr)(" %p,\n\t", vp);
                   3344:                        } else {
                   3345:                                (*pr)(" %p,", vp);
                   3346:                        }
                   3347:                }
                   3348:                (*pr)("\n", vp);
                   3349:        }
                   3350: }
1.378     pooka    3351: #endif /* DDB || DEBUGPRINT */
1.372     elad     3352:
1.385     elad     3353: /*
                   3354:  * Check if a device pointed to by vp is mounted.
                   3355:  *
                   3356:  * Returns:
                   3357:  *   EINVAL    if it's not a disk
                   3358:  *   EBUSY     if it's a disk and mounted
                   3359:  *   0         if it's a disk and not mounted
                   3360:  */
                   3361: int
                   3362: rawdev_mounted(struct vnode *vp, struct vnode **bvpp)
                   3363: {
                   3364:        struct vnode *bvp;
                   3365:        dev_t dev;
                   3366:        int d_type;
                   3367:
                   3368:        bvp = NULL;
                   3369:        dev = vp->v_rdev;
                   3370:        d_type = D_OTHER;
                   3371:
                   3372:        if (iskmemvp(vp))
                   3373:                return EINVAL;
                   3374:
                   3375:        switch (vp->v_type) {
                   3376:        case VCHR: {
                   3377:                const struct cdevsw *cdev;
                   3378:
                   3379:                cdev = cdevsw_lookup(dev);
                   3380:                if (cdev != NULL) {
                   3381:                        dev_t blkdev;
                   3382:
                   3383:                        blkdev = devsw_chr2blk(dev);
                   3384:                        if (blkdev != NODEV) {
                   3385:                                vfinddev(blkdev, VBLK, &bvp);
                   3386:                                if (bvp != NULL)
                   3387:                                        d_type = (cdev->d_flag & D_TYPEMASK);
                   3388:                        }
                   3389:                }
                   3390:
                   3391:                break;
                   3392:                }
                   3393:
                   3394:        case VBLK: {
                   3395:                const struct bdevsw *bdev;
                   3396:
                   3397:                bdev = bdevsw_lookup(dev);
                   3398:                if (bdev != NULL)
                   3399:                        d_type = (bdev->d_flag & D_TYPEMASK);
                   3400:
                   3401:                bvp = vp;
                   3402:
                   3403:                break;
                   3404:                }
                   3405:
                   3406:        default:
                   3407:                break;
                   3408:        }
                   3409:
                   3410:        if (d_type != D_DISK)
                   3411:                return EINVAL;
                   3412:
                   3413:        if (bvpp != NULL)
                   3414:                *bvpp = bvp;
                   3415:
                   3416:        /*
                   3417:         * XXX: This is bogus. We should be failing the request
                   3418:         * XXX: not only if this specific slice is mounted, but
                   3419:         * XXX: if it's on a disk with any other mounted slice.
                   3420:         */
                   3421:        if (vfs_mountedon(bvp))
                   3422:                return EBUSY;
                   3423:
                   3424:        return 0;
                   3425: }

CVSweb <webmaster@jp.NetBSD.org>