[BACK]Return to vfs_mount.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_mount.c, Revision 1.71

1.71    ! christos    1: /*     $NetBSD: vfs_mount.c,v 1.70 2019/02/20 10:08:37 hannken Exp $   */
1.1       rmind       2:
                      3: /*-
1.2       rmind       4:  * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
1.1       rmind       5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     21:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     22:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     23:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     24:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     25:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     26:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     27:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     28:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     29:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     30:  * POSSIBILITY OF SUCH DAMAGE.
                     31:  */
                     32:
                     33: /*
                     34:  * Copyright (c) 1989, 1993
                     35:  *     The Regents of the University of California.  All rights reserved.
                     36:  * (c) UNIX System Laboratories, Inc.
                     37:  * All or some portions of this file are derived from material licensed
                     38:  * to the University of California by American Telephone and Telegraph
                     39:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     40:  * the permission of UNIX System Laboratories, Inc.
                     41:  *
                     42:  * Redistribution and use in source and binary forms, with or without
                     43:  * modification, are permitted provided that the following conditions
                     44:  * are met:
                     45:  * 1. Redistributions of source code must retain the above copyright
                     46:  *    notice, this list of conditions and the following disclaimer.
                     47:  * 2. Redistributions in binary form must reproduce the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer in the
                     49:  *    documentation and/or other materials provided with the distribution.
                     50:  * 3. Neither the name of the University nor the names of its contributors
                     51:  *    may be used to endorse or promote products derived from this software
                     52:  *    without specific prior written permission.
                     53:  *
                     54:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     55:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     56:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     57:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     58:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     59:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     60:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     61:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     62:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     63:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     64:  * SUCH DAMAGE.
                     65:  *
                     66:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
                     67:  */
                     68:
                     69: #include <sys/cdefs.h>
1.71    ! christos   70: __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.70 2019/02/20 10:08:37 hannken Exp $");
1.1       rmind      71:
                     72: #include <sys/param.h>
                     73: #include <sys/kernel.h>
                     74:
                     75: #include <sys/atomic.h>
                     76: #include <sys/buf.h>
                     77: #include <sys/conf.h>
                     78: #include <sys/fcntl.h>
                     79: #include <sys/filedesc.h>
                     80: #include <sys/device.h>
                     81: #include <sys/kauth.h>
                     82: #include <sys/kmem.h>
                     83: #include <sys/module.h>
                     84: #include <sys/mount.h>
1.48      hannken    85: #include <sys/fstrans.h>
1.1       rmind      86: #include <sys/namei.h>
1.14      gson       87: #include <sys/extattr.h>
1.1       rmind      88: #include <sys/syscallargs.h>
                     89: #include <sys/sysctl.h>
                     90: #include <sys/systm.h>
1.5       dsl        91: #include <sys/vfs_syscalls.h>
1.41      hannken    92: #include <sys/vnode_impl.h>
1.1       rmind      93:
                     94: #include <miscfs/genfs/genfs.h>
                     95: #include <miscfs/specfs/specdev.h>
                     96:
                         /*
                          * Kind of entry kept on the mount list: an entry for an actual
                          * mount (ME_MOUNT) or a marker entry (ME_MARKER).
                          */
1.52      hannken    97: enum mountlist_type {
                     98:        ME_MOUNT,
                     99:        ME_MARKER
                    100: };
                    101: struct mountlist_entry {
                                /* One element of the mountlist TAILQ; me_type selects how me_mount is read. */
                    102:        TAILQ_ENTRY(mountlist_entry) me_list;   /* Mount list. */
                    103:        struct mount *me_mount;                 /* Actual mount if ME_MOUNT,
                    104:                                                   current mount else. */
                    105:        enum mountlist_type me_type;            /* Mount or marker. */
                    106: };
                    107: struct mount_iterator {
                                /*
                                 * The iterator is just a mount list entry.
                                 * NOTE(review): presumably linked into mountlist as an
                                 * ME_MARKER while iterating -- confirm against the
                                 * mountlist_iterator_*() code.
                                 */
                    108:        struct mountlist_entry mi_entry;
                    109: };
                    110:
                         /*
                          * Forward declaration of the iterator worker defined further down.
                          * The arguments are the iterator, an optional filter callback, the
                          * opaque argument handed to that callback, and the do_wait flag.
                          */
1.45      hannken   111: static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
                    112:     bool (*)(void *, struct vnode *), void *, bool);
                    113:
1.33      pooka     114: /* Root filesystem. */
1.1       rmind     115: vnode_t *                      rootvnode;
                    116:
                    117: /* Mounted filesystem list. */
1.54      hannken   118: static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
                    119: static kmutex_t                        mountlist_lock;
                    120: int vnode_offset_next_by_lru   /* XXX: ugly hack for pstat.c */
                    121:     = offsetof(vnode_impl_t, vi_lrulist.tqe_next);
1.1       rmind     122:
                         /* Held around every mnt_vnodelist insert/remove in this file. */
                    123: kmutex_t                       mntvnode_lock;
                         /* Held by vfs_rootmountalloc() while it walks vfs_list. */
                    124: kmutex_t                       vfs_list_lock;
                    125:
                         /* Specificdata domain for mounts, created in vfs_mount_sysinit(). */
                    126: static specificdata_domain_t   mount_specificdata_domain;
                         /* Held by vfs_getnewfsid() for the whole fsid selection. */
                    127: static kmutex_t                        mntid_lock;
                    128:
                         /* mountgen_lock protects mountgen, the next value for mnt_gen. */
                    129: static kmutex_t                        mountgen_lock;
                    130: static uint64_t                        mountgen;
                    131:
                         /*
                          * vfs_mount_sysinit: set up the file-scope state of this file: the
                          * empty mount list, the global locks, the mount specificdata domain
                          * and the mount generation counter.
                          *
                          * NOTE(review): presumably called once during boot, before any
                          * mount exists -- confirm against the caller.
                          */
                    132: void
                    133: vfs_mount_sysinit(void)
                    134: {
                    135:
1.24      christos  136:        TAILQ_INIT(&mountlist);
1.1       rmind     137:        mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
                    138:        mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
                    139:        mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
                    140:
                    141:        mount_specificdata_domain = specificdata_domain_create();
                    142:        mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
                    143:        mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
                                /* Generation numbers handed out by vfs_mountalloc() start at 0. */
                    144:        mountgen = 0;
                    145: }
                    146:
                         /*
                          * vfs_mountalloc: allocate and initialize a new mount structure.
                          *
                          * => vfsops is stored as mnt_op; no reference on it is taken here,
                          *    although vfs_rele() drops one through vfs_delref() when the
                          *    mount is finally released.
                          * => vp is stored as mnt_vnodecovered and may be NULL, as in
                          *    vfs_rootmountalloc().
                          * => Returns the new mount with mnt_refcnt set to 1.
                          */
                    147: struct mount *
                    148: vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
                    149: {
                    150:        struct mount *mp;
1.60      hannken   151:        int error __diagused;
1.1       rmind     152:
                                /* Zeroed allocation: every field not set below starts out as 0. */
                    153:        mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
                    154:        mp->mnt_op = vfsops;
                    155:        mp->mnt_refcnt = 1;
                    156:        TAILQ_INIT(&mp->mnt_vnodelist);
                    157:        mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
                    158:        mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
                    159:        mp->mnt_vnodecovered = vp;
                    160:        mount_initspecific(mp);
1.69      hannken   161:
                                /* Register the mount with fstrans; failure is only asserted against. */
                    162:        error = fstrans_mount(mp);
                    163:        KASSERT(error == 0);
1.1       rmind     164:
                                /* Stamp the mount with the next generation number. */
                    165:        mutex_enter(&mountgen_lock);
                    166:        mp->mnt_gen = mountgen++;
                    167:        mutex_exit(&mountgen_lock);
                    168:
                    169:        return mp;
                    170: }
                    171:
                    172: /*
                    173:  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
                    174:  * initialize a mount structure for it.
                    175:  *
                    176:  * Devname is usually updated by mount(8) after booting.
                          *
                          * => fstypename is compared against each vfs_name on vfs_list.
                          * => devname is copied into f_mntfromname.
                          * => On success returns 0 and stores in *mpp a mount that is busied
                          *    with vfs_busy(), flagged MNT_RDONLY and mounted on "/".
                          * => Returns ENODEV if no filesystem type matches, ENOMEM if
                          *    vfs_mountalloc() returns NULL.
                    177:  */
                    178: int
                    179: vfs_rootmountalloc(const char *fstypename, const char *devname,
                    180:     struct mount **mpp)
                    181: {
                    182:        struct vfsops *vfsp = NULL;
                    183:        struct mount *mp;
1.57      hannken   184:        int error __diagused;
1.1       rmind     185:
                                /* mp is only named inside sizeof here; it is not dereferenced. */
                    186:        mutex_enter(&vfs_list_lock);
                    187:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
                    188:                if (!strncmp(vfsp->vfs_name, fstypename,
                    189:                    sizeof(mp->mnt_stat.f_fstypename)))
                    190:                        break;
                    191:        if (vfsp == NULL) {
                    192:                mutex_exit(&vfs_list_lock);
                    193:                return (ENODEV);
                    194:        }
                                /* Count the new user of these vfsops before dropping the list lock. */
                    195:        vfsp->vfs_refcount++;
                    196:        mutex_exit(&vfs_list_lock);
                    197:
                                /*
                                 * NOTE(review): this error return does not undo the
                                 * vfs_refcount increment above -- confirm whether
                                 * vfs_mountalloc() can return NULL at all.
                                 */
                    198:        if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
                    199:                return ENOMEM;
1.57      hannken   200:        error = vfs_busy(mp);
                    201:        KASSERT(error == 0);
1.1       rmind     202:        mp->mnt_flag = MNT_RDONLY;
                    203:        (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
                    204:            sizeof(mp->mnt_stat.f_fstypename));
                    205:        mp->mnt_stat.f_mntonname[0] = '/';
                    206:        mp->mnt_stat.f_mntonname[1] = '\0';
                                /*
                                 * The last byte is set to NUL first and the copy is
                                 * limited to size - 1 bytes, so that byte is never
                                 * overwritten; the copystr() result is ignored.
                                 */
                    207:        mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
                    208:            '\0';
                    209:        (void)copystr(devname, mp->mnt_stat.f_mntfromname,
                    210:            sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
                    211:        *mpp = mp;
                    212:        return 0;
                    213: }
                    214:
                    215: /*
                    216:  * vfs_getnewfsid: get a new unique fsid.
                          *
                          * The second fsid word is the value makefstype() derives from the
                          * vfs_name.  The first word is makedev(mtype & 0xff, id), where id
                          * comes from a static counter that is advanced until vfs_getvfs()
                          * finds no mount with the candidate fsid.  The result is stored in
                          * mp->mnt_stat.f_fsidx and f_fsid.  Runs under mntid_lock.
                    217:  */
                    218: void
                    219: vfs_getnewfsid(struct mount *mp)
                    220: {
                                /* Persists across calls; the value 0 is skipped below. */
                    221:        static u_short xxxfs_mntid;
                    222:        fsid_t tfsid;
                    223:        int mtype;
                    224:
                    225:        mutex_enter(&mntid_lock);
                    226:        mtype = makefstype(mp->mnt_op->vfs_name);
                                /* Provisional fsid; word 0 is replaced once a free one is found. */
                    227:        mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
                    228:        mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
                    229:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
                    230:        if (xxxfs_mntid == 0)
                    231:                ++xxxfs_mntid;
                    232:        tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
                    233:        tfsid.__fsid_val[1] = mtype;
                                /* Step candidate and counter together while the fsid is taken. */
1.53      hannken   234:        while (vfs_getvfs(&tfsid)) {
                    235:                tfsid.__fsid_val[0]++;
                    236:                xxxfs_mntid++;
1.1       rmind     237:        }
                    238:        mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
                    239:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
                    240:        mutex_exit(&mntid_lock);
                    241: }
                    242:
                    243: /*
                    244:  * Lookup a mount point by filesystem identifier.
                    245:  *
                    246:  * XXX Needs to add a reference to the mount point.
                          *
                          * Walks the mount list with a mount iterator and returns the first
                          * mount whose two f_fsidx words both equal those of *fsid, or NULL
                          * if there is none.  The iterator is destroyed before returning, and
                          * no reference is taken here on the returned mount.
                    247:  */
                    248: struct mount *
                    249: vfs_getvfs(fsid_t *fsid)
                    250: {
1.53      hannken   251:        mount_iterator_t *iter;
1.1       rmind     252:        struct mount *mp;
                    253:
1.53      hannken   254:        mountlist_iterator_init(&iter);
                    255:        while ((mp = mountlist_iterator_next(iter)) != NULL) {
1.1       rmind     256:                if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                    257:                    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
1.53      hannken   258:                        mountlist_iterator_destroy(iter);
                    259:                        return mp;
1.1       rmind     260:                }
                    261:        }
1.53      hannken   262:        mountlist_iterator_destroy(iter);
1.1       rmind     263:        return NULL;
                    264: }
                    265:
                    266: /*
1.55      hannken   267:  * Take a reference to a mount structure.
                          *
                          * The caller must either already hold a reference (mnt_refcnt > 0)
                          * or own mountlist_lock; this is asserted.  The count is incremented
                          * atomically.
                    268:  */
                    269: void
                    270: vfs_ref(struct mount *mp)
                    271: {
                    272:
                    273:        KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));
                    274:
                    275:        atomic_inc_uint(&mp->mnt_refcnt);
                    276: }
                    277:
                    278: /*
1.1       rmind     279:  * Drop a reference to a mount structure, freeing if the last reference.
                          *
                          * On the last reference this tears down the specificdata, the two
                          * per-mount mutexes and the reference on mnt_op, then hands the
                          * mount to fstrans_unmount().
                    280:  */
                    281: void
1.55      hannken   282: vfs_rele(struct mount *mp)
1.1       rmind     283: {
                    284:
                                /* Fast path: the new count, read as a signed int, is still positive. */
                    285:        if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
                    286:                return;
                    287:        }
                    288:
                    289:        /*
                    290:         * Nothing else has visibility of the mount: we can now
                    291:         * free the data structures.
                    292:         */
                    293:        KASSERT(mp->mnt_refcnt == 0);
                    294:        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
                    295:        mutex_destroy(&mp->mnt_updating);
                    296:        mutex_destroy(&mp->mnt_renamelock);
                    297:        if (mp->mnt_op != NULL) {
                    298:                vfs_delref(mp->mnt_op);
                    299:        }
1.70      hannken   300:        fstrans_unmount(mp);
                    301:        /*
                    302:         * Final free of mp gets done from fstrans_mount_dtor().
                    303:         *
                    304:         * This prevents the memory from being reused as a mount
                    305:         * before fstrans releases all references to it.
                    306:         */
1.1       rmind     307: }
                    308:
                    309: /*
                    310:  * Mark a mount point as busy, and gain a new reference to it.  Used to
                    311:  * prevent the file system from being unmounted during critical sections.
                    312:  *
1.18      mlelstv   313:  * vfs_busy can be called multiple times and by multiple threads
                    314:  * and must be accompanied by the same number of vfs_unbusy calls.
                    315:  *
1.1       rmind     316:  * => The caller must hold a pre-existing reference to the mount.
                    317:  * => Will fail if the file system is being unmounted, or is unmounted.
                          * => If wait is true the fstrans transaction is started with
                          *    fstrans_start(); otherwise fstrans_start_nowait() is used and
                          *    EBUSY is returned when it reports failure.
                          * => Returns ENOENT, after ending the transaction, if IMNT_GONE is
                          *    set on the mount.
                          * => Returns 0 with the transaction still open and one extra
                          *    reference taken; vfs_unbusy() undoes both.
                    318:  */
1.58      hannken   319: static inline int
                    320: _vfs_busy(struct mount *mp, bool wait)
1.1       rmind     321: {
                    322:
                    323:        KASSERT(mp->mnt_refcnt > 0);
                    324:
1.58      hannken   325:        if (wait) {
1.66      hannken   326:                fstrans_start(mp);
1.61      hannken   327:        } else {
1.66      hannken   328:                if (fstrans_start_nowait(mp))
1.61      hannken   329:                        return EBUSY;
1.58      hannken   330:        }
                                /* The mount may have gone away before the transaction started. */
1.1       rmind     331:        if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
1.61      hannken   332:                fstrans_done(mp);
1.1       rmind     333:                return ENOENT;
                    334:        }
1.55      hannken   335:        vfs_ref(mp);
1.1       rmind     336:        return 0;
                    337: }
                    338:
                         /*
                          * vfs_busy: busy a mount through _vfs_busy() with wait set to true.
                          * Returns 0 on success or ENOENT if the mount has IMNT_GONE set.
                          */
1.58      hannken   339: int
                    340: vfs_busy(struct mount *mp)
                    341: {
                    342:
                    343:        return _vfs_busy(mp, true);
                    344: }
                    345:
                         /*
                          * vfs_trybusy: busy a mount through _vfs_busy() with wait set to
                          * false.  Returns 0 on success, EBUSY if fstrans_start_nowait()
                          * fails, or ENOENT if the mount has IMNT_GONE set.
                          */
                    346: int
                    347: vfs_trybusy(struct mount *mp)
                    348: {
                    349:
                    350:        return _vfs_busy(mp, false);
                    351: }
                    352:
1.1       rmind     353: /*
                    354:  * Unbusy a busy filesystem.
                    355:  *
1.18      mlelstv   356:  * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
                          *
                          * Ends the fstrans transaction and drops the reference that
                          * _vfs_busy() took; vfs_rele() frees the mount if that was the last
                          * reference.
1.1       rmind     357:  */
                    358: void
1.56      hannken   359: vfs_unbusy(struct mount *mp)
1.1       rmind     360: {
                    361:
                    362:        KASSERT(mp->mnt_refcnt > 0);
                    363:
1.61      hannken   364:        fstrans_done(mp);
1.56      hannken   365:        vfs_rele(mp);
1.1       rmind     366: }
                    367:
                         /*
                          * Iterator over the vnodes of one mount.  It is a thin wrapper
                          * around the vnode_impl_t of a marker vnode:
                          * vfs_vnode_iterator_init() obtains the marker from vnalloc_marker()
                          * and casts its vnode_impl_t pointer to this type.
                          */
1.27      hannken   368: struct vnode_iterator {
1.44      hannken   369:        vnode_impl_t vi_vnode;
1.40      msaitoh   370: };
1.27      hannken   371:
                         /*
                          * vfs_vnode_iterator_init: create an iterator over the vnodes of mp.
                          *
                          * Allocates a marker vnode, links it at the head of mp's vnode list
                          * and returns it through *vnip.  The marker's v_usecount serves as
                          * an "is on the list" flag for the other iterator functions.
                          */
                    372: void
1.44      hannken   373: vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
1.27      hannken   374: {
1.44      hannken   375:        vnode_t *vp;
                    376:        vnode_impl_t *vip;
1.27      hannken   377:
1.38      hannken   378:        vp = vnalloc_marker(mp);
1.44      hannken   379:        vip = VNODE_TO_VIMPL(vp);
1.27      hannken   380:
                    381:        mutex_enter(&mntvnode_lock);
1.44      hannken   382:        TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
                                /* Non-zero means the marker is currently on the list. */
1.27      hannken   383:        vp->v_usecount = 1;
                    384:        mutex_exit(&mntvnode_lock);
                    385:
1.44      hannken   386:        *vnip = (struct vnode_iterator *)vip;
1.27      hannken   387: }
                    388:
                         /*
                          * vfs_vnode_iterator_destroy: tear down an iterator created by
                          * vfs_vnode_iterator_init().
                          *
                          * Unlinks the marker from its mount's vnode list if it is still on
                          * it (v_usecount != 0), then frees the marker vnode.
                          */
                    389: void
1.44      hannken   390: vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
1.27      hannken   391: {
1.44      hannken   392:        vnode_impl_t *mvip = &vni->vi_vnode;
                    393:        vnode_t *mvp = VIMPL_TO_VNODE(mvip);
1.27      hannken   394:
                    395:        mutex_enter(&mntvnode_lock);
1.38      hannken   396:        KASSERT(vnis_marker(mvp));
                                /* The iterator leaves the marker unlinked once it reaches the end. */
1.32      hannken   397:        if (mvp->v_usecount != 0) {
1.44      hannken   398:                TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
1.32      hannken   399:                mvp->v_usecount = 0;
                    400:        }
1.27      hannken   401:        mutex_exit(&mntvnode_lock);
1.38      hannken   402:        vnfree_marker(mvp);
1.27      hannken   403: }
                    404:
                         /*
                          * vfs_vnode_iterator_next1: advance the iterator and return the next
                          * acceptable vnode of the mount, or NULL at the end of the list.
                          *
                          * => f, if not NULL, is a filter called as (*f)(cl, vp) with the
                          *    vnode interlock held; vnodes for which it returns false are
                          *    skipped.
                          * => do_wait selects the flags passed to vdead_check(): 0 if true,
                          *    VDEAD_NOWAIT if false.  Vnodes for which vdead_check() returns
                          *    non-zero are skipped, as are other markers.
                          * => The returned vnode went through vcache_vget() successfully.
                          *    NOTE(review): presumably that leaves it referenced -- confirm.
                          * => After NULL is returned the marker stays off the list with
                          *    v_usecount 0.
                          */
1.45      hannken   405: static struct vnode *
                    406: vfs_vnode_iterator_next1(struct vnode_iterator *vni,
                    407:     bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
1.27      hannken   408: {
1.44      hannken   409:        vnode_impl_t *mvip = &vni->vi_vnode;
                    410:        struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
                    411:        vnode_t *vp;
                    412:        vnode_impl_t *vip;
1.27      hannken   413:        int error;
                    414:
1.44      hannken   415:        KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));
1.27      hannken   416:
                    417:        do {
                    418:                mutex_enter(&mntvnode_lock);
                                        /* Note the successor, then unlink the marker while scanning. */
1.44      hannken   419:                vip = TAILQ_NEXT(mvip, vi_mntvnodes);
                    420:                TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
                    421:                VIMPL_TO_VNODE(mvip)->v_usecount = 0;
1.29      christos  422: again:
                                        /* Relies on VIMPL_TO_VNODE() of a NULL vip comparing equal to NULL. */
1.44      hannken   423:                vp = VIMPL_TO_VNODE(vip);
1.27      hannken   424:                if (vp == NULL) {
                    425:                        mutex_exit(&mntvnode_lock);
1.29      christos  426:                        return NULL;
1.27      hannken   427:                }
                    428:                mutex_enter(vp->v_interlock);
1.38      hannken   429:                if (vnis_marker(vp) ||
1.45      hannken   430:                    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
1.37      hannken   431:                    (f && !(*f)(cl, vp))) {
1.27      hannken   432:                        mutex_exit(vp->v_interlock);
1.44      hannken   433:                        vip = TAILQ_NEXT(vip, vi_mntvnodes);
1.29      christos  434:                        goto again;
1.27      hannken   435:                }
                    436:
                                        /*
                                         * Park the marker right after the chosen vnode so that
                                         * the next call, or a retry, resumes behind it.
                                         */
1.44      hannken   437:                TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
                    438:                VIMPL_TO_VNODE(mvip)->v_usecount = 1;
1.27      hannken   439:                mutex_exit(&mntvnode_lock);
                                        /*
                                         * vp->v_interlock is still held here.
                                         * NOTE(review): presumably vcache_vget() releases it --
                                         * confirm.  ENOENT makes the loop move on to the next
                                         * vnode.
                                         */
1.43      hannken   440:                error = vcache_vget(vp);
1.27      hannken   441:                KASSERT(error == 0 || error == ENOENT);
                    442:        } while (error != 0);
                    443:
1.29      christos  444:        return vp;
1.27      hannken   445: }
                    446:
                         /*
                          * vfs_vnode_iterator_next: return the next vnode accepted by the
                          * optional filter f (called with cl), or NULL at the end of the
                          * list.  Calls vfs_vnode_iterator_next1() with do_wait set to false.
                          */
1.45      hannken   447: struct vnode *
                    448: vfs_vnode_iterator_next(struct vnode_iterator *vni,
                    449:     bool (*f)(void *, struct vnode *), void *cl)
                    450: {
                    451:
                    452:        return vfs_vnode_iterator_next1(vni, f, cl, false);
                    453: }
                    454:
1.1       rmind     455: /*
                    456:  * Move a vnode from one mount queue to another.
                          *
                          * => vp is unlinked from the vnode list of its current v_mount, if
                          *    any, and appended to the list of mp, if mp is not NULL.
                          * => mp must not have IMNT_UNMOUNT set unless vp->v_tag is VT_VFS;
                          *    this is asserted.
                          * => The reference held on the old mount is released after
                          *    mntvnode_lock has been dropped.
                    457:  */
                    458: void
                    459: vfs_insmntque(vnode_t *vp, struct mount *mp)
                    460: {
1.44      hannken   461:        vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
                    462:        struct mount *omp;
                    463:
1.4       rmind     464:        KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
                    465:            vp->v_tag == VT_VFS);
1.1       rmind     466:
                    467:        mutex_enter(&mntvnode_lock);
                    468:        /*
                    469:         * Delete from old mount point vnode list, if on one.
                    470:         */
                    471:        if ((omp = vp->v_mount) != NULL)
1.44      hannken   472:                TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
1.1       rmind     473:        /*
                    474:         * Insert into list of vnodes for the new mount point, if
                    475:         * available.  The caller must take a reference on the mount
                    476:         * structure and donate to the vnode.
                    477:         */
                    478:        if ((vp->v_mount = mp) != NULL)
1.44      hannken   479:                TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
1.1       rmind     480:        mutex_exit(&mntvnode_lock);
                    481:
                    482:        if (omp != NULL) {
                    483:                /* Release reference to old mount. */
1.55      hannken   484:                vfs_rele(omp);
1.1       rmind     485:        }
                    486: }
                    487:
                    488: /*
                    489:  * Remove any vnodes in the vnode table belonging to mount point mp.
                    490:  *
                    491:  * If FORCECLOSE is not specified, there should not be any active ones,
                    492:  * return error if any are found (nb: this is a user error, not a
                    493:  * system error). If FORCECLOSE is specified, detach any active vnodes
                    494:  * that are found.
                    495:  *
                    496:  * If WRITECLOSE is set, only flush out regular file vnodes open for
                    497:  * writing.
                    498:  *
                    499:  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
                    500:  */
                    501: #ifdef DEBUG
                         /*
                          * DEBUG-only knob read by vflush().  debug1 pairs the name string
                          * with the address of the variable.
                          * NOTE(review): presumably this exports it through the kernel debug
                          * control table -- confirm.
                          */
                    502: int busyprt = 0;       /* print out busy vnodes */
                    503: struct ctldebug debug1 = { "busyprt", &busyprt };
                    504: #endif
                    505:
                         /*
                          * vflushnext: fetch the next vnode for vflush().
                          *
                          * Calls yield() first whenever hardclock_ticks has passed *when, then
                          * pushes *when hz / 10 ticks into the future.  The vnode comes from
                          * vfs_vnode_iterator_next1() with no filter and do_wait set to true.
                          * Returns NULL at the end of the list.
                          */
1.46      hannken   506: static vnode_t *
                    507: vflushnext(struct vnode_iterator *marker, int *when)
                    508: {
                    509:        if (hardclock_ticks > *when) {
                    510:                yield();
                    511:                *when = hardclock_ticks + hz / 10;
                    512:        }
                    513:        return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
                    514: }
1.29      christos  515:
1.46      hannken   516: /*
                    517:  * Flush one vnode.  Referenced on entry, unreferenced on return.
                          *
                          * => vp equal to skipvp is released untouched, as is a VV_SYSTEM
                          *    vnode when SKIPSYSTEM is set in flags.
                          * => Returns 0 if the vnode was skipped, recycled or forcibly
                          *    closed, the error from VOP_FSYNC() or VOP_GETATTR() in the
                          *    WRITECLOSE path, or EBUSY if the vnode could not be recycled
                          *    and FORCECLOSE is not set.
                    518:  */
                    519: static int
                    520: vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
1.29      christos  521: {
1.46      hannken   522:        int error;
                    523:        struct vattr vattr;
                    524:
                    525:        if (vp == skipvp ||
                    526:            ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
                    527:                vrele(vp);
                    528:                return 0;
                    529:        }
1.29      christos  530:        /*
1.46      hannken   531:         * If WRITECLOSE is set, only flush out regular file
                    532:         * vnodes open for writing or open and unlinked.
1.29      christos  533:         */
1.46      hannken   534:        if ((flags & WRITECLOSE)) {
                    535:                if (vp->v_type != VREG) {
                    536:                        vrele(vp);
                    537:                        return 0;
                    538:                }
                    539:                error = vn_lock(vp, LK_EXCLUSIVE);
                    540:                if (error) {
                    541:                        KASSERT(error == ENOENT);
                    542:                        vrele(vp);
                    543:                        return 0;
                    544:                }
                                        /* Sync the file, then fetch attributes for the link count test. */
                    545:                error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
                    546:                if (error == 0)
                    547:                        error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
                    548:                VOP_UNLOCK(vp);
                    549:                if (error) {
                    550:                        vrele(vp);
                    551:                        return error;
                    552:                }
                                        /* Not open for writing and still linked: leave it alone. */
                    553:                if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
                    554:                        vrele(vp);
                    555:                        return 0;
                    556:                }
                    557:        }
1.29      christos  558:        /*
1.46      hannken   559:         * First try to recycle the vnode.
                                 * No vrele() follows when vrecycle() returns true.
                                 * NOTE(review): presumably vrecycle() consumed the
                                 * reference in that case -- confirm.
1.29      christos  560:         */
1.46      hannken   561:        if (vrecycle(vp))
                    562:                return 0;
1.29      christos  563:        /*
1.46      hannken   564:         * If FORCECLOSE is set, forcibly close the vnode.
1.67      hannken   565:         * For block or character devices, revert to an
                    566:         * anonymous device.  For all other files, just
                    567:         * kill them.
                                 * A device vnode is only made anonymous when its
                                 * v_usecount is above 1; otherwise it takes the
                                 * vgone() path as well.
1.29      christos  568:         */
1.46      hannken   569:        if (flags & FORCECLOSE) {
1.67      hannken   570:                if (vp->v_usecount > 1 &&
                    571:                    (vp->v_type == VBLK || vp->v_type == VCHR))
                    572:                        vcache_make_anon(vp);
                    573:                else
                    574:                        vgone(vp);
1.46      hannken   575:                return 0;
1.29      christos  576:        }
1.46      hannken   577:        vrele(vp);
                    578:        return EBUSY;
1.29      christos  579: }
                    580:
1.1       rmind     581: int
                    582: vflush(struct mount *mp, vnode_t *skipvp, int flags)
                    583: {
1.27      hannken   584:        vnode_t *vp;
                    585:        struct vnode_iterator *marker;
1.47      hannken   586:        int busy, error, when, retries = 2;
1.46      hannken   587:
1.47      hannken   588:        do {
                    589:                busy = error = when = 0;
1.1       rmind     590:
1.47      hannken   591:                /*
                    592:                 * First, flush out any vnode references from the
                    593:                 * deferred vrele list.
                    594:                 */
1.62      hannken   595:                vrele_flush(mp);
1.1       rmind     596:
1.47      hannken   597:                vfs_vnode_iterator_init(mp, &marker);
1.29      christos  598:
1.47      hannken   599:                while ((vp = vflushnext(marker, &when)) != NULL) {
                    600:                        error = vflush_one(vp, skipvp, flags);
                    601:                        if (error == EBUSY) {
                    602:                                error = 0;
                    603:                                busy++;
1.1       rmind     604: #ifdef DEBUG
1.47      hannken   605:                                if (busyprt && retries == 0)
                    606:                                        vprint("vflush: busy vnode", vp);
1.1       rmind     607: #endif
1.47      hannken   608:                        } else if (error != 0) {
                    609:                                break;
                    610:                        }
1.46      hannken   611:                }
                    612:
1.47      hannken   613:                vfs_vnode_iterator_destroy(marker);
                    614:        } while (error == 0 && busy > 0 && retries-- > 0);
1.46      hannken   615:
                    616:        if (error)
                    617:                return error;
1.1       rmind     618:        if (busy)
1.46      hannken   619:                return EBUSY;
1.45      hannken   620:        return 0;
1.1       rmind     621: }
                    622:
                    623: /*
                    624:  * Mount a file system.
                    625:  */
                    626:
                    627: /*
                    628:  * Scan all active processes to see if any of them have a current or root
                    629:  * directory onto which the new filesystem has just been  mounted. If so,
                    630:  * replace them with the new mount point.
                    631:  */
                    632: static void
                    633: mount_checkdirs(vnode_t *olddp)
                    634: {
                    635:        vnode_t *newdp, *rele1, *rele2;
                    636:        struct cwdinfo *cwdi;
                    637:        struct proc *p;
                    638:        bool retry;
                    639:
                    640:        if (olddp->v_usecount == 1) {
                    641:                return;
                    642:        }
                    643:        if (VFS_ROOT(olddp->v_mountedhere, &newdp))
                    644:                panic("mount: lost mount");
                    645:
                    646:        do {
                    647:                retry = false;
                    648:                mutex_enter(proc_lock);
                    649:                PROCLIST_FOREACH(p, &allproc) {
                    650:                        if ((cwdi = p->p_cwdi) == NULL)
                    651:                                continue;
                    652:                        /*
                    653:                         * Cannot change to the old directory any more,
                    654:                         * so even if we see a stale value it is not a
                    655:                         * problem.
                    656:                         */
                    657:                        if (cwdi->cwdi_cdir != olddp &&
                    658:                            cwdi->cwdi_rdir != olddp)
                    659:                                continue;
                    660:                        retry = true;
                    661:                        rele1 = NULL;
                    662:                        rele2 = NULL;
                    663:                        atomic_inc_uint(&cwdi->cwdi_refcnt);
                    664:                        mutex_exit(proc_lock);
                    665:                        rw_enter(&cwdi->cwdi_lock, RW_WRITER);
                    666:                        if (cwdi->cwdi_cdir == olddp) {
                    667:                                rele1 = cwdi->cwdi_cdir;
                    668:                                vref(newdp);
                    669:                                cwdi->cwdi_cdir = newdp;
                    670:                        }
                    671:                        if (cwdi->cwdi_rdir == olddp) {
                    672:                                rele2 = cwdi->cwdi_rdir;
                    673:                                vref(newdp);
                    674:                                cwdi->cwdi_rdir = newdp;
                    675:                        }
                    676:                        rw_exit(&cwdi->cwdi_lock);
                    677:                        cwdfree(cwdi);
                    678:                        if (rele1 != NULL)
                    679:                                vrele(rele1);
                    680:                        if (rele2 != NULL)
                    681:                                vrele(rele2);
                    682:                        mutex_enter(proc_lock);
                    683:                        break;
                    684:                }
                    685:                mutex_exit(proc_lock);
                    686:        } while (retry);
                    687:
                    688:        if (rootvnode == olddp) {
                    689:                vrele(rootvnode);
                    690:                vref(newdp);
                    691:                rootvnode = newdp;
                    692:        }
                    693:        vput(newdp);
                    694: }
                    695:
1.31      manu      696: /*
                    697:  * Start extended attributes
                    698:  */
                    699: static int
                    700: start_extattr(struct mount *mp)
                    701: {
                    702:        int error;
                    703:
                    704:        error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
                    705:        if (error)
                    706:                printf("%s: failed to start extattr: error = %d\n",
                    707:                       mp->mnt_stat.f_mntonname, error);
                    708:
                    709:        return error;
                    710: }
                    711:
1.1       rmind     712: int
                    713: mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
                    714:     const char *path, int flags, void *data, size_t *data_len)
                    715: {
                    716:        vnode_t *vp = *vpp;
                    717:        struct mount *mp;
                    718:        struct pathbuf *pb;
                    719:        struct nameidata nd;
                    720:        int error;
                    721:
                    722:        error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
                    723:            KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
                    724:        if (error) {
                    725:                vfs_delref(vfsops);
                    726:                return error;
                    727:        }
                    728:
                    729:        /* Cannot make a non-dir a mount-point (from here anyway). */
                    730:        if (vp->v_type != VDIR) {
                    731:                vfs_delref(vfsops);
                    732:                return ENOTDIR;
                    733:        }
                    734:
                    735:        if (flags & MNT_EXPORTED) {
                    736:                vfs_delref(vfsops);
                    737:                return EINVAL;
                    738:        }
                    739:
                    740:        if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
                    741:                vfs_delref(vfsops);
                    742:                return ENOMEM;
                    743:        }
                    744:
                    745:        mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
                    746:
                    747:        /*
                    748:         * The underlying file system may refuse the mount for
                    749:         * various reasons.  Allow the user to force it to happen.
                    750:         *
                    751:         * Set the mount level flags.
                    752:         */
1.12      christos  753:        mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);
1.1       rmind     754:
                    755:        mutex_enter(&mp->mnt_updating);
                    756:        error = VFS_MOUNT(mp, path, data, data_len);
                    757:        mp->mnt_flag &= ~MNT_OP_FLAGS;
                    758:
                    759:        if (error != 0)
                    760:                goto err_unmounted;
                    761:
                    762:        /*
                    763:         * Validate and prepare the mount point.
                    764:         */
                    765:        error = pathbuf_copyin(path, &pb);
                    766:        if (error != 0) {
                    767:                goto err_mounted;
                    768:        }
                    769:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
                    770:        error = namei(&nd);
                    771:        pathbuf_destroy(pb);
                    772:        if (error != 0) {
                    773:                goto err_mounted;
                    774:        }
                    775:        if (nd.ni_vp != vp) {
                    776:                vput(nd.ni_vp);
                    777:                error = EINVAL;
                    778:                goto err_mounted;
                    779:        }
                    780:        if (vp->v_mountedhere != NULL) {
                    781:                vput(nd.ni_vp);
                    782:                error = EBUSY;
                    783:                goto err_mounted;
                    784:        }
                    785:        error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
                    786:        if (error != 0) {
                    787:                vput(nd.ni_vp);
                    788:                goto err_mounted;
                    789:        }
                    790:
                    791:        /*
                    792:         * Put the new filesystem on the mount list after root.
                    793:         */
                    794:        cache_purge(vp);
                    795:        mp->mnt_iflag &= ~IMNT_WANTRDWR;
                    796:
1.52      hannken   797:        mountlist_append(mp);
1.1       rmind     798:        if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
1.35      hannken   799:                vfs_syncer_add_to_worklist(mp);
                    800:        vp->v_mountedhere = mp;
1.1       rmind     801:        vput(nd.ni_vp);
                    802:
                    803:        mount_checkdirs(vp);
                    804:        mutex_exit(&mp->mnt_updating);
                    805:
                    806:        /* Hold an additional reference to the mount across VFS_START(). */
1.56      hannken   807:        vfs_ref(mp);
1.1       rmind     808:        (void) VFS_STATVFS(mp, &mp->mnt_stat);
                    809:        error = VFS_START(mp, 0);
1.71    ! christos  810:        if (error) {
1.1       rmind     811:                vrele(vp);
1.31      manu      812:        } else if (flags & MNT_EXTATTR) {
1.71    ! christos  813:                if (start_extattr(mp) != 0)
        !           814:                        mp->mnt_flag &= ~MNT_EXTATTR;
1.31      manu      815:        }
1.1       rmind     816:        /* Drop reference held for VFS_START(). */
1.55      hannken   817:        vfs_rele(mp);
1.1       rmind     818:        *vpp = NULL;
                    819:        return error;
                    820:
                    821: err_mounted:
                    822:        if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
                    823:                panic("Unmounting fresh file system failed");
                    824:
                    825: err_unmounted:
                    826:        vp->v_mountedhere = NULL;
                    827:        mutex_exit(&mp->mnt_updating);
1.55      hannken   828:        vfs_rele(mp);
1.1       rmind     829:
                    830:        return error;
                    831: }
                    832:
                    833: /*
                    834:  * Do the actual file system unmount.  File system is assumed to have
                    835:  * been locked by the caller.
                    836:  *
                    837:  * => Caller hold reference to the mount, explicitly for dounmount().
                    838:  */
                    839: int
                    840: dounmount(struct mount *mp, int flags, struct lwp *l)
                    841: {
                    842:        vnode_t *coveredvp;
1.31      manu      843:        int error, async, used_syncer, used_extattr;
1.68      hannken   844:        const bool was_suspended = fstrans_is_owner(mp);
1.1       rmind     845:
                    846: #if NVERIEXEC > 0
                    847:        error = veriexec_unmountchk(mp);
                    848:        if (error)
                    849:                return (error);
                    850: #endif /* NVERIEXEC > 0 */
                    851:
1.68      hannken   852:        if (!was_suspended) {
                    853:                error = vfs_suspend(mp, 0);
                    854:                if (error) {
                    855:                        return error;
1.49      hannken   856:                }
                    857:        }
1.62      hannken   858:
1.64      hannken   859:        KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);
1.1       rmind     860:
1.35      hannken   861:        used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
1.31      manu      862:        used_extattr = mp->mnt_flag & MNT_EXTATTR;
1.1       rmind     863:
                    864:        mp->mnt_iflag |= IMNT_UNMOUNT;
1.18      mlelstv   865:        mutex_enter(&mp->mnt_updating);
1.1       rmind     866:        async = mp->mnt_flag & MNT_ASYNC;
                    867:        mp->mnt_flag &= ~MNT_ASYNC;
                    868:        cache_purgevfs(mp);     /* remove cache entries for this file sys */
1.35      hannken   869:        if (used_syncer)
                    870:                vfs_syncer_remove_from_worklist(mp);
1.1       rmind     871:        error = 0;
1.36      manu      872:        if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
1.1       rmind     873:                error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
                    874:        }
                    875:        if (error == 0 || (flags & MNT_FORCE)) {
                    876:                error = VFS_UNMOUNT(mp, flags);
                    877:        }
                    878:        if (error) {
1.18      mlelstv   879:                mp->mnt_iflag &= ~IMNT_UNMOUNT;
1.1       rmind     880:                if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
1.35      hannken   881:                        vfs_syncer_add_to_worklist(mp);
1.1       rmind     882:                mp->mnt_flag |= async;
1.18      mlelstv   883:                mutex_exit(&mp->mnt_updating);
1.68      hannken   884:                if (!was_suspended)
                    885:                        vfs_resume(mp);
1.31      manu      886:                if (used_extattr) {
                    887:                        if (start_extattr(mp) != 0)
                    888:                                mp->mnt_flag &= ~MNT_EXTATTR;
                    889:                        else
                    890:                                mp->mnt_flag |= MNT_EXTATTR;
                    891:                }
1.1       rmind     892:                return (error);
                    893:        }
1.18      mlelstv   894:        mutex_exit(&mp->mnt_updating);
1.19      mlelstv   895:
                    896:        /*
                    897:         * mark filesystem as gone to prevent further umounts
                    898:         * after mnt_umounting lock is gone, this also prevents
                    899:         * vfs_busy() from succeeding.
                    900:         */
                    901:        mp->mnt_iflag |= IMNT_GONE;
1.68      hannken   902:        if (!was_suspended)
                    903:                vfs_resume(mp);
1.19      mlelstv   904:
1.20      hannken   905:        if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
                    906:                vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
                    907:                coveredvp->v_mountedhere = NULL;
                    908:                VOP_UNLOCK(coveredvp);
                    909:        }
1.52      hannken   910:        mountlist_remove(mp);
1.25      christos  911:        if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
1.1       rmind     912:                panic("unmount: dangling vnode");
                    913:        vfs_hooks_unmount(mp);
1.19      mlelstv   914:
1.55      hannken   915:        vfs_rele(mp);   /* reference from mount() */
1.1       rmind     916:        if (coveredvp != NULLVP) {
                    917:                vrele(coveredvp);
                    918:        }
                    919:        return (0);
                    920: }
                    921:
                    922: /*
                    923:  * Unmount all file systems.
                    924:  * We traverse the list in reverse order under the assumption that doing so
                    925:  * will avoid needing to worry about dependencies.
                    926:  */
                    927: bool
                    928: vfs_unmountall(struct lwp *l)
                    929: {
                    930:
1.16      pooka     931:        printf("unmounting file systems...\n");
1.1       rmind     932:        return vfs_unmountall1(l, true, true);
                    933: }
                    934:
                    935: static void
                    936: vfs_unmount_print(struct mount *mp, const char *pfx)
                    937: {
                    938:
                    939:        aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
                    940:            mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
                    941:            mp->mnt_stat.f_fstypename);
                    942: }
                    943:
1.53      hannken   944: /*
                    945:  * Return the mount with the highest generation less than "gen".
                    946:  */
                    947: static struct mount *
                    948: vfs_unmount_next(uint64_t gen)
1.1       rmind     949: {
1.53      hannken   950:        mount_iterator_t *iter;
1.1       rmind     951:        struct mount *mp, *nmp;
                    952:
                    953:        nmp = NULL;
                    954:
1.53      hannken   955:        mountlist_iterator_init(&iter);
                    956:        while ((mp = mountlist_iterator_next(iter)) != NULL) {
                    957:                if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
                    958:                    mp->mnt_gen < gen) {
                    959:                        if (nmp != NULL)
1.55      hannken   960:                                vfs_rele(nmp);
1.1       rmind     961:                        nmp = mp;
1.55      hannken   962:                        vfs_ref(nmp);
1.1       rmind     963:                }
                    964:        }
1.53      hannken   965:        mountlist_iterator_destroy(iter);
                    966:
                    967:        return nmp;
                    968: }
                    969:
                    970: bool
                    971: vfs_unmount_forceone(struct lwp *l)
                    972: {
                    973:        struct mount *mp;
                    974:        int error;
                    975:
                    976:        mp = vfs_unmount_next(mountgen);
                    977:        if (mp == NULL) {
1.1       rmind     978:                return false;
                    979:        }
                    980:
                    981: #ifdef DEBUG
1.16      pooka     982:        printf("forcefully unmounting %s (%s)...\n",
1.53      hannken   983:            mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.1       rmind     984: #endif
1.53      hannken   985:        if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
                    986:                vfs_unmount_print(mp, "forcefully ");
1.1       rmind     987:                return true;
                    988:        } else {
1.55      hannken   989:                vfs_rele(mp);
1.1       rmind     990:        }
                    991:
                    992: #ifdef DEBUG
                    993:        printf("forceful unmount of %s failed with error %d\n",
1.53      hannken   994:            mp->mnt_stat.f_mntonname, error);
1.1       rmind     995: #endif
                    996:
                    997:        return false;
                    998: }
                    999:
                   1000: bool
                   1001: vfs_unmountall1(struct lwp *l, bool force, bool verbose)
                   1002: {
1.53      hannken  1003:        struct mount *mp;
1.1       rmind    1004:        bool any_error = false, progress = false;
1.53      hannken  1005:        uint64_t gen;
1.1       rmind    1006:        int error;
                   1007:
1.53      hannken  1008:        gen = mountgen;
                   1009:        for (;;) {
                   1010:                mp = vfs_unmount_next(gen);
                   1011:                if (mp == NULL)
                   1012:                        break;
                   1013:                gen = mp->mnt_gen;
                   1014:
1.1       rmind    1015: #ifdef DEBUG
1.16      pooka    1016:                printf("unmounting %p %s (%s)...\n",
1.1       rmind    1017:                    (void *)mp, mp->mnt_stat.f_mntonname,
                   1018:                    mp->mnt_stat.f_mntfromname);
                   1019: #endif
                   1020:                if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
                   1021:                        vfs_unmount_print(mp, "");
                   1022:                        progress = true;
                   1023:                } else {
1.55      hannken  1024:                        vfs_rele(mp);
1.1       rmind    1025:                        if (verbose) {
                   1026:                                printf("unmount of %s failed with error %d\n",
                   1027:                                    mp->mnt_stat.f_mntonname, error);
                   1028:                        }
                   1029:                        any_error = true;
                   1030:                }
                   1031:        }
                   1032:        if (verbose) {
1.16      pooka    1033:                printf("unmounting done\n");
1.1       rmind    1034:        }
                   1035:        if (any_error && verbose) {
                   1036:                printf("WARNING: some file systems would not unmount\n");
                   1037:        }
                   1038:        return progress;
                   1039: }
                   1040:
                   1041: void
                   1042: vfs_sync_all(struct lwp *l)
                   1043: {
                   1044:        printf("syncing disks... ");
                   1045:
                   1046:        /* remove user processes from run queue */
                   1047:        suspendsched();
                   1048:        (void)spl0();
                   1049:
                   1050:        /* avoid coming back this way again if we panic. */
                   1051:        doing_shutdown = 1;
                   1052:
1.5       dsl      1053:        do_sys_sync(l);
1.1       rmind    1054:
                   1055:        /* Wait for sync to finish. */
                   1056:        if (buf_syncwait() != 0) {
                   1057: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
                   1058:                Debugger();
                   1059: #endif
                   1060:                printf("giving up\n");
                   1061:                return;
                   1062:        } else
                   1063:                printf("done\n");
                   1064: }
                   1065:
1.3       rmind    1066: /*
                   1067:  * Sync and unmount file systems before shutting down.
                   1068:  */
                   1069: void
                   1070: vfs_shutdown(void)
1.1       rmind    1071: {
1.3       rmind    1072:        lwp_t *l = curlwp;
1.1       rmind    1073:
                   1074:        vfs_sync_all(l);
                   1075:
                   1076:        /*
1.3       rmind    1077:         * If we have paniced - do not make the situation potentially
1.1       rmind    1078:         * worse by unmounting the file systems.
                   1079:         */
1.3       rmind    1080:        if (panicstr != NULL) {
1.1       rmind    1081:                return;
1.3       rmind    1082:        }
1.1       rmind    1083:
                   1084:        /* Unmount file systems. */
                   1085:        vfs_unmountall(l);
                   1086: }
                   1087:
                   1088: /*
                   1089:  * Print a list of supported file system types (used by vfs_mountroot)
                   1090:  */
                   1091: static void
                   1092: vfs_print_fstypes(void)
                   1093: {
                   1094:        struct vfsops *v;
                   1095:        int cnt = 0;
                   1096:
                   1097:        mutex_enter(&vfs_list_lock);
                   1098:        LIST_FOREACH(v, &vfs_list, vfs_list)
                   1099:                ++cnt;
                   1100:        mutex_exit(&vfs_list_lock);
                   1101:
                   1102:        if (cnt == 0) {
                   1103:                printf("WARNING: No file system modules have been loaded.\n");
                   1104:                return;
                   1105:        }
                   1106:
                   1107:        printf("Supported file systems:");
                   1108:        mutex_enter(&vfs_list_lock);
                   1109:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   1110:                printf(" %s", v->vfs_name);
                   1111:        }
                   1112:        mutex_exit(&vfs_list_lock);
                   1113:        printf("\n");
                   1114: }
                   1115:
                   1116: /*
                   1117:  * Mount the root file system.  If the operator didn't specify a
                   1118:  * file system to use, try all possible file systems until one
                   1119:  * succeeds.
                   1120:  */
                   1121: int
                   1122: vfs_mountroot(void)
                   1123: {
                   1124:        struct vfsops *v;
                   1125:        int error = ENODEV;
                   1126:
                   1127:        if (root_device == NULL)
                   1128:                panic("vfs_mountroot: root device unknown");
                   1129:
                   1130:        switch (device_class(root_device)) {
                   1131:        case DV_IFNET:
                   1132:                if (rootdev != NODEV)
                   1133:                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                   1134:                            "(0x%llx -> %llu,%llu)",
                   1135:                            (unsigned long long)rootdev,
                   1136:                            (unsigned long long)major(rootdev),
                   1137:                            (unsigned long long)minor(rootdev));
                   1138:                break;
                   1139:
                   1140:        case DV_DISK:
                   1141:                if (rootdev == NODEV)
                   1142:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                   1143:                if (bdevvp(rootdev, &rootvp))
                   1144:                        panic("vfs_mountroot: can't get vnode for rootdev");
                   1145:                error = VOP_OPEN(rootvp, FREAD, FSCRED);
                   1146:                if (error) {
                   1147:                        printf("vfs_mountroot: can't open root device\n");
                   1148:                        return (error);
                   1149:                }
                   1150:                break;
                   1151:
                   1152:        case DV_VIRTUAL:
                   1153:                break;
                   1154:
                   1155:        default:
                   1156:                printf("%s: inappropriate for root file system\n",
                   1157:                    device_xname(root_device));
                   1158:                return (ENODEV);
                   1159:        }
                   1160:
                   1161:        /*
                   1162:         * If user specified a root fs type, use it.  Make sure the
                   1163:         * specified type exists and has a mount_root()
                   1164:         */
                   1165:        if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
                   1166:                v = vfs_getopsbyname(rootfstype);
                   1167:                error = EFTYPE;
                   1168:                if (v != NULL) {
                   1169:                        if (v->vfs_mountroot != NULL) {
                   1170:                                error = (v->vfs_mountroot)();
                   1171:                        }
                   1172:                        v->vfs_refcount--;
                   1173:                }
                   1174:                goto done;
                   1175:        }
                   1176:
                   1177:        /*
                   1178:         * Try each file system currently configured into the kernel.
                   1179:         */
                   1180:        mutex_enter(&vfs_list_lock);
                   1181:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   1182:                if (v->vfs_mountroot == NULL)
                   1183:                        continue;
                   1184: #ifdef DEBUG
                   1185:                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
                   1186: #endif
                   1187:                v->vfs_refcount++;
                   1188:                mutex_exit(&vfs_list_lock);
                   1189:                error = (*v->vfs_mountroot)();
                   1190:                mutex_enter(&vfs_list_lock);
                   1191:                v->vfs_refcount--;
                   1192:                if (!error) {
                   1193:                        aprint_normal("root file system type: %s\n",
                   1194:                            v->vfs_name);
                   1195:                        break;
                   1196:                }
                   1197:        }
                   1198:        mutex_exit(&vfs_list_lock);
                   1199:
                   1200:        if (v == NULL) {
                   1201:                vfs_print_fstypes();
                   1202:                printf("no file system for %s", device_xname(root_device));
                   1203:                if (device_class(root_device) == DV_DISK)
                   1204:                        printf(" (dev 0x%llx)", (unsigned long long)rootdev);
                   1205:                printf("\n");
                   1206:                error = EFTYPE;
                   1207:        }
                   1208:
                   1209: done:
                   1210:        if (error && device_class(root_device) == DV_DISK) {
                   1211:                VOP_CLOSE(rootvp, FREAD, FSCRED);
                   1212:                vrele(rootvp);
                   1213:        }
                   1214:        if (error == 0) {
1.53      hannken  1215:                mount_iterator_t *iter;
1.24      christos 1216:                struct mount *mp;
1.1       rmind    1217:                extern struct cwdinfo cwdi0;
                   1218:
1.53      hannken  1219:                mountlist_iterator_init(&iter);
                   1220:                mp = mountlist_iterator_next(iter);
                   1221:                KASSERT(mp != NULL);
                   1222:                mountlist_iterator_destroy(iter);
                   1223:
1.24      christos 1224:                mp->mnt_flag |= MNT_ROOTFS;
                   1225:                mp->mnt_op->vfs_refcount++;
1.1       rmind    1226:
                   1227:                /*
                   1228:                 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
                   1229:                 * reference it.
                   1230:                 */
1.24      christos 1231:                error = VFS_ROOT(mp, &rootvnode);
1.1       rmind    1232:                if (error)
                   1233:                        panic("cannot find root vnode, error=%d", error);
                   1234:                cwdi0.cwdi_cdir = rootvnode;
                   1235:                vref(cwdi0.cwdi_cdir);
                   1236:                VOP_UNLOCK(rootvnode);
                   1237:                cwdi0.cwdi_rdir = NULL;
                   1238:
                   1239:                /*
                   1240:                 * Now that root is mounted, we can fixup initproc's CWD
                   1241:                 * info.  All other processes are kthreads, which merely
                   1242:                 * share proc0's CWD info.
                   1243:                 */
                   1244:                initproc->p_cwdi->cwdi_cdir = rootvnode;
                   1245:                vref(initproc->p_cwdi->cwdi_cdir);
                   1246:                initproc->p_cwdi->cwdi_rdir = NULL;
                   1247:                /*
                   1248:                 * Enable loading of modules from the filesystem
                   1249:                 */
                   1250:                module_load_vfs_init();
                   1251:
                   1252:        }
                   1253:        return (error);
                   1254: }
                   1255:
                   1256: /*
                   1257:  * mount_specific_key_create --
                   1258:  *     Create a key for subsystem mount-specific data.
                   1259:  */
                   1260: int
                   1261: mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
                   1262: {
                   1263:
                   1264:        return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
                   1265: }
                   1266:
                   1267: /*
                   1268:  * mount_specific_key_delete --
                   1269:  *     Delete a key for subsystem mount-specific data.
                   1270:  */
                   1271: void
                   1272: mount_specific_key_delete(specificdata_key_t key)
                   1273: {
                   1274:
                   1275:        specificdata_key_delete(mount_specificdata_domain, key);
                   1276: }
                   1277:
                   1278: /*
                   1279:  * mount_initspecific --
                   1280:  *     Initialize a mount's specificdata container.
                   1281:  */
                   1282: void
                   1283: mount_initspecific(struct mount *mp)
                   1284: {
1.22      martin   1285:        int error __diagused;
1.1       rmind    1286:
                   1287:        error = specificdata_init(mount_specificdata_domain,
                   1288:                                  &mp->mnt_specdataref);
                   1289:        KASSERT(error == 0);
                   1290: }
                   1291:
                   1292: /*
                   1293:  * mount_finispecific --
                   1294:  *     Finalize a mount's specificdata container.
                   1295:  */
                   1296: void
                   1297: mount_finispecific(struct mount *mp)
                   1298: {
                   1299:
                   1300:        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
                   1301: }
                   1302:
                   1303: /*
                   1304:  * mount_getspecific --
                   1305:  *     Return mount-specific data corresponding to the specified key.
                   1306:  */
                   1307: void *
                   1308: mount_getspecific(struct mount *mp, specificdata_key_t key)
                   1309: {
                   1310:
                   1311:        return specificdata_getspecific(mount_specificdata_domain,
                   1312:                                         &mp->mnt_specdataref, key);
                   1313: }
                   1314:
                   1315: /*
                   1316:  * mount_setspecific --
                   1317:  *     Set mount-specific data corresponding to the specified key.
                   1318:  */
                   1319: void
                   1320: mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
                   1321: {
                   1322:
                   1323:        specificdata_setspecific(mount_specificdata_domain,
                   1324:                                 &mp->mnt_specdataref, key, data);
                   1325: }
                   1326:
                   1327: /*
                   1328:  * Check to see if a filesystem is mounted on a block device.
                   1329:  */
                   1330: int
                   1331: vfs_mountedon(vnode_t *vp)
                   1332: {
                   1333:        vnode_t *vq;
                   1334:        int error = 0;
                   1335:
                   1336:        if (vp->v_type != VBLK)
                   1337:                return ENOTBLK;
1.21      hannken  1338:        if (spec_node_getmountedfs(vp) != NULL)
1.17      hannken  1339:                return EBUSY;
                   1340:        if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1.21      hannken  1341:                if (spec_node_getmountedfs(vq) != NULL)
1.1       rmind    1342:                        error = EBUSY;
1.17      hannken  1343:                vrele(vq);
1.1       rmind    1344:        }
1.17      hannken  1345:
                   1346:        return error;
1.1       rmind    1347: }
                   1348:
                   1349: /*
                   1350:  * Check if a device pointed to by vp is mounted.
                   1351:  *
                   1352:  * Returns:
                   1353:  *   EINVAL    if it's not a disk
                   1354:  *   EBUSY     if it's a disk and mounted
                   1355:  *   0         if it's a disk and not mounted
                   1356:  */
                   1357: int
                   1358: rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
                   1359: {
                   1360:        vnode_t *bvp;
                   1361:        dev_t dev;
                   1362:        int d_type;
                   1363:
                   1364:        bvp = NULL;
                   1365:        d_type = D_OTHER;
                   1366:
                   1367:        if (iskmemvp(vp))
                   1368:                return EINVAL;
                   1369:
                   1370:        switch (vp->v_type) {
                   1371:        case VCHR: {
                   1372:                const struct cdevsw *cdev;
                   1373:
1.7       christos 1374:                dev = vp->v_rdev;
1.1       rmind    1375:                cdev = cdevsw_lookup(dev);
                   1376:                if (cdev != NULL) {
                   1377:                        dev_t blkdev;
                   1378:
                   1379:                        blkdev = devsw_chr2blk(dev);
                   1380:                        if (blkdev != NODEV) {
                   1381:                                if (vfinddev(blkdev, VBLK, &bvp) != 0) {
                   1382:                                        d_type = (cdev->d_flag & D_TYPEMASK);
                   1383:                                        /* XXX: what if bvp disappears? */
                   1384:                                        vrele(bvp);
                   1385:                                }
                   1386:                        }
                   1387:                }
                   1388:
                   1389:                break;
                   1390:                }
                   1391:
                   1392:        case VBLK: {
                   1393:                const struct bdevsw *bdev;
                   1394:
1.7       christos 1395:                dev = vp->v_rdev;
1.1       rmind    1396:                bdev = bdevsw_lookup(dev);
                   1397:                if (bdev != NULL)
                   1398:                        d_type = (bdev->d_flag & D_TYPEMASK);
                   1399:
                   1400:                bvp = vp;
                   1401:
                   1402:                break;
                   1403:                }
                   1404:
                   1405:        default:
                   1406:                break;
                   1407:        }
                   1408:
                   1409:        if (d_type != D_DISK)
                   1410:                return EINVAL;
                   1411:
                   1412:        if (bvpp != NULL)
                   1413:                *bvpp = bvp;
                   1414:
                   1415:        /*
                   1416:         * XXX: This is bogus. We should be failing the request
                   1417:         * XXX: not only if this specific slice is mounted, but
                   1418:         * XXX: if it's on a disk with any other mounted slice.
                   1419:         */
                   1420:        if (vfs_mountedon(bvp))
                   1421:                return EBUSY;
                   1422:
                   1423:        return 0;
                   1424: }
                   1425:
                   /*
                    * Make a 'unique' number from a mount type name.
                    *
                    * Each character of the NUL-terminated name is folded in by shifting
                    * the running value left two bits and XORing in the character.  An
                    * empty name yields 0.
                    *
                    * The accumulation is done in unsigned long: left-shifting a signed
                    * long past its range (or once it has gone negative) is undefined
                    * behaviour, which long type names would otherwise trigger.  Results
                    * for names that never overflowed are unchanged.
                    */
                   long
                   makefstype(const char *type)
                   {
                           unsigned long acc = 0;

                           for (; *type != '\0'; type++) {
                                   acc <<= 2;
                                   /*
                                    * Convert through long first so that a negative
                                    * plain char contributes the same sign-extended bit
                                    * pattern as the original signed XOR did.
                                    */
                                   acc ^= (unsigned long)(long)*type;
                           }
                           return (long)acc;
                   }
1.24      christos 1440:
1.52      hannken  1441: static struct mountlist_entry *
                   1442: mountlist_alloc(enum mountlist_type type, struct mount *mp)
                   1443: {
                   1444:        struct mountlist_entry *me;
                   1445:
                   1446:        me = kmem_zalloc(sizeof(*me), KM_SLEEP);
                   1447:        me->me_mount = mp;
                   1448:        me->me_type = type;
                   1449:
                   1450:        return me;
                   1451: }
                   1452:
                   1453: static void
                   1454: mountlist_free(struct mountlist_entry *me)
                   1455: {
                   1456:
                   1457:        kmem_free(me, sizeof(*me));
                   1458: }
                   1459:
                   1460: void
                   1461: mountlist_iterator_init(mount_iterator_t **mip)
                   1462: {
                   1463:        struct mountlist_entry *me;
                   1464:
                   1465:        me = mountlist_alloc(ME_MARKER, NULL);
                   1466:        mutex_enter(&mountlist_lock);
1.54      hannken  1467:        TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1.52      hannken  1468:        mutex_exit(&mountlist_lock);
                   1469:        *mip = (mount_iterator_t *)me;
                   1470: }
                   1471:
                   1472: void
                   1473: mountlist_iterator_destroy(mount_iterator_t *mi)
                   1474: {
                   1475:        struct mountlist_entry *marker = &mi->mi_entry;
                   1476:
                   1477:        if (marker->me_mount != NULL)
1.56      hannken  1478:                vfs_unbusy(marker->me_mount);
1.52      hannken  1479:
                   1480:        mutex_enter(&mountlist_lock);
1.54      hannken  1481:        TAILQ_REMOVE(&mountlist, marker, me_list);
1.52      hannken  1482:        mutex_exit(&mountlist_lock);
                   1483:
                   1484:        mountlist_free(marker);
                   1485:
                   1486: }
                   1487:
                   1488: /*
                   1489:  * Return the next mount or NULL for this iterator.
                   1490:  * Mark it busy on success.
                   1491:  */
1.58      hannken  1492: static inline struct mount *
                   1493: _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1.52      hannken  1494: {
                   1495:        struct mountlist_entry *me, *marker = &mi->mi_entry;
                   1496:        struct mount *mp;
1.58      hannken  1497:        int error;
1.52      hannken  1498:
                   1499:        if (marker->me_mount != NULL) {
1.56      hannken  1500:                vfs_unbusy(marker->me_mount);
1.52      hannken  1501:                marker->me_mount = NULL;
                   1502:        }
                   1503:
                   1504:        mutex_enter(&mountlist_lock);
                   1505:        for (;;) {
                   1506:                KASSERT(marker->me_type == ME_MARKER);
                   1507:
                   1508:                me = TAILQ_NEXT(marker, me_list);
                   1509:                if (me == NULL) {
                   1510:                        /* End of list: keep marker and return. */
                   1511:                        mutex_exit(&mountlist_lock);
                   1512:                        return NULL;
                   1513:                }
1.54      hannken  1514:                TAILQ_REMOVE(&mountlist, marker, me_list);
                   1515:                TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1.52      hannken  1516:
                   1517:                /* Skip other markers. */
                   1518:                if (me->me_type != ME_MOUNT)
                   1519:                        continue;
                   1520:
                   1521:                /* Take an initial reference for vfs_busy() below. */
                   1522:                mp = me->me_mount;
                   1523:                KASSERT(mp != NULL);
1.55      hannken  1524:                vfs_ref(mp);
1.52      hannken  1525:                mutex_exit(&mountlist_lock);
                   1526:
                   1527:                /* Try to mark this mount busy and return on success. */
1.58      hannken  1528:                if (wait)
                   1529:                        error = vfs_busy(mp);
                   1530:                else
                   1531:                        error = vfs_trybusy(mp);
                   1532:                if (error == 0) {
1.55      hannken  1533:                        vfs_rele(mp);
1.52      hannken  1534:                        marker->me_mount = mp;
                   1535:                        return mp;
                   1536:                }
1.55      hannken  1537:                vfs_rele(mp);
1.52      hannken  1538:                mutex_enter(&mountlist_lock);
                   1539:        }
                   1540: }
                   1541:
1.58      hannken  1542: struct mount *
                   1543: mountlist_iterator_next(mount_iterator_t *mi)
                   1544: {
                   1545:
                   1546:        return _mountlist_iterator_next(mi, true);
                   1547: }
                   1548:
                   1549: struct mount *
                   1550: mountlist_iterator_trynext(mount_iterator_t *mi)
                   1551: {
                   1552:
                   1553:        return _mountlist_iterator_next(mi, false);
                   1554: }
                   1555:
1.52      hannken  1556: /*
                   1557:  * Attach new mount to the end of the mount list.
                   1558:  */
1.24      christos 1559: void
                   1560: mountlist_append(struct mount *mp)
                   1561: {
1.52      hannken  1562:        struct mountlist_entry *me;
                   1563:
                   1564:        me = mountlist_alloc(ME_MOUNT, mp);
1.24      christos 1565:        mutex_enter(&mountlist_lock);
1.54      hannken  1566:        TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1.24      christos 1567:        mutex_exit(&mountlist_lock);
                   1568: }
1.52      hannken  1569:
                   1570: /*
                   1571:  * Remove mount from mount list.
                   1572:  */void
                   1573: mountlist_remove(struct mount *mp)
                   1574: {
                   1575:        struct mountlist_entry *me;
                   1576:
                   1577:        mutex_enter(&mountlist_lock);
1.54      hannken  1578:        TAILQ_FOREACH(me, &mountlist, me_list)
1.52      hannken  1579:                if (me->me_type == ME_MOUNT && me->me_mount == mp)
                   1580:                        break;
                   1581:        KASSERT(me != NULL);
1.54      hannken  1582:        TAILQ_REMOVE(&mountlist, me, me_list);
1.52      hannken  1583:        mutex_exit(&mountlist_lock);
                   1584:        mountlist_free(me);
                   1585: }
                   1586:
                   1587: /*
                   1588:  * Unlocked variant to traverse the mountlist.
                   1589:  * To be used from DDB only.
                   1590:  */
                   1591: struct mount *
                   1592: _mountlist_next(struct mount *mp)
                   1593: {
                   1594:        struct mountlist_entry *me;
                   1595:
                   1596:        if (mp == NULL) {
1.54      hannken  1597:                me = TAILQ_FIRST(&mountlist);
1.52      hannken  1598:        } else {
1.54      hannken  1599:                TAILQ_FOREACH(me, &mountlist, me_list)
1.52      hannken  1600:                        if (me->me_type == ME_MOUNT && me->me_mount == mp)
                   1601:                                break;
                   1602:                if (me != NULL)
                   1603:                        me = TAILQ_NEXT(me, me_list);
                   1604:        }
                   1605:
                   1606:        while (me != NULL && me->me_type != ME_MOUNT)
                   1607:                me = TAILQ_NEXT(me, me_list);
                   1608:
                   1609:        return (me ? me->me_mount : NULL);
                   1610: }

CVSweb <webmaster@jp.NetBSD.org>