[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.157

1.157   ! chs         1: /*     $NetBSD: vfs_subr.c,v 1.156 2001/08/03 06:00:13 jdolecek Exp $  */
1.74      thorpej     2:
                      3: /*-
1.79      thorpej     4:  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.32      cgd        39:
1.29      cgd        40: /*
1.30      mycroft    41:  * Copyright (c) 1989, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        43:  * (c) UNIX System Laboratories, Inc.
                     44:  * All or some portions of this file are derived from material licensed
                     45:  * to the University of California by American Telephone and Telegraph
                     46:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     47:  * the permission of UNIX System Laboratories, Inc.
                     48:  *
                     49:  * Redistribution and use in source and binary forms, with or without
                     50:  * modification, are permitted provided that the following conditions
                     51:  * are met:
                     52:  * 1. Redistributions of source code must retain the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer.
                     54:  * 2. Redistributions in binary form must reproduce the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer in the
                     56:  *    documentation and/or other materials provided with the distribution.
                     57:  * 3. All advertising materials mentioning features or use of this software
                     58:  *    must display the following acknowledgement:
                     59:  *     This product includes software developed by the University of
                     60:  *     California, Berkeley and its contributors.
                     61:  * 4. Neither the name of the University nor the names of its contributors
                     62:  *    may be used to endorse or promote products derived from this software
                     63:  *    without specific prior written permission.
                     64:  *
                     65:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     66:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     67:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     68:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     69:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     70:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     71:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     72:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     73:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     74:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     75:  * SUCH DAMAGE.
                     76:  *
1.32      cgd        77:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        78:  */
                     79:
                     80: /*
                     81:  * External virtual filesystem routines
                     82:  */
1.78      mrg        83:
1.125     chs        84: #include "opt_ddb.h"
1.95      thorpej    85: #include "opt_compat_netbsd.h"
1.97      christos   86: #include "opt_compat_43.h"
1.29      cgd        87:
                     88: #include <sys/param.h>
1.30      mycroft    89: #include <sys/systm.h>
1.29      cgd        90: #include <sys/proc.h>
1.138     bouyer     91: #include <sys/kernel.h>
1.29      cgd        92: #include <sys/mount.h>
                     93: #include <sys/time.h>
1.46      mycroft    94: #include <sys/fcntl.h>
1.29      cgd        95: #include <sys/vnode.h>
1.30      mycroft    96: #include <sys/stat.h>
1.29      cgd        97: #include <sys/namei.h>
                     98: #include <sys/ucred.h>
                     99: #include <sys/buf.h>
                    100: #include <sys/errno.h>
                    101: #include <sys/malloc.h>
1.30      mycroft   102: #include <sys/domain.h>
                    103: #include <sys/mbuf.h>
1.51      christos  104: #include <sys/syscallargs.h>
1.58      thorpej   105: #include <sys/device.h>
1.71      fvdl      106: #include <sys/dirent.h>
1.50      christos  107:
1.30      mycroft   108: #include <miscfs/specfs/specdev.h>
1.113     fvdl      109: #include <miscfs/genfs/genfs.h>
                    110: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   111:
1.125     chs       112: #include <uvm/uvm.h>
                    113: #include <uvm/uvm_ddb.h>
1.129     mrg       114:
                    115: #include <sys/sysctl.h>
1.77      mrg       116:
1.30      mycroft   117: enum vtype iftovt_tab[16] = {
                    118:        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
                    119:        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
                    120: };
1.146     jdolecek  121: const int      vttoif_tab[9] = {
1.30      mycroft   122:        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
                    123:        S_IFSOCK, S_IFIFO, S_IFMT,
                    124: };
                    125:
1.31      mycroft   126: int doforce = 1;               /* 1 => permit forcible unmounting */
                    127: int prtactive = 0;             /* 1 => print out reclaim of active vnodes */
1.29      cgd       128:
1.117     fvdl      129: extern int dovfsusermount;     /* 1 => permit any user to mount filesystems */
                    130:
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) inserts bp at the head of list dp, linked through
 * its b_vnbufs entry.
 *
 * bufremvn(bp) unlinks bp from whatever b_vnbufs list it is on and then
 * stores NOLIST in its le_next link.
 *
 * bufremvn is wrapped in do { } while (0) so that it behaves as a single
 * statement: "if (x) bufremvn(bp); else ..." now parses correctly, which
 * it did not when the macro expanded to a bare brace block.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/* CONSTCOND */ 0)
1.113     fvdl      139: /* TAILQ_HEAD(freelst, vnode) vnode_free_list =        vnode free list (in vnode.h) */
                    140: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114     enami     141: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113     fvdl      142:
1.55      cgd       143: struct mntlist mountlist =                     /* mounted filesystem list */
                    144:     CIRCLEQ_HEAD_INITIALIZER(mountlist);
1.79      thorpej   145: struct vfs_list_head vfs_list =                        /* vfs list */
1.118     mycroft   146:     LIST_HEAD_INITIALIZER(vfs_list);
1.79      thorpej   147:
1.71      fvdl      148: struct nfs_public nfs_pub;                     /* publicly exported FS */
1.58      thorpej   149:
1.135     sommerfe  150: struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
                    151: static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
                    152: struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
                    153: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
                    154: struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
1.80      fvdl      155:
1.79      thorpej   156: /*
                    157:  * These define the root filesystem and device.
                    158:  */
                    159: struct mount *rootfs;
                    160: struct vnode *rootvnode;
1.80      fvdl      161: struct device *root_device;                    /* root device */
1.79      thorpej   162:
1.93      thorpej   163: struct pool vnode_pool;                                /* memory pool for vnodes */
                    164:
1.89      kleink    165: /*
                    166:  * Local declarations.
                    167:  */
1.50      christos  168: void insmntque __P((struct vnode *, struct mount *));
                    169: int getdevvp __P((dev_t, struct vnode **, enum vtype));
                    170: void vgoneall __P((struct vnode *));
                    171:
                    172: static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
                    173:                                  struct export_args *));
                    174: static int vfs_free_netcred __P((struct radix_node *, void *));
                    175: static void vfs_free_addrlist __P((struct netexport *));
1.51      christos  176:
                    177: #ifdef DEBUG
                    178: void printlockedvnodes __P((void));
                    179: #endif
                    180:
1.29      cgd       181: /*
1.30      mycroft   182:  * Initialize the vnode management data structures.
1.29      cgd       183:  */
1.50      christos  184: void
1.30      mycroft   185: vntblinit()
1.29      cgd       186: {
1.93      thorpej   187:
                    188:        pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
                    189:            0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
1.113     fvdl      190:
                    191:        /*
                    192:         * Initialize the filesystem syncer.
                    193:         */
                    194:        vn_initialize_syncerd();
1.29      cgd       195: }
                    196:
                    197: /*
1.80      fvdl      198:  * Mark a mount point as busy. Used to synchronize access and to delay
                    199:  * unmounting. Interlock is not released on failure.
1.29      cgd       200:  */
1.50      christos  201: int
1.80      fvdl      202: vfs_busy(mp, flags, interlkp)
                    203:        struct mount *mp;
                    204:        int flags;
                    205:        struct simplelock *interlkp;
1.29      cgd       206: {
1.80      fvdl      207:        int lkflags;
1.29      cgd       208:
1.103     sommerfe  209:        while (mp->mnt_flag & MNT_UNMOUNT) {
                    210:                int gone;
                    211:
1.80      fvdl      212:                if (flags & LK_NOWAIT)
                    213:                        return (ENOENT);
1.113     fvdl      214:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
                    215:                    && mp->mnt_unmounter == curproc)
                    216:                        return (EDEADLK);
1.80      fvdl      217:                if (interlkp)
                    218:                        simple_unlock(interlkp);
                    219:                /*
                    220:                 * Since all busy locks are shared except the exclusive
                    221:                 * lock granted when unmounting, the only place that a
                    222:                 * wakeup needs to be done is at the release of the
                    223:                 * exclusive lock at the end of dounmount.
1.103     sommerfe  224:                 *
1.106     sommerfe  225:                 * XXX MP: add spinlock protecting mnt_wcnt here once you
                    226:                 * can atomically unlock-and-sleep.
1.80      fvdl      227:                 */
1.103     sommerfe  228:                mp->mnt_wcnt++;
1.113     fvdl      229:                tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
1.103     sommerfe  230:                mp->mnt_wcnt--;
                    231:                gone = mp->mnt_flag & MNT_GONE;
                    232:
                    233:                if (mp->mnt_wcnt == 0)
                    234:                        wakeup(&mp->mnt_wcnt);
1.80      fvdl      235:                if (interlkp)
                    236:                        simple_lock(interlkp);
1.103     sommerfe  237:                if (gone)
                    238:                        return (ENOENT);
1.80      fvdl      239:        }
                    240:        lkflags = LK_SHARED;
                    241:        if (interlkp)
                    242:                lkflags |= LK_INTERLOCK;
                    243:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                    244:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       245:        return (0);
                    246: }
                    247:
                    248: /*
1.80      fvdl      249:  * Free a busy filesystem.
1.29      cgd       250:  */
                    251: void
1.80      fvdl      252: vfs_unbusy(mp)
                    253:        struct mount *mp;
1.29      cgd       254: {
                    255:
1.80      fvdl      256:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       257: }
                    258:
                    259: /*
1.80      fvdl      260:  * Lookup a filesystem type, and if found allocate and initialize
                    261:  * a mount structure for it.
                    262:  *
                    263:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       264:  */
1.50      christos  265: int
1.80      fvdl      266: vfs_rootmountalloc(fstypename, devname, mpp)
                    267:        char *fstypename;
                    268:        char *devname;
                    269:        struct mount **mpp;
1.29      cgd       270: {
1.80      fvdl      271:        struct vfsops *vfsp = NULL;
                    272:        struct mount *mp;
1.29      cgd       273:
1.152     jdolecek  274:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80      fvdl      275:                if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
                    276:                        break;
                    277:
                    278:        if (vfsp == NULL)
                    279:                return (ENODEV);
                    280:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     281:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      282:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
                    283:        (void)vfs_busy(mp, LK_NOWAIT, 0);
                    284:        LIST_INIT(&mp->mnt_vnodelist);
                    285:        mp->mnt_op = vfsp;
                    286:        mp->mnt_flag = MNT_RDONLY;
                    287:        mp->mnt_vnodecovered = NULLVP;
                    288:        vfsp->vfs_refcount++;
                    289:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
                    290:        mp->mnt_stat.f_mntonname[0] = '/';
                    291:        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    292:        *mpp = mp;
1.29      cgd       293:        return (0);
                    294: }
                    295:
                    296: /*
                    297:  * Lookup a mount point by filesystem identifier.
                    298:  */
                    299: struct mount *
1.80      fvdl      300: vfs_getvfs(fsid)
1.29      cgd       301:        fsid_t *fsid;
                    302: {
1.123     augustss  303:        struct mount *mp;
1.29      cgd       304:
1.80      fvdl      305:        simple_lock(&mountlist_slock);
1.38      mycroft   306:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
1.80      fvdl      307:             mp = mp->mnt_list.cqe_next) {
1.29      cgd       308:                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
1.80      fvdl      309:                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
                    310:                        simple_unlock(&mountlist_slock);
1.29      cgd       311:                        return (mp);
1.80      fvdl      312:                }
                    313:        }
                    314:        simple_unlock(&mountlist_slock);
1.29      cgd       315:        return ((struct mount *)0);
                    316: }
                    317:
                    318: /*
                    319:  * Get a new unique fsid
                    320:  */
                    321: void
1.127     assar     322: vfs_getnewfsid(mp)
1.29      cgd       323:        struct mount *mp;
                    324: {
                    325:        static u_short xxxfs_mntid;
                    326:        fsid_t tfsid;
1.80      fvdl      327:        int mtype;
1.29      cgd       328:
1.80      fvdl      329:        simple_lock(&mntid_slock);
1.127     assar     330:        mtype = makefstype(mp->mnt_op->vfs_name);
1.80      fvdl      331:        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
1.29      cgd       332:        mp->mnt_stat.f_fsid.val[1] = mtype;
                    333:        if (xxxfs_mntid == 0)
                    334:                ++xxxfs_mntid;
1.33      deraadt   335:        tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
1.29      cgd       336:        tfsid.val[1] = mtype;
1.38      mycroft   337:        if (mountlist.cqh_first != (void *)&mountlist) {
1.80      fvdl      338:                while (vfs_getvfs(&tfsid)) {
1.29      cgd       339:                        tfsid.val[0]++;
                    340:                        xxxfs_mntid++;
                    341:                }
                    342:        }
                    343:        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
1.80      fvdl      344:        simple_unlock(&mntid_slock);
1.29      cgd       345: }
                    346:
                    347: /*
1.30      mycroft   348:  * Make a 'unique' number from a mount type name.
1.29      cgd       349:  */
                    350: long
                    351: makefstype(type)
1.127     assar     352:        const char *type;
1.29      cgd       353: {
                    354:        long rv;
                    355:
                    356:        for (rv = 0; *type; type++) {
                    357:                rv <<= 2;
                    358:                rv ^= *type;
                    359:        }
                    360:        return rv;
                    361: }
1.30      mycroft   362:
1.80      fvdl      363:
1.30      mycroft   364: /*
                    365:  * Set vnode attributes to VNOVAL
                    366:  */
                    367: void
                    368: vattr_null(vap)
1.123     augustss  369:        struct vattr *vap;
1.30      mycroft   370: {
                    371:
                    372:        vap->va_type = VNON;
1.75      enami     373:
                    374:        /*
                    375:         * Assign individually so that it is safe even if size and
                    376:         * sign of each member are varied.
                    377:         */
                    378:        vap->va_mode = VNOVAL;
                    379:        vap->va_nlink = VNOVAL;
                    380:        vap->va_uid = VNOVAL;
                    381:        vap->va_gid = VNOVAL;
                    382:        vap->va_fsid = VNOVAL;
                    383:        vap->va_fileid = VNOVAL;
1.30      mycroft   384:        vap->va_size = VNOVAL;
1.75      enami     385:        vap->va_blocksize = VNOVAL;
1.76      christos  386:        vap->va_atime.tv_sec =
                    387:            vap->va_mtime.tv_sec =
                    388:            vap->va_ctime.tv_sec = VNOVAL;
                    389:        vap->va_atime.tv_nsec =
                    390:            vap->va_mtime.tv_nsec =
                    391:            vap->va_ctime.tv_nsec = VNOVAL;
1.75      enami     392:        vap->va_gen = VNOVAL;
                    393:        vap->va_flags = VNOVAL;
                    394:        vap->va_rdev = VNOVAL;
1.30      mycroft   395:        vap->va_bytes = VNOVAL;
                    396:        vap->va_vaflags = 0;
                    397: }
                    398:
                    399: /*
                    400:  * Routines having to do with the management of the vnode table.
                    401:  */
1.50      christos  402: extern int (**dead_vnodeop_p) __P((void *));
1.30      mycroft   403: long numvnodes;
                    404:
1.29      cgd       405: /*
                    406:  * Return the next vnode from the free list.
                    407:  */
1.50      christos  408: int
1.29      cgd       409: getnewvnode(tag, mp, vops, vpp)
                    410:        enum vtagtype tag;
                    411:        struct mount *mp;
1.50      christos  412:        int (**vops) __P((void *));
1.29      cgd       413:        struct vnode **vpp;
                    414: {
1.142     chs       415:        extern struct uvm_pagerops uvm_vnodeops;
                    416:        struct uvm_object *uobj;
1.80      fvdl      417:        struct proc *p = curproc;       /* XXX */
1.113     fvdl      418:        struct freelst *listhd;
                    419:        static int toggle;
1.80      fvdl      420:        struct vnode *vp;
1.153     thorpej   421:        int error = 0, tryalloc;
1.81      ross      422: #ifdef DIAGNOSTIC
1.30      mycroft   423:        int s;
1.81      ross      424: #endif
1.103     sommerfe  425:        if (mp) {
                    426:                /*
1.106     sommerfe  427:                 * Mark filesystem busy while we're creating a vnode.
                    428:                 * If unmount is in progress, this will wait; if the
                    429:                 * unmount succeeds (only if umount -f), this will
                    430:                 * return an error.  If the unmount fails, we'll keep
                    431:                 * going afterwards.
                    432:                 * (This puts the per-mount vnode list logically under
                    433:                 * the protection of the vfs_busy lock).
1.103     sommerfe  434:                 */
1.113     fvdl      435:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                    436:                if (error && error != EDEADLK)
1.103     sommerfe  437:                        return error;
                    438:        }
1.29      cgd       439:
1.113     fvdl      440:        /*
                    441:         * We must choose whether to allocate a new vnode or recycle an
                    442:         * existing one. The criterion for allocating a new one is that
                    443:         * the total number of vnodes is less than the number desired or
                    444:         * there are no vnodes on either free list. Generally we only
                    445:         * want to recycle vnodes that have no buffers associated with
                    446:         * them, so we look first on the vnode_free_list. If it is empty,
                    447:         * we next consider vnodes with referencing buffers on the
                    448:         * vnode_hold_list. The toggle ensures that half the time we
                    449:         * will use a buffer from the vnode_hold_list, and half the time
                    450:         * we will allocate a new one unless the list has grown to twice
                    451:         * the desired size. We are reticent to recycle vnodes from the
                    452:         * vnode_hold_list because we will lose the identity of all its
                    453:         * referencing buffers.
                    454:         */
1.142     chs       455:
1.153     thorpej   456:  try_again:
                    457:        vp = NULL;
                    458:
                    459:        simple_lock(&vnode_free_list_slock);
                    460:
1.113     fvdl      461:        toggle ^= 1;
                    462:        if (numvnodes > 2 * desiredvnodes)
                    463:                toggle = 0;
                    464:
1.153     thorpej   465:        tryalloc = numvnodes < desiredvnodes ||
1.113     fvdl      466:            (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
1.153     thorpej   467:             (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle));
                    468:
                    469:        if (tryalloc &&
                    470:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.80      fvdl      471:                simple_unlock(&vnode_free_list_slock);
1.142     chs       472:                memset(vp, 0, sizeof(*vp));
1.153     thorpej   473:                simple_lock_init(&vp->v_interlock);
1.29      cgd       474:                numvnodes++;
                    475:        } else {
1.113     fvdl      476:                for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
                    477:                    vp = TAILQ_NEXT(vp, v_freelist)) {
1.105     wrstuden  478:                        if (simple_lock_try(&vp->v_interlock)) {
1.153     thorpej   479:                                if ((vp->v_flag & VLAYER) == 0)
1.105     wrstuden  480:                                        break;
                    481:                                if (VOP_ISLOCKED(vp) == 0)
                    482:                                        break;
                    483:                                else
                    484:                                        simple_unlock(&vp->v_interlock);
                    485:                        }
1.80      fvdl      486:                }
                    487:                /*
                    488:                 * Unless this is a bad time of the month, at most
                    489:                 * the first NCPUS items on the free list are
                    490:                 * locked, so this is close enough to being empty.
                    491:                 */
                    492:                if (vp == NULLVP) {
                    493:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl      494:                        if (mp && error != EDEADLK)
                    495:                                vfs_unbusy(mp);
1.153     thorpej   496:                        if (tryalloc) {
                    497:                                printf("WARNING: unable to allocate new "
                    498:                                    "vnode, retrying...\n");
                    499:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                    500:                                goto try_again;
                    501:                        }
1.132     jdolecek  502:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       503:                        *vpp = 0;
                    504:                        return (ENFILE);
                    505:                }
1.80      fvdl      506:                if (vp->v_usecount)
1.125     chs       507:                        panic("free vnode isn't, vp %p", vp);
1.113     fvdl      508:                TAILQ_REMOVE(listhd, vp, v_freelist);
1.30      mycroft   509:                /* see comment on why 0xdeadb is set at end of vgone (below) */
1.29      cgd       510:                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
1.80      fvdl      511:                simple_unlock(&vnode_free_list_slock);
1.30      mycroft   512:                vp->v_lease = NULL;
1.29      cgd       513:                if (vp->v_type != VBAD)
1.80      fvdl      514:                        vgonel(vp, p);
                    515:                else
                    516:                        simple_unlock(&vp->v_interlock);
1.30      mycroft   517: #ifdef DIAGNOSTIC
1.80      fvdl      518:                if (vp->v_data)
1.125     chs       519:                        panic("cleaned vnode isn't, vp %p", vp);
1.30      mycroft   520:                s = splbio();
                    521:                if (vp->v_numoutput)
1.125     chs       522:                        panic("clean vnode has pending I/O's, vp %p", vp);
1.30      mycroft   523:                splx(s);
                    524: #endif
1.29      cgd       525:                vp->v_flag = 0;
                    526:                vp->v_lastr = 0;
1.30      mycroft   527:                vp->v_ralen = 0;
                    528:                vp->v_maxra = 0;
                    529:                vp->v_lastw = 0;
                    530:                vp->v_lasta = 0;
                    531:                vp->v_cstart = 0;
                    532:                vp->v_clen = 0;
1.29      cgd       533:                vp->v_socket = 0;
                    534:        }
                    535:        vp->v_type = VNON;
1.104     wrstuden  536:        vp->v_vnlock = &vp->v_lock;
                    537:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.142     chs       538:        lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
1.29      cgd       539:        cache_purge(vp);
                    540:        vp->v_tag = tag;
                    541:        vp->v_op = vops;
                    542:        insmntque(vp, mp);
1.30      mycroft   543:        *vpp = vp;
1.29      cgd       544:        vp->v_usecount = 1;
1.30      mycroft   545:        vp->v_data = 0;
1.77      mrg       546:        simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
1.142     chs       547:
                    548:        /*
                    549:         * initialize uvm_object within vnode.
                    550:         */
                    551:
                    552:        uobj = &vp->v_uvm.u_obj;
                    553:        uobj->pgops = &uvm_vnodeops;
                    554:        TAILQ_INIT(&uobj->memq);
                    555:        vp->v_uvm.u_size = VSIZENOTSET;
                    556:
1.113     fvdl      557:        if (mp && error != EDEADLK)
                    558:                vfs_unbusy(mp);
1.29      cgd       559:        return (0);
1.130     fvdl      560: }
                    561:
                    562: /*
                    563:  * This is really just the reverse of getnewvnode(). Needed for
                    564:  * VFS_VGET functions who may need to push back a vnode in case
                    565:  * of a locking race.
                    566:  */
                    567: void
1.131     fvdl      568: ungetnewvnode(vp)
1.130     fvdl      569:        struct vnode *vp;
                    570: {
                    571: #ifdef DIAGNOSTIC
                    572:        if (vp->v_usecount != 1)
1.131     fvdl      573:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      574: #endif
                    575:        vp->v_usecount--;
                    576:        insmntque(vp, NULL);
                    577:        vp->v_type = VBAD;
                    578:
                    579:        simple_lock(&vp->v_interlock);
                    580:        /*
                    581:         * Insert at head of LRU list
                    582:         */
                    583:        simple_lock(&vnode_free_list_slock);
                    584:        if (vp->v_holdcnt > 0)
                    585:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    586:        else
                    587:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                    588:        simple_unlock(&vnode_free_list_slock);
                    589:        simple_unlock(&vp->v_interlock);
1.29      cgd       590: }
                    591:
                    592: /*
                    593:  * Move a vnode from one mount queue to another.
                    594:  */
1.50      christos  595: void
1.29      cgd       596: insmntque(vp, mp)
1.123     augustss  597:        struct vnode *vp;
                    598:        struct mount *mp;
1.29      cgd       599: {
                    600:
1.103     sommerfe  601: #ifdef DIAGNOSTIC
                    602:        if ((mp != NULL) &&
1.113     fvdl      603:            (mp->mnt_flag & MNT_UNMOUNT) &&
                    604:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    605:            vp->v_tag != VT_VFS) {
1.103     sommerfe  606:                panic("insmntque into dying filesystem");
                    607:        }
                    608: #endif
                    609:
1.80      fvdl      610:        simple_lock(&mntvnode_slock);
1.29      cgd       611:        /*
                    612:         * Delete from old mount point vnode list, if on one.
                    613:         */
                    614:        if (vp->v_mount != NULL)
                    615:                LIST_REMOVE(vp, v_mntvnodes);
                    616:        /*
                    617:         * Insert into list of vnodes for the new mount point, if available.
                    618:         */
1.80      fvdl      619:        if ((vp->v_mount = mp) != NULL)
                    620:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    621:        simple_unlock(&mntvnode_slock);
1.29      cgd       622: }
                    623:
                    624: /*
                    625:  * Update outstanding I/O count and do wakeup if requested.
                    626:  */
1.50      christos  627: void
1.29      cgd       628: vwakeup(bp)
1.123     augustss  629:        struct buf *bp;
1.29      cgd       630: {
1.123     augustss  631:        struct vnode *vp;
1.29      cgd       632:
1.50      christos  633:        if ((vp = bp->b_vp) != NULL) {
1.30      mycroft   634:                if (--vp->v_numoutput < 0)
1.125     chs       635:                        panic("vwakeup: neg numoutput, vp %p", vp);
1.29      cgd       636:                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                    637:                        vp->v_flag &= ~VBWAIT;
                    638:                        wakeup((caddr_t)&vp->v_numoutput);
                    639:                }
                    640:        }
                    641: }
                    642:
                    643: /*
                    644:  * Flush out and invalidate all buffers associated with a vnode.
1.126     mycroft   645:  * Called with the underlying vnode locked, which should prevent new dirty
                    646:  * buffers from being queued.
                          *
                          * If V_SAVE is set in flags, dirty data is written back first: pages
                          * are flushed with PGO_CLEANIT and VOP_FSYNC() is called with cred and
                          * p.  Without V_SAVE nothing is written by this function.  slpflag and
                          * slptimeo are handed to tsleep() when waiting for a busy buffer.
                          *
                          * Returns 0 on success, EIO if the page flush reports failure, or the
                          * error returned by VOP_FSYNC() or tsleep().
1.29      cgd       647:  */
1.30      mycroft   648: int
                    649: vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1.123     augustss  650:        struct vnode *vp;
1.30      mycroft   651:        int flags;
                    652:        struct ucred *cred;
                    653:        struct proc *p;
                    654:        int slpflag, slptimeo;
1.29      cgd       655: {
1.142     chs       656:        struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.126     mycroft   657:        struct buf *bp, *nbp;
1.142     chs       658:        int s, error, rv;
                                /* PGO_CLEANIT is requested only when the caller passed V_SAVE. */
                    659:        int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
                    660:                (flags & V_SAVE ? PGO_CLEANIT : 0);
                    661:
                    662:        /* XXXUBC this doesn't look at flags or slp* */
                                /*
                                 * Regular files: flush the pages of the vnode's uvm_object, with
                                 * the object lock held around the pager call.  A zero result from
                                 * pgo_flush is reported to the caller as EIO.
                                 */
                    663:        if (vp->v_type == VREG) {
                    664:                simple_lock(&uobj->vmobjlock);
                    665:                rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
                    666:                simple_unlock(&uobj->vmobjlock);
                    667:                if (!rv) {
                    668:                        return EIO;
                    669:                }
                    670:        }
                                /*
                                 * V_SAVE: push dirty buffers out through the file system before
                                 * invalidating.  With DIAGNOSTIC, verify at splbio that no output
                                 * is pending and the dirty list is empty afterwards.
                                 */
1.30      mycroft   671:        if (flags & V_SAVE) {
1.140     fvdl      672:                error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
1.126     mycroft   673:                if (error)
1.122     fvdl      674:                        return (error);
1.126     mycroft   675: #ifdef DIAGNOSTIC
1.122     fvdl      676:                s = splbio();
1.126     mycroft   677:                if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125     chs       678:                        panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113     fvdl      679:                splx(s);
1.126     mycroft   680: #endif
1.30      mycroft   681:        }
1.113     fvdl      682:
                                /* The buffer lists are walked at splbio from here to the end. */
1.115     fvdl      683:        s = splbio();
                    684:
                                /*
                                 * Every sleep below jumps back here, so the scan always starts
                                 * over from the head of the clean list (presumably because the
                                 * lists can change while we are asleep).
                                 */
1.126     mycroft   685: restart:
                    686:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                                        /* Fetch the successor first; brelse() below may unlink bp. */
                    687:                nbp = LIST_NEXT(bp, b_vnbufs);
                    688:                if (bp->b_flags & B_BUSY) {
                                                /*
                                                 * Busy buffer: ask to be woken and sleep on
                                                 * it.  A non-zero tsleep() result aborts the
                                                 * whole operation.
                                                 */
                    689:                        bp->b_flags |= B_WANTED;
                    690:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    691:                            "vinvalbuf", slptimeo);
                    692:                        if (error) {
                    693:                                splx(s);
                    694:                                return (error);
                    695:                        }
                    696:                        goto restart;
1.113     fvdl      697:                }
                                        /* Claim the buffer, mark it invalid and release it. */
1.126     mycroft   698:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    699:                brelse(bp);
                    700:        }
1.30      mycroft   701:
                                /* Same treatment for the dirty list, plus the late-DELWRI check. */
1.126     mycroft   702:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    703:                nbp = LIST_NEXT(bp, b_vnbufs);
                    704:                if (bp->b_flags & B_BUSY) {
                    705:                        bp->b_flags |= B_WANTED;
                    706:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    707:                            "vinvalbuf", slptimeo);
                    708:                        if (error) {
                    709:                                splx(s);
                    710:                                return (error);
1.29      cgd       711:                        }
1.126     mycroft   712:                        goto restart;
                    713:                }
                    714:                /*
                    715:                 * XXX Since there are no node locks for NFS, I believe
                    716:                 * there is a slight chance that a delayed write will
                    717:                 * occur while sleeping just above, so check for it.
                    718:                 */
                    719:                if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                    720: #ifdef DEBUG
                    721:                        printf("buffer still DELWRI\n");
                    722: #endif
                                                /* Write it out instead of discarding, then rescan. */
1.63      mycroft   723:                        bp->b_flags |= B_BUSY | B_VFLUSH;
1.126     mycroft   724:                        VOP_BWRITE(bp);
                    725:                        goto restart;
                    726:                }
                    727:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    728:                brelse(bp);
                    729:        }
                    730:
                                /* With DIAGNOSTIC, both lists must be empty once the loops finish. */
                    731: #ifdef DIAGNOSTIC
                    732:        if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
                    733:                panic("vinvalbuf: flush failed, vp %p", vp);
1.113     fvdl      734: #endif
1.126     mycroft   735:
                    736:        splx(s);
                    737:
                    738:        return (0);
                    739: }
                    740:
                    741: /*
                    742:  * Destroy any in core blocks past the truncation length.
                    743:  * Called with the underlying vnode locked, which should prevent new dirty
                    744:  * buffers from being queued.
                          *
                          * Buffers whose logical block number is lbn or greater are invalidated;
                          * buffers below lbn are left alone.  slpflag and slptimeo are handed to
                          * tsleep() when waiting for a busy buffer.
                          *
                          * Returns 0 on success, EIO if the page flush reports failure, or the
                          * error returned by tsleep().
                    745:  */
                    746: int
                    747: vtruncbuf(vp, lbn, slpflag, slptimeo)
                    748:        struct vnode *vp;
                    749:        daddr_t lbn;
                    750:        int slpflag, slptimeo;
                    751: {
1.142     chs       752:        struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.126     mycroft   753:        struct buf *bp, *nbp;
1.142     chs       754:        int s, error, rv;
1.126     mycroft   755:
                                /* The whole function, including the page flush, runs at splbio. */
                    756:        s = splbio();
                                /*
                                 * Regular files: flush pages starting at the byte offset of block
                                 * lbn (lbn shifted by the mount's mnt_fs_bshift), rounded up to a
                                 * page boundary; the end argument is 0.  A zero result from
                                 * pgo_flush is reported as EIO.
                                 */
1.142     chs       757:        if (vp->v_type == VREG) {
                    758:                simple_lock(&uobj->vmobjlock);
                    759:                rv = (uobj->pgops->pgo_flush)(uobj,
1.145     chs       760:                    round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
                    761:                    PGO_FREE|PGO_SYNCIO);
1.142     chs       762:                simple_unlock(&uobj->vmobjlock);
                    763:                if (!rv) {
                    764:                        splx(s);
                    765:                        return EIO;
                    766:                }
                    767:        }
1.126     mycroft   768:
                                /* Every sleep below restarts the scan from the clean list. */
                    769: restart:
                    770:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                                        /* Fetch the successor first; brelse() below may unlink bp. */
                    771:                nbp = LIST_NEXT(bp, b_vnbufs);
                                        /* Blocks before the truncation point are kept. */
                    772:                if (bp->b_lblkno < lbn)
                    773:                        continue;
                    774:                if (bp->b_flags & B_BUSY) {
                                                /*
                                                 * Busy buffer: ask to be woken and sleep on
                                                 * it.  A non-zero tsleep() result aborts the
                                                 * whole operation.
                                                 */
                    775:                        bp->b_flags |= B_WANTED;
1.142     chs       776:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   777:                            "vtruncbuf", slptimeo);
                    778:                        if (error) {
                    779:                                splx(s);
                    780:                                return (error);
1.29      cgd       781:                        }
1.126     mycroft   782:                        goto restart;
1.29      cgd       783:                }
                                        /* Claim the buffer, mark it invalid and release it. */
1.126     mycroft   784:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    785:                brelse(bp);
1.29      cgd       786:        }
1.115     fvdl      787:
                                /* Same pass over the dirty list; dirty data past lbn is not written. */
1.126     mycroft   788:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    789:                nbp = LIST_NEXT(bp, b_vnbufs);
                    790:                if (bp->b_lblkno < lbn)
                    791:                        continue;
                    792:                if (bp->b_flags & B_BUSY) {
                    793:                        bp->b_flags |= B_WANTED;
1.142     chs       794:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   795:                            "vtruncbuf", slptimeo);
                    796:                        if (error) {
                    797:                                splx(s);
                    798:                                return (error);
                    799:                        }
                    800:                        goto restart;
                    801:                }
                    802:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    803:                brelse(bp);
                    804:        }
1.115     fvdl      805:
                    806:        splx(s);
                    807:
1.30      mycroft   808:        return (0);
                    809: }
                    810:
                         /*
                          * Start writes for all dirty buffers of a vnode.
                          *
                          * For a regular file the pages of the vnode's uvm_object are flushed
                          * first.  Each non-busy buffer on the dirty list is then written.  If
                          * sync is zero the function returns once the writes have been started;
                          * otherwise it also sleeps until v_numoutput drops to zero and repeats
                          * the whole pass while the dirty list is still non-empty.
                          */
                    811: void
                    812: vflushbuf(vp, sync)
1.123     augustss  813:        struct vnode *vp;
1.30      mycroft   814:        int sync;
                    815: {
1.142     chs       816:        struct uvm_object *uobj = &vp->v_uvm.u_obj;
1.123     augustss  817:        struct buf *bp, *nbp;
1.30      mycroft   818:        int s;
                    819:
                                /*
                                 * PGO_SYNCIO is requested only for a synchronous flush.  The
                                 * result of pgo_flush is not checked here.
                                 */
1.142     chs       820:        if (vp->v_type == VREG) {
                    821:                int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);
                    822:
                    823:                simple_lock(&uobj->vmobjlock);
                    824:                (uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
                    825:                simple_unlock(&uobj->vmobjlock);
                    826:        }
                    827:
                                /*
                                 * Each pass raises to splbio, picks the first non-busy dirty
                                 * buffer, drops the spl again to issue the write, and then
                                 * rescans the list from its head.
                                 */
1.30      mycroft   828: loop:
                    829:        s = splbio();
1.126     mycroft   830:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    831:                nbp = LIST_NEXT(bp, b_vnbufs);
                                        /* Busy buffers are skipped, not waited for. */
1.30      mycroft   832:                if ((bp->b_flags & B_BUSY))
                    833:                        continue;
                    834:                if ((bp->b_flags & B_DELWRI) == 0)
1.125     chs       835:                        panic("vflushbuf: not dirty, bp %p", bp);
1.63      mycroft   836:                bp->b_flags |= B_BUSY | B_VFLUSH;
1.30      mycroft   837:                splx(s);
                    838:                /*
                    839:                 * Wait for I/O associated with indirect blocks to complete,
                    840:                 * since there is no way to quickly wait for them below.
                    841:                 */
                                        /*
                                         * That is: only a buffer whose b_vp is a different vnode,
                                         * during a synchronous flush, is written with bwrite();
                                         * everything else goes out through bawrite().
                                         */
                    842:                if (bp->b_vp == vp || sync == 0)
                    843:                        (void) bawrite(bp);
                    844:                else
                    845:                        (void) bwrite(bp);
                    846:                goto loop;
                    847:        }
                                /* Asynchronous flush: the writes have been started, we are done. */
                    848:        if (sync == 0) {
                    849:                splx(s);
                    850:                return;
                    851:        }
                                /*
                                 * Sleep until all output has finished.  VBWAIT is set so that
                                 * vwakeup() issues a wakeup on &vp->v_numoutput.
                                 */
                    852:        while (vp->v_numoutput) {
                    853:                vp->v_flag |= VBWAIT;
                    854:                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
                    855:        }
                    856:        splx(s);
                                /* Buffers on the dirty list after the wait: report it and go again. */
1.126     mycroft   857:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30      mycroft   858:                vprint("vflushbuf: dirty", vp);
                    859:                goto loop;
                    860:        }
1.29      cgd       861: }
                    862:
                    863: /*
                    864:  * Associate a buffer with a vnode.
                    865:  */
1.50      christos  866: void
1.29      cgd       867: bgetvp(vp, bp)
1.123     augustss  868:        struct vnode *vp;
                    869:        struct buf *bp;
1.29      cgd       870: {
1.115     fvdl      871:        int s;
1.29      cgd       872:
                    873:        if (bp->b_vp)
1.125     chs       874:                panic("bgetvp: not free, bp %p", bp);
1.29      cgd       875:        VHOLD(vp);
1.115     fvdl      876:        s = splbio();
1.29      cgd       877:        bp->b_vp = vp;
                    878:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                    879:                bp->b_dev = vp->v_rdev;
                    880:        else
                    881:                bp->b_dev = NODEV;
                    882:        /*
                    883:         * Insert onto list for new vnode.
                    884:         */
                    885:        bufinsvn(bp, &vp->v_cleanblkhd);
1.115     fvdl      886:        splx(s);
1.29      cgd       887: }
                    888:
                    889: /*
                    890:  * Disassociate a buffer from a vnode.
                    891:  */
1.50      christos  892: void
1.29      cgd       893: brelvp(bp)
1.123     augustss  894:        struct buf *bp;
1.29      cgd       895: {
                    896:        struct vnode *vp;
1.115     fvdl      897:        int s;
1.29      cgd       898:
1.125     chs       899:        if (bp->b_vp == NULL)
                    900:                panic("brelvp: vp NULL, bp %p", bp);
1.115     fvdl      901:
                    902:        s = splbio();
1.113     fvdl      903:        vp = bp->b_vp;
1.29      cgd       904:        /*
                    905:         * Delete from old vnode list, if on one.
                    906:         */
                    907:        if (bp->b_vnbufs.le_next != NOLIST)
                    908:                bufremvn(bp);
1.142     chs       909:
                    910:        if (vp->v_type != VREG && (vp->v_flag & VONWORKLST) &&
                    911:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.113     fvdl      912:                vp->v_flag &= ~VONWORKLST;
                    913:                LIST_REMOVE(vp, v_synclist);
                    914:        }
1.142     chs       915:
                    916:        bp->b_vp = NULL;
1.29      cgd       917:        HOLDRELE(vp);
1.115     fvdl      918:        splx(s);
1.29      cgd       919: }
                    920:
                    921: /*
                    922:  * Reassign a buffer from one vnode to another.
                    923:  * Used to assign file specific control information
                    924:  * (indirect blocks) to the vnode to which they belong.
1.115     fvdl      925:  *
                    926:  * This function must be called at splbio().
1.29      cgd       927:  */
1.50      christos  928: void
1.29      cgd       929: reassignbuf(bp, newvp)
1.113     fvdl      930:        struct buf *bp;
                    931:        struct vnode *newvp;
1.29      cgd       932: {
1.113     fvdl      933:        struct buflists *listheadp;
                    934:        int delay;
1.29      cgd       935:
                    936:        /*
                    937:         * Delete from old vnode list, if on one.
                    938:         */
                    939:        if (bp->b_vnbufs.le_next != NOLIST)
                    940:                bufremvn(bp);
                    941:        /*
                    942:         * If dirty, put on list of dirty buffers;
                    943:         * otherwise insert onto list of clean buffers.
                    944:         */
1.113     fvdl      945:        if ((bp->b_flags & B_DELWRI) == 0) {
                    946:                listheadp = &newvp->v_cleanblkhd;
1.142     chs       947:                if (newvp->v_type != VREG &&
                    948:                    (newvp->v_flag & VONWORKLST) &&
1.113     fvdl      949:                    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
                    950:                        newvp->v_flag &= ~VONWORKLST;
                    951:                        LIST_REMOVE(newvp, v_synclist);
                    952:                }
                    953:        } else {
1.29      cgd       954:                listheadp = &newvp->v_dirtyblkhd;
1.113     fvdl      955:                if ((newvp->v_flag & VONWORKLST) == 0) {
                    956:                        switch (newvp->v_type) {
                    957:                        case VDIR:
                    958:                                delay = dirdelay;
                    959:                                break;
                    960:                        case VBLK:
                    961:                                if (newvp->v_specmountpoint != NULL) {
                    962:                                        delay = metadelay;
                    963:                                        break;
                    964:                                }
                    965:                                /* fall through */
                    966:                        default:
1.118     mycroft   967:                                delay = filedelay;
                    968:                                break;
1.113     fvdl      969:                        }
1.118     mycroft   970:                        if (!newvp->v_mount ||
                    971:                            (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
                    972:                                vn_syncer_add_to_worklist(newvp, delay);
1.113     fvdl      973:                }
                    974:        }
1.29      cgd       975:        bufinsvn(bp, listheadp);
                    976: }
                    977:
                    978: /*
                    979:  * Create a vnode for a block device.
1.59      thorpej   980:  * Used for root filesystem and swap areas.
1.29      cgd       981:  * Also used for memory file system special devices.
                    982:  */
1.50      christos  983: int
1.29      cgd       984: bdevvp(dev, vpp)
                    985:        dev_t dev;
                    986:        struct vnode **vpp;
                    987: {
1.30      mycroft   988:
                    989:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd       990: }
                    991:
                    992: /*
                    993:  * Create a vnode for a character device.
                    994:  * Used for kernfs and some console handling.
                    995:  */
1.50      christos  996: int
1.29      cgd       997: cdevvp(dev, vpp)
                    998:        dev_t dev;
                    999:        struct vnode **vpp;
                   1000: {
1.30      mycroft  1001:
                   1002:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd      1003: }
                   1004:
                   1005: /*
                   1006:  * Create a vnode for a device.
                   1007:  * Used by bdevvp (block device) for root file system etc.,
                   1008:  * and by cdevvp (character device) for console and kernfs.
                   1009:  */
1.50      christos 1010: int
1.29      cgd      1011: getdevvp(dev, vpp, type)
                   1012:        dev_t dev;
                   1013:        struct vnode **vpp;
                   1014:        enum vtype type;
                   1015: {
1.123     augustss 1016:        struct vnode *vp;
1.29      cgd      1017:        struct vnode *nvp;
                   1018:        int error;
                   1019:
1.80      fvdl     1020:        if (dev == NODEV) {
                   1021:                *vpp = NULLVP;
1.29      cgd      1022:                return (0);
1.80      fvdl     1023:        }
1.50      christos 1024:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1025:        if (error) {
                   1026:                *vpp = NULLVP;
                   1027:                return (error);
                   1028:        }
                   1029:        vp = nvp;
                   1030:        vp->v_type = type;
1.50      christos 1031:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd      1032:                vput(vp);
                   1033:                vp = nvp;
                   1034:        }
                   1035:        *vpp = vp;
                   1036:        return (0);
                   1037: }
                   1038:
                   1039: /*
                   1040:  * Check to see if the new vnode represents a special device
                   1041:  * for which we already have a vnode (either because of
                   1042:  * bdevvp() or because of a different vnode representing
                   1043:  * the same block device). If such an alias exists, deallocate
                   1044:  * the existing contents and return the aliased vnode. The
                   1045:  * caller is responsible for filling it with its new contents.
                   1046:  */
                   1047: struct vnode *
                   1048: checkalias(nvp, nvp_rdev, mp)
1.123     augustss 1049:        struct vnode *nvp;
1.29      cgd      1050:        dev_t nvp_rdev;
                   1051:        struct mount *mp;
                   1052: {
1.80      fvdl     1053:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1054:        struct vnode *vp;
1.29      cgd      1055:        struct vnode **vpp;
                   1056:
                   1057:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                   1058:                return (NULLVP);
                   1059:
                   1060:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                   1061: loop:
1.80      fvdl     1062:        simple_lock(&spechash_slock);
1.29      cgd      1063:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                   1064:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                   1065:                        continue;
                   1066:                /*
                   1067:                 * Alias, but not in use, so flush it out.
                   1068:                 */
1.80      fvdl     1069:                simple_lock(&vp->v_interlock);
1.29      cgd      1070:                if (vp->v_usecount == 0) {
1.80      fvdl     1071:                        simple_unlock(&spechash_slock);
                   1072:                        vgonel(vp, p);
1.29      cgd      1073:                        goto loop;
                   1074:                }
1.80      fvdl     1075:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
                   1076:                        simple_unlock(&spechash_slock);
1.29      cgd      1077:                        goto loop;
1.80      fvdl     1078:                }
1.29      cgd      1079:                break;
                   1080:        }
1.34      cgd      1081:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd      1082:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej  1083:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                   1084:                /* XXX Erg. */
                   1085:                if (nvp->v_specinfo == NULL) {
                   1086:                        simple_unlock(&spechash_slock);
                   1087:                        uvm_wait("checkalias");
                   1088:                        goto loop;
                   1089:                }
                   1090:
1.29      cgd      1091:                nvp->v_rdev = nvp_rdev;
                   1092:                nvp->v_hashchain = vpp;
                   1093:                nvp->v_specnext = *vpp;
1.113     fvdl     1094:                nvp->v_specmountpoint = NULL;
1.80      fvdl     1095:                simple_unlock(&spechash_slock);
1.62      kleink   1096:                nvp->v_speclockf = NULL;
1.29      cgd      1097:                *vpp = nvp;
1.80      fvdl     1098:                if (vp != NULLVP) {
1.29      cgd      1099:                        nvp->v_flag |= VALIASED;
                   1100:                        vp->v_flag |= VALIASED;
                   1101:                        vput(vp);
                   1102:                }
                   1103:                return (NULLVP);
                   1104:        }
1.80      fvdl     1105:        simple_unlock(&spechash_slock);
                   1106:        VOP_UNLOCK(vp, 0);
                   1107:        simple_lock(&vp->v_interlock);
                   1108:        vclean(vp, 0, p);
1.29      cgd      1109:        vp->v_op = nvp->v_op;
                   1110:        vp->v_tag = nvp->v_tag;
1.104     wrstuden 1111:        vp->v_vnlock = &vp->v_lock;
                   1112:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd      1113:        nvp->v_type = VNON;
                   1114:        insmntque(vp, mp);
                   1115:        return (vp);
                   1116: }
                   1117:
                   1118: /*
                   1119:  * Grab a particular vnode from the free list, increment its
1.83      fvdl     1120:  * reference count and lock it. If the vnode lock bit is set the
                   1121:  * vnode is being eliminated in vgone. In that case, we can not
                   1122:  * grab the vnode, so the process is awakened when the transition is
                   1123:  * completed, and an error returned to indicate that the vnode is no
                   1124:  * longer usable (possibly having been changed to a new file system type).
                          *
                          * The "vnode lock bit" tested here is VXLOCK in vp->v_flag.
                          *
                          * If LK_INTERLOCK is set in flags the caller already holds
                          * vp->v_interlock; otherwise it is taken here.
                          *
                          * Return values:
                          *   EBUSY    VXLOCK was set and LK_NOWAIT was requested.
                          *   ENOENT   VXLOCK was set; returned after sleeping on vp.
                          *   0        reference taken and no lock type was requested
                          *            (flags & LK_TYPE_MASK is zero).
                          *   other    whatever vn_lock() returned when a lock type was
                          *            requested; on failure the reference taken here is
                          *            dropped again without calling VOP_INACTIVE.
1.29      cgd      1125:  */
1.30      mycroft  1126: int
1.80      fvdl     1127: vget(vp, flags)
                   1128:        struct vnode *vp;
                   1129:        int flags;
1.29      cgd      1130: {
1.80      fvdl     1131:        int error;
1.29      cgd      1132:
1.30      mycroft  1133:        /*
                   1134:         * If the vnode is in the process of being cleaned out for
                   1135:         * another use, we wait for the cleaning to finish and then
1.80      fvdl     1136:         * return failure. Cleaning is determined by checking that
                   1137:         * the VXLOCK flag is set.
                   1138:         */
1.142     chs      1139:
1.80      fvdl     1140:        if ((flags & LK_INTERLOCK) == 0)
                   1141:                simple_lock(&vp->v_interlock);
                   1142:        if (vp->v_flag & VXLOCK) {
1.142     chs      1143:                if (flags & LK_NOWAIT) {
1.143     sommerfe 1144:                        simple_unlock(&vp->v_interlock);
1.142     chs      1145:                        return EBUSY;
                   1146:                }
1.29      cgd      1147:                vp->v_flag |= VXWANT;
                                        /*
                                         * Sleep with the interlock passed to ltsleep().
                                         * NOTE(review): PNORELOCK presumably means the interlock
                                         * is not retaken on wakeup, which is why there is no
                                         * unlock before the return below -- confirm in ltsleep(9).
                                         */
1.134     sommerfe 1148:                ltsleep((caddr_t)vp, PINOD|PNORELOCK,
                   1149:                    "vget", 0, &vp->v_interlock);
1.80      fvdl     1150:                return (ENOENT);
1.29      cgd      1151:        }
                                /*
                                 * A vnode with no users is taken off the hold list or the
                                 * free list (chosen by v_holdcnt) before it is referenced.
                                 */
1.80      fvdl     1152:        if (vp->v_usecount == 0) {
                   1153:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1154:                if (vp->v_holdcnt > 0)
                   1155:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1156:                else
                   1157:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1158:                simple_unlock(&vnode_free_list_slock);
                   1159:        }
1.29      cgd      1160:        vp->v_usecount++;
1.112     mycroft  1161: #ifdef DIAGNOSTIC
                   1162:        if (vp->v_usecount == 0) {
                   1163:                vprint("vget", vp);
1.125     chs      1164:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft  1165:        }
                   1166: #endif
1.80      fvdl     1167:        if (flags & LK_TYPE_MASK) {
1.113     fvdl     1168:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
                   1169:                        /*
                   1170:                         * must expand vrele here because we do not want
                   1171:                         * to call VOP_INACTIVE if the reference count
                   1172:                         * drops back to zero since it was never really
                   1173:                         * active. We must remove it from the free list
                   1174:                         * before sleeping so that multiple processes do
                   1175:                         * not try to recycle it.
                                                 *
                                                 * The interlock is taken again here, so vn_lock()
                                                 * is presumably expected to have released it
                                                 * (LK_INTERLOCK) even though it failed.
                   1176:                         */
                   1177:                        simple_lock(&vp->v_interlock);
                   1178:                        vp->v_usecount--;
                   1179:                        if (vp->v_usecount > 0) {
                   1180:                                simple_unlock(&vp->v_interlock);
                   1181:                                return (error);
                   1182:                        }
                   1183:                        /*
                   1184:                         * insert at tail of LRU list
                   1185:                         */
                   1186:                        simple_lock(&vnode_free_list_slock);
                   1187:                        if (vp->v_holdcnt > 0)
                   1188:                                TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
                   1189:                                    v_freelist);
                   1190:                        else
                   1191:                                TAILQ_INSERT_TAIL(&vnode_free_list, vp,
                   1192:                                    v_freelist);
                   1193:                        simple_unlock(&vnode_free_list_slock);
                   1194:                        simple_unlock(&vp->v_interlock);
                   1195:                }
1.80      fvdl     1196:                return (error);
                   1197:        }
                   1198:        simple_unlock(&vp->v_interlock);
1.29      cgd      1199:        return (0);
                   1200: }
                   1201:
                   1202: /*
                   1203:  * vput(), just unlock and vrele()
                          *
                          * Drops one use reference on vp.  While other references remain,
                          * the vnode is unlocked with VOP_UNLOCK() and nothing else happens.
                          * When the last reference goes away the vnode is queued at the tail
                          * of the hold list (v_holdcnt > 0) or of the free list, any VTEXT
                          * page accounting is moved from uvmexp.vtextpages to
                          * uvmexp.vnodepages, VTEXT is cleared, and VOP_INACTIVE() is called
                          * in place of an explicit unlock.
                          *
                          * NOTE(review): the caller presumably holds the vnode lock on
                          * entry -- confirm against callers.
                   1204:  */
                   1205: void
                   1206: vput(vp)
1.80      fvdl     1207:        struct vnode *vp;
1.29      cgd      1208: {
1.80      fvdl     1209:        struct proc *p = curproc;       /* XXX */
1.30      mycroft  1210:
1.111     mycroft  1211: #ifdef DIAGNOSTIC
1.80      fvdl     1212:        if (vp == NULL)
                   1213:                panic("vput: null vp");
                   1214: #endif
                   1215:        simple_lock(&vp->v_interlock);
                   1216:        vp->v_usecount--;
                   1217:        if (vp->v_usecount > 0) {
                   1218:                simple_unlock(&vp->v_interlock);
                   1219:                VOP_UNLOCK(vp, 0);
                   1220:                return;
                   1221:        }
                   1222: #ifdef DIAGNOSTIC
                   1223:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1224:                vprint("vput: bad ref count", vp);
                   1225:                panic("vput: ref cnt");
                   1226:        }
                   1227: #endif
                   1228:        /*
1.87      pk       1229:         * Insert at tail of LRU list.
1.80      fvdl     1230:         */
                   1231:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1232:        if (vp->v_holdcnt > 0)
                   1233:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1234:        else
                   1235:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1236:        simple_unlock(&vnode_free_list_slock);
                                /* No users left: stop counting its pages as text pages. */
1.147     chs      1237:        if (vp->v_flag & VTEXT) {
                   1238:                uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
                   1239:                uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
                   1240:        }
1.142     chs      1241:        vp->v_flag &= ~VTEXT;
1.80      fvdl     1242:        simple_unlock(&vp->v_interlock);
                   1243:        VOP_INACTIVE(vp, p);
1.29      cgd      1244: }
                   1245:
                   1246: /*
                   1247:  * Vnode release.
                   1248:  * If count drops to zero, call inactive routine and return to freelist.
                          *
                          * Unlike vput(), no VOP_UNLOCK() is issued here.  On the last
                          * reference the vnode is queued at the tail of the hold list
                          * (v_holdcnt > 0) or of the free list, VTEXT accounting is undone,
                          * and the vnode is locked with vn_lock(LK_EXCLUSIVE | LK_INTERLOCK)
                          * before VOP_INACTIVE() is called.  If that lock attempt fails,
                          * VOP_INACTIVE() is skipped.
                   1249:  */
                   1250: void
                   1251: vrele(vp)
1.80      fvdl     1252:        struct vnode *vp;
1.29      cgd      1253: {
1.80      fvdl     1254:        struct proc *p = curproc;       /* XXX */
1.29      cgd      1255:
                   1256: #ifdef DIAGNOSTIC
                   1257:        if (vp == NULL)
                   1258:                panic("vrele: null vp");
                   1259: #endif
1.80      fvdl     1260:        simple_lock(&vp->v_interlock);
1.29      cgd      1261:        vp->v_usecount--;
1.80      fvdl     1262:        if (vp->v_usecount > 0) {
                   1263:                simple_unlock(&vp->v_interlock);
1.29      cgd      1264:                return;
1.80      fvdl     1265:        }
1.29      cgd      1266: #ifdef DIAGNOSTIC
1.80      fvdl     1267:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd      1268:                vprint("vrele: bad ref count", vp);
1.142     chs      1269:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd      1270:        }
                   1271: #endif
1.30      mycroft  1272:        /*
1.87      pk       1273:         * Insert at tail of LRU list.
1.30      mycroft  1274:         */
1.80      fvdl     1275:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1276:        if (vp->v_holdcnt > 0)
                   1277:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1278:        else
                   1279:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1280:        simple_unlock(&vnode_free_list_slock);
                                /* No users left: stop counting its pages as text pages. */
1.147     chs      1281:        if (vp->v_flag & VTEXT) {
                   1282:                uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
                   1283:                uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
                   1284:        }
1.142     chs      1285:        vp->v_flag &= ~VTEXT;
                                /* The interlock is still held and is handed to vn_lock(). */
1.80      fvdl     1286:        if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
                   1287:                VOP_INACTIVE(vp, p);
1.29      cgd      1288: }
                   1289:
1.80      fvdl     1290: #ifdef DIAGNOSTIC
1.29      cgd      1291: /*
                   1292:  * Page or buffer structure gets a reference.
                          *
                          * Increments vp->v_holdcnt under the vnode interlock.  If the vnode
                          * currently has no holds and no users and is still linked on a list,
                          * it is first moved from vnode_free_list to the tail of
                          * vnode_hold_list.
                   1293:  */
1.30      mycroft  1294: void
1.29      cgd      1295: vhold(vp)
1.123     augustss 1296:        struct vnode *vp;
1.29      cgd      1297: {
                   1298:
1.113     fvdl     1299:        /*
                   1300:         * If it is on the freelist and the hold count is currently
                   1301:         * zero, move it to the hold list. The test of the back
                   1302:         * pointer and the use reference count of zero is because
                   1303:         * it will be removed from a free list by getnewvnode,
                   1304:         * but will not have its reference count incremented until
                   1305:         * after calling vgone. If the reference count were
                   1306:         * incremented first, vgone would (incorrectly) try to
                   1307:         * close the previous instance of the underlying object.
                   1308:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1309:         * getnewvnode after removing it from a freelist to ensure
                   1310:         * that we do not try to move it here.
                   1311:         */
                   1312:        simple_lock(&vp->v_interlock);
                   1313:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1314:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1315:                simple_lock(&vnode_free_list_slock);
                   1316:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1317:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1318:                simple_unlock(&vnode_free_list_slock);
                   1319:        }
1.29      cgd      1320:        vp->v_holdcnt++;
1.80      fvdl     1321:        simple_unlock(&vp->v_interlock);
1.29      cgd      1322: }
                   1323:
                   1324: /*
                   1325:  * Page or buffer structure frees a reference.
                          *
                          * Decrements vp->v_holdcnt under the vnode interlock and panics if
                          * the count is not positive on entry.  When the last hold is dropped
                          * on a vnode that has no users and is still linked on a list, the
                          * vnode is moved from vnode_hold_list to the tail of
                          * vnode_free_list.
                   1326:  */
1.30      mycroft  1327: void
1.29      cgd      1328: holdrele(vp)
1.123     augustss 1329:        struct vnode *vp;
1.29      cgd      1330: {
                   1331:
1.80      fvdl     1332:        simple_lock(&vp->v_interlock);
1.29      cgd      1333:        if (vp->v_holdcnt <= 0)
1.125     chs      1334:                panic("holdrele: holdcnt vp %p", vp);
1.29      cgd      1335:        vp->v_holdcnt--;
1.142     chs      1336:
1.113     fvdl     1337:        /*
                   1338:         * If it is on the holdlist and the hold count drops to
                   1339:         * zero, move it to the free list. The test of the back
                   1340:         * pointer and the use reference count of zero is because
                   1341:         * it will be removed from a free list by getnewvnode,
                   1342:         * but will not have its reference count incremented until
                   1343:         * after calling vgone. If the reference count were
                   1344:         * incremented first, vgone would (incorrectly) try to
                   1345:         * close the previous instance of the underlying object.
                   1346:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1347:         * getnewvnode after removing it from a freelist to ensure
                   1348:         * that we do not try to move it here.
                   1349:         */
1.142     chs      1350:
1.113     fvdl     1351:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1352:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1353:                simple_lock(&vnode_free_list_slock);
                   1354:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1355:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1356:                simple_unlock(&vnode_free_list_slock);
                   1357:        }
1.81      ross     1358:        simple_unlock(&vp->v_interlock);
                   1359: }
                   1360:
                   1361: /*
                   1362:  * Vnode reference.
                          *
                          * Adds one use reference to a vnode that already has at least one,
                          * under the vnode interlock.  Panics if v_usecount is not positive
                          * on entry: as the panic message says, such a vnode has to be
                          * obtained with vget() instead.
                   1363:  */
                   1364: void
                   1365: vref(vp)
                   1366:        struct vnode *vp;
                   1367: {
                   1368:
                   1369:        simple_lock(&vp->v_interlock);
                   1370:        if (vp->v_usecount <= 0)
1.125     chs      1371:                panic("vref used where vget required, vp %p", vp);
1.81      ross     1372:        vp->v_usecount++;
1.112     mycroft  1373: #ifdef DIAGNOSTIC
                                /* The counter wrapped around to zero after the increment. */
                   1374:        if (vp->v_usecount == 0) {
                   1375:                vprint("vref", vp);
1.125     chs      1376:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft  1377:        }
                   1378: #endif
1.80      fvdl     1379:        simple_unlock(&vp->v_interlock);
1.29      cgd      1380: }
1.80      fvdl     1381: #endif /* DIAGNOSTIC */
1.29      cgd      1382:
                   1383: /*
                   1384:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1385:  *
                   1386:  * If MNT_NOFORCE is specified, there should not be any active ones,
                   1387:  * return error if any are found (nb: this is a user error, not a
                   1388:  * system error). If MNT_FORCE is specified, detach any active vnodes
                   1389:  * that are found.
                          *
                          * The flag bits actually tested in this function are:
                          *   SKIPSYSTEM  leave vnodes marked VSYSTEM alone.
                          *   WRITECLOSE  only act on VREG vnodes with v_writecount != 0.
                          *   FORCECLOSE  forcibly clean vnodes that are still in use.
                          *
                          * skipvp, if it is on the list, is never touched.
                          *
                          * Returns EBUSY if at least one in-use vnode was left alone
                          * (FORCECLOSE not set), otherwise 0.
                   1390:  */
1.30      mycroft  1391: #ifdef DEBUG
                   1392: int busyprt = 0;       /* print out busy vnodes */
                   1393: struct ctldebug debug1 = { "busyprt", &busyprt };
                   1394: #endif
1.29      cgd      1395:
1.50      christos 1396: int
1.29      cgd      1397: vflush(mp, skipvp, flags)
                   1398:        struct mount *mp;
                   1399:        struct vnode *skipvp;
                   1400:        int flags;
                   1401: {
1.80      fvdl     1402:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1403:        struct vnode *vp, *nvp;
1.29      cgd      1404:        int busy = 0;
                   1405:
1.80      fvdl     1406:        simple_lock(&mntvnode_slock);
1.29      cgd      1407: loop:
                   1408:        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                                        /*
                                         * The vnode no longer belongs to mp; restart the scan
                                         * from the head of the list.
                                         */
                   1409:                if (vp->v_mount != mp)
                   1410:                        goto loop;
                                        /* Fetch the successor now, before vp can be torn down. */
                   1411:                nvp = vp->v_mntvnodes.le_next;
                   1412:                /*
                   1413:                 * Skip over a selected vnode.
                   1414:                 */
                   1415:                if (vp == skipvp)
                   1416:                        continue;
1.80      fvdl     1417:                simple_lock(&vp->v_interlock);
1.29      cgd      1418:                /*
                   1419:                 * Skip over vnodes marked VSYSTEM.
                   1420:                 */
1.80      fvdl     1421:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                   1422:                        simple_unlock(&vp->v_interlock);
1.29      cgd      1423:                        continue;
1.80      fvdl     1424:                }
1.29      cgd      1425:                /*
1.30      mycroft  1426:                 * If WRITECLOSE is set, only flush out regular file
                   1427:                 * vnodes open for writing.
                   1428:                 */
                   1429:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1430:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                   1431:                        simple_unlock(&vp->v_interlock);
1.30      mycroft  1432:                        continue;
1.92      thorpej  1433:                }
1.30      mycroft  1434:                /*
1.29      cgd      1435:                 * With v_usecount == 0, all we need to do is clear
                   1436:                 * out the vnode data structures and we are done.
                                         *
                                         * The mount list lock is dropped around vgonel(),
                                         * which is entered with the vnode interlock still held.
                   1437:                 */
                   1438:                if (vp->v_usecount == 0) {
1.80      fvdl     1439:                        simple_unlock(&mntvnode_slock);
                   1440:                        vgonel(vp, p);
                   1441:                        simple_lock(&mntvnode_slock);
1.29      cgd      1442:                        continue;
                   1443:                }
                   1444:                /*
1.30      mycroft  1445:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1446:                 * For block or character devices, revert to an
                   1447:                 * anonymous device. For all other files, just kill them.
                   1448:                 */
                   1449:                if (flags & FORCECLOSE) {
1.80      fvdl     1450:                        simple_unlock(&mntvnode_slock);
1.29      cgd      1451:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.80      fvdl     1452:                                vgonel(vp, p);
1.29      cgd      1453:                        } else {
1.80      fvdl     1454:                                vclean(vp, 0, p);
1.30      mycroft  1455:                                vp->v_op = spec_vnodeop_p;
1.29      cgd      1456:                                insmntque(vp, (struct mount *)0);
                   1457:                        }
1.80      fvdl     1458:                        simple_lock(&mntvnode_slock);
1.29      cgd      1459:                        continue;
                   1460:                }
1.30      mycroft  1461: #ifdef DEBUG
1.29      cgd      1462:                if (busyprt)
                   1463:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1464: #endif
                                        /* In use and not forced: leave it alone and count it. */
1.80      fvdl     1465:                simple_unlock(&vp->v_interlock);
1.29      cgd      1466:                busy++;
                   1467:        }
1.80      fvdl     1468:        simple_unlock(&mntvnode_slock);
1.29      cgd      1469:        if (busy)
                   1470:                return (EBUSY);
                   1471:        return (0);
                   1472: }
                   1473:
                   1474: /*
                   1475:  * Disassociate the underlying file system from a vnode.
                          *
                          * Entered with vp->v_interlock held; it is handed to VOP_LOCK()
                          * through LK_INTERLOCK.  Panics if VXLOCK is already set on the
                          * vnode, otherwise sets it for the duration of the call.
                          *
                          * If DOCLOSE is set in flags, cached buffers are flushed with
                          * vinvalbuf(V_SAVE) and, for a vnode that was in use on entry,
                          * VOP_CLOSE() is called before VOP_INACTIVE().
                          *
                          * VOP_RECLAIM() is then called (failure is a panic), the name
                          * cache is purged, v_op is pointed at dead_vnodeop_p, v_tag is
                          * set to VT_NON, VXLOCK is cleared and anyone who set VXWANT is
                          * woken up.
                   1476:  */
                   1477: void
1.80      fvdl     1478: vclean(vp, flags, p)
1.123     augustss 1479:        struct vnode *vp;
1.29      cgd      1480:        int flags;
1.80      fvdl     1481:        struct proc *p;
1.29      cgd      1482: {
                   1483:        int active;
                   1484:
                   1485:        /*
                   1486:         * Check to see if the vnode is in use.
                   1487:         * If so we have to reference it before we clean it out
                   1488:         * so that its count cannot fall to zero and generate a
                   1489:         * race against ourselves to recycle it.
                   1490:         */
1.112     mycroft  1491:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1492:                /* We have the vnode interlock. */
                   1493:                vp->v_usecount++;
1.112     mycroft  1494: #ifdef DIAGNOSTIC
                   1495:                if (vp->v_usecount == 0) {
                   1496:                        vprint("vclean", vp);
                   1497:                        panic("vclean: usecount overflow");
                   1498:                }
                   1499: #endif
                   1500:        }
1.87      pk       1501:
1.29      cgd      1502:        /*
                   1503:         * Prevent the vnode from being recycled or
                   1504:         * brought into use while we clean it out.
                   1505:         */
                   1506:        if (vp->v_flag & VXLOCK)
1.125     chs      1507:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1508:        vp->v_flag |= VXLOCK;
                                /* Stop counting this vnode's pages as text pages. */
1.147     chs      1509:        if (vp->v_flag & VTEXT) {
                   1510:                uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
                   1511:                uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
                   1512:        }
1.142     chs      1513:        vp->v_flag &= ~VTEXT;
                   1514:
1.29      cgd      1515:        /*
1.80      fvdl     1516:         * Even if the count is zero, the VOP_INACTIVE routine may still
                   1517:         * have the object locked while it cleans it out. The VOP_LOCK
                   1518:         * ensures that the VOP_INACTIVE routine is done with its work.
                   1519:         * For active vnodes, it ensures that no other activity can
                   1520:         * occur while the underlying object is being cleaned out.
                   1521:         */
                   1522:        VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
                   1523:
1.98      wrstuden 1524:        /*
1.142     chs      1525:         * Clean out any cached data associated with the vnode.
1.29      cgd      1526:         */
                   1527:        if (flags & DOCLOSE)
1.80      fvdl     1528:                vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
                   1529:
1.29      cgd      1530:        /*
1.30      mycroft  1531:         * If purging an active vnode, it must be closed and
1.80      fvdl     1532:         * deactivated before being reclaimed. Note that the
                   1533:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1534:         */
                   1535:        if (active) {
                   1536:                if (flags & DOCLOSE)
1.86      pk       1537:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1.80      fvdl     1538:                VOP_INACTIVE(vp, p);
                   1539:        } else {
                   1540:                /*
                   1541:                 * Any other processes trying to obtain this lock must first
                   1542:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1543:                 */
                   1544:                VOP_UNLOCK(vp, 0);
1.29      cgd      1545:        }
                   1546:        /*
                   1547:         * Reclaim the vnode.
                   1548:         */
1.80      fvdl     1549:        if (VOP_RECLAIM(vp, p))
1.125     chs      1550:                panic("vclean: cannot reclaim, vp %p", vp);
1.87      pk       1551:        if (active) {
                   1552:                /*
                   1553:                 * Inline copy of vrele() since VOP_INACTIVE
                   1554:                 * has already been called.
                                         *
                                         * This drops the extra reference taken at the top
                                         * of the function.
                   1555:                 */
                   1556:                simple_lock(&vp->v_interlock);
                   1557:                if (--vp->v_usecount <= 0) {
                   1558: #ifdef DIAGNOSTIC
                   1559:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1560:                                vprint("vclean: bad ref count", vp);
                   1561:                                panic("vclean: ref cnt");
                   1562:                        }
                   1563: #endif
                   1564:                        /*
                   1565:                         * Insert at tail of LRU list.
                   1566:                         */
1.142     chs      1567:
1.113     fvdl     1568:                        simple_unlock(&vp->v_interlock);
1.87      pk       1569:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1570: #ifdef DIAGNOSTIC
                   1571:                        if (vp->v_vnlock) {
                   1572:                                if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
                   1573:                                        vprint("vclean: lock not drained", vp);
                   1574:                        }
1.113     fvdl     1575:                        if (vp->v_holdcnt > 0)
1.125     chs      1576:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1577: #endif
1.87      pk       1578:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1579:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1580:                } else
                   1581:                        simple_unlock(&vp->v_interlock);
1.87      pk       1582:        }
1.30      mycroft  1583:
1.80      fvdl     1584:        cache_purge(vp);
                   1585:
1.29      cgd      1586:        /*
1.30      mycroft  1587:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1588:         */
1.30      mycroft  1589:        vp->v_op = dead_vnodeop_p;
                   1590:        vp->v_tag = VT_NON;
1.139     enami    1591:        simple_lock(&vp->v_interlock);
1.29      cgd      1592:        vp->v_flag &= ~VXLOCK;
                   1593:        if (vp->v_flag & VXWANT) {
                   1594:                vp->v_flag &= ~VXWANT;
1.139     enami    1595:                simple_unlock(&vp->v_interlock);
1.29      cgd      1596:                wakeup((caddr_t)vp);
1.139     enami    1597:        } else
                   1598:                simple_unlock(&vp->v_interlock);
1.29      cgd      1599: }
                   1600:
                   1601: /*
1.80      fvdl     1602:  * Recycle an unused vnode to the front of the free list.
                   1603:  * Release the passed interlock if the vnode will be recycled.
                          *
                          * inter_lkp may be NULL, in which case there is nothing extra to
                          * release.
                          *
                          * Returns 1 if the vnode had no users (v_usecount == 0) and was
                          * passed to vgonel(); returns 0, leaving inter_lkp untouched, if
                          * the vnode is still in use.
1.29      cgd      1604:  */
1.80      fvdl     1605: int
                   1606: vrecycle(vp, inter_lkp, p)
                   1607:        struct vnode *vp;
                   1608:        struct simplelock *inter_lkp;
                   1609:        struct proc *p;
                   1610: {
                   1611:
                   1612:        simple_lock(&vp->v_interlock);
                   1613:        if (vp->v_usecount == 0) {
                   1614:                if (inter_lkp)
                   1615:                        simple_unlock(inter_lkp);
                                        /* vgonel() is entered with the vnode interlock held. */
                   1616:                vgonel(vp, p);
                   1617:                return (1);
1.29      cgd      1618:        }
1.80      fvdl     1619:        simple_unlock(&vp->v_interlock);
                   1620:        return (0);
1.29      cgd      1621: }
                   1622:
                   1623: /*
                   1624:  * Eliminate all activity associated with a vnode
                   1625:  * in preparation for reuse.
                          *
                          * Convenience wrapper: takes vp->v_interlock and calls vgonel()
                          * for the current process.  The interlock is not released in this
                          * function; it is handed over to vgonel().
                   1626:  */
                   1627: void
                   1628: vgone(vp)
1.80      fvdl     1629:        struct vnode *vp;
                   1630: {
                   1631:        struct proc *p = curproc;       /* XXX */
                   1632:
                   1633:        simple_lock(&vp->v_interlock);
                   1634:        vgonel(vp, p);
                   1635: }
                   1636:
                   1637: /*
                   1638:  * vgone, with the vp interlock held.
                   1639:  */
                   1640: void
                   1641: vgonel(vp, p)
1.123     augustss 1642:        struct vnode *vp;
1.80      fvdl     1643:        struct proc *p;
1.29      cgd      1644: {
1.80      fvdl     1645:        struct vnode *vq;
1.29      cgd      1646:        struct vnode *vx;
                   1647:
                   1648:        /*
                   1649:         * If a vgone (or vclean) is already in progress,
                   1650:         * wait until it is done and return.
                   1651:         */
                   1652:        if (vp->v_flag & VXLOCK) {
                   1653:                vp->v_flag |= VXWANT;
1.134     sommerfe 1654:                ltsleep((caddr_t)vp, PINOD | PNORELOCK,
                   1655:                    "vgone", 0, &vp->v_interlock);
1.29      cgd      1656:                return;
                   1657:        }
                   1658:        /*
                   1659:         * Clean out the filesystem specific data.
                   1660:         */
1.80      fvdl     1661:        vclean(vp, DOCLOSE, p);
1.29      cgd      1662:        /*
                   1663:         * Delete from old mount point vnode list, if on one.
                   1664:         */
1.80      fvdl     1665:        if (vp->v_mount != NULL)
                   1666:                insmntque(vp, (struct mount *)0);
1.29      cgd      1667:        /*
                   1668:         * If special device, remove it from special device alias list.
1.80      fvdl     1669:         * if it is on one.
1.29      cgd      1670:         */
1.80      fvdl     1671:        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
                   1672:                simple_lock(&spechash_slock);
1.110     wrstuden 1673:                if (vp->v_hashchain != NULL) {
                   1674:                        if (*vp->v_hashchain == vp) {
                   1675:                                *vp->v_hashchain = vp->v_specnext;
                   1676:                        } else {
                   1677:                                for (vq = *vp->v_hashchain; vq;
                   1678:                                                        vq = vq->v_specnext) {
                   1679:                                        if (vq->v_specnext != vp)
                   1680:                                                continue;
                   1681:                                        vq->v_specnext = vp->v_specnext;
                   1682:                                        break;
                   1683:                                }
                   1684:                                if (vq == NULL)
                   1685:                                        panic("missing bdev");
1.29      cgd      1686:                        }
1.110     wrstuden 1687:                        if (vp->v_flag & VALIASED) {
                   1688:                                vx = NULL;
                   1689:                                for (vq = *vp->v_hashchain; vq;
                   1690:                                                        vq = vq->v_specnext) {
                   1691:                                        if (vq->v_rdev != vp->v_rdev ||
                   1692:                                            vq->v_type != vp->v_type)
                   1693:                                                continue;
                   1694:                                        if (vx)
                   1695:                                                break;
                   1696:                                        vx = vq;
                   1697:                                }
                   1698:                                if (vx == NULL)
                   1699:                                        panic("missing alias");
                   1700:                                if (vq == NULL)
                   1701:                                        vx->v_flag &= ~VALIASED;
                   1702:                                vp->v_flag &= ~VALIASED;
1.29      cgd      1703:                        }
                   1704:                }
1.80      fvdl     1705:                simple_unlock(&spechash_slock);
1.29      cgd      1706:                FREE(vp->v_specinfo, M_VNODE);
                   1707:                vp->v_specinfo = NULL;
                   1708:        }
                   1709:        /*
1.30      mycroft  1710:         * If it is on the freelist and not already at the head,
                   1711:         * move it to the head of the list. The test of the back
                   1712:         * pointer and the reference count of zero is because
                   1713:         * it will be removed from the free list by getnewvnode,
                   1714:         * but will not have its reference count incremented until
                   1715:         * after calling vgone. If the reference count were
                   1716:         * incremented first, vgone would (incorrectly) try to
                   1717:         * close the previous instance of the underlying object.
                   1718:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1719:         * getnewvnode after removing it from the freelist to ensure
                   1720:         * that we do not try to move it here.
1.29      cgd      1721:         */
1.80      fvdl     1722:        if (vp->v_usecount == 0) {
                   1723:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1724:                if (vp->v_holdcnt > 0)
1.125     chs      1725:                        panic("vgonel: not clean, vp %p", vp);
1.80      fvdl     1726:                if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1.113     fvdl     1727:                    TAILQ_FIRST(&vnode_free_list) != vp) {
1.80      fvdl     1728:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1729:                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                   1730:                }
                   1731:                simple_unlock(&vnode_free_list_slock);
1.29      cgd      1732:        }
                   1733:        vp->v_type = VBAD;
                   1734: }
                   1735:
                   1736: /*
                   1737:  * Lookup a vnode by device number.
                          *
                          * Walks the special-device hash chain selected by SPECHASH(dev)
                          * while holding spechash_slock and stops at the first vnode whose
                          * v_rdev and v_type equal `dev' and `type'.
                          *
                          * Returns 1 and stores the vnode in *vpp when a match is found;
                          * returns 0 and leaves *vpp untouched otherwise.
                          *
                          * NOTE(review): no reference is taken on the vnode before the hash
                          * lock is dropped, so the caller presumably has to cope with the
                          * vnode changing underneath it -- confirm against callers.
                   1738:  */
1.50      christos 1739: int
1.29      cgd      1740: vfinddev(dev, type, vpp)
                   1741:        dev_t dev;
                   1742:        enum vtype type;
                   1743:        struct vnode **vpp;
                   1744: {
1.80      fvdl     1745:        struct vnode *vp;
                   1746:        int rc = 0;
1.29      cgd      1747:
1.80      fvdl     1748:        simple_lock(&spechash_slock);
1.29      cgd      1749:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1750:                if (dev != vp->v_rdev || type != vp->v_type)
                   1751:                        continue;
                                        /* First match wins; later aliases are not examined. */
                   1752:                *vpp = vp;
1.80      fvdl     1753:                rc = 1;
                   1754:                break;
1.29      cgd      1755:        }
1.80      fvdl     1756:        simple_unlock(&spechash_slock);
                   1757:        return (rc);
1.96      thorpej  1758: }
                   1759:
                   1760: /*
                   1761:  * Revoke all the vnodes corresponding to the specified minor number
                   1762:  * range (endpoints inclusive) of the specified major.
                          *
                          * For every minor `mn' in [minl, minh] the device number is built
                          * with makedev(maj, mn) and looked up with vfinddev() for the given
                          * vnode type; when a vnode is found, VOP_REVOKE(vp, REVOKEALL) is
                          * called on it.  Minors with no matching vnode are skipped silently.
                          *
                          * NOTE(review): vfinddev() yields only the first matching vnode per
                          * device; REVOKEALL presumably takes care of any aliases -- confirm.
                   1763:  */
                   1764: void
                   1765: vdevgone(maj, minl, minh, type)
                   1766:        int maj, minl, minh;
                   1767:        enum vtype type;
                   1768: {
                   1769:        struct vnode *vp;
                   1770:        int mn;
                   1771:
                   1772:        for (mn = minl; mn <= minh; mn++)
                   1773:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1774:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1775: }
                   1776:
                   1777: /*
                   1778:  * Calculate the total number of references to a special device.
                          *
                          * If vp is not marked VALIASED its own v_usecount is returned
                          * directly.  Otherwise the v_usecount of every vnode on vp's hash
                          * chain with the same v_rdev and v_type is summed while holding
                          * spechash_slock, and that sum is returned.
                          *
                          * Side effect: an alias other than vp that has a zero use count and
                          * is not VXLOCKed is destroyed with vgone(); the scan then starts
                          * over from the top.
                   1779:  */
1.30      mycroft  1780: int
1.29      cgd      1781: vcount(vp)
1.123     augustss 1782:        struct vnode *vp;
1.29      cgd      1783: {
1.123     augustss 1784:        struct vnode *vq, *vnext;
1.29      cgd      1785:        int count;
                   1786:
                   1787: loop:
                   1788:        if ((vp->v_flag & VALIASED) == 0)
                   1789:                return (vp->v_usecount);
1.80      fvdl     1790:        simple_lock(&spechash_slock);
1.30      mycroft  1791:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1792:                vnext = vq->v_specnext;
1.29      cgd      1793:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1794:                        continue;
                   1795:                /*
                   1796:                 * Alias, but not in use, so flush it out.
                                         * vp itself and vnodes with VXLOCK set are never flushed.
                                         * The hash lock is released before vgone() is called, and
                                         * the count is recomputed from scratch afterwards.
                   1797:                 */
1.151     wrstuden 1798:                if (vq->v_usecount == 0 && vq != vp &&
                   1799:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1800:                        simple_unlock(&spechash_slock);
1.29      cgd      1801:                        vgone(vq);
                   1802:                        goto loop;
                   1803:                }
                   1804:                count += vq->v_usecount;
                   1805:        }
1.80      fvdl     1806:        simple_unlock(&spechash_slock);
1.29      cgd      1807:        return (count);
                   1808: }
                   1809:
                   1810: /*
                   1811:  * Print out a description of a vnode.
                          *
                          * typename[] maps a vnode's v_type to its printable name.
                          *
                          * vprint() prints the optional `label' (skipped when NULL), then the
                          * vnode's tag, type name, use count, write count and hold count,
                          * followed by the names of the flags that are set (VROOT, VTEXT,
                          * VSYSTEM, VXLOCK, VXWANT, VBWAIT, VALIASED) joined with '|'.
                          * If the vnode has filesystem private data (v_data != NULL) the
                          * filesystem's VOP_PRINT() is called on a new, tab-indented line.
                          *
                          * NOTE(review): typename[] is indexed with vp->v_type without a range
                          * check; a corrupt v_type would read past the table -- confirm whether
                          * callers can pass such a vnode.
                   1812:  */
1.146     jdolecek 1813: static const char * const typename[] =
1.29      cgd      1814:    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
                   1815:
                   1816: void
                   1817: vprint(label, vp)
                   1818:        char *label;
1.123     augustss 1819:        struct vnode *vp;
1.29      cgd      1820: {
                                /*
                                 * All seven flag names appended below total 50 characters, so
                                 * the 64 byte buffer cannot overflow.
                                 */
                   1821:        char buf[64];
                   1822:
                   1823:        if (label != NULL)
1.57      christos 1824:                printf("%s: ", label);
1.142     chs      1825:        printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1.113     fvdl     1826:            vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1.56      christos 1827:            vp->v_holdcnt);
1.29      cgd      1828:        buf[0] = '\0';
                   1829:        if (vp->v_flag & VROOT)
                   1830:                strcat(buf, "|VROOT");
                   1831:        if (vp->v_flag & VTEXT)
                   1832:                strcat(buf, "|VTEXT");
                   1833:        if (vp->v_flag & VSYSTEM)
                   1834:                strcat(buf, "|VSYSTEM");
                   1835:        if (vp->v_flag & VXLOCK)
                   1836:                strcat(buf, "|VXLOCK");
                   1837:        if (vp->v_flag & VXWANT)
                   1838:                strcat(buf, "|VXWANT");
                   1839:        if (vp->v_flag & VBWAIT)
                   1840:                strcat(buf, "|VBWAIT");
                   1841:        if (vp->v_flag & VALIASED)
                   1842:                strcat(buf, "|VALIASED");
                                /* &buf[1] skips the leading '|' of the first flag name. */
                   1843:        if (buf[0] != '\0')
1.57      christos 1844:                printf(" flags (%s)", &buf[1]);
1.30      mycroft  1845:        if (vp->v_data == NULL) {
1.57      christos 1846:                printf("\n");
1.30      mycroft  1847:        } else {
1.57      christos 1848:                printf("\n\t");
1.30      mycroft  1849:                VOP_PRINT(vp);
                   1850:        }
1.29      cgd      1851: }
                   1852:
                   1853: #ifdef DEBUG
                   1854: /*
                   1855:  * List all of the locked vnodes in the system.
                   1856:  * Called when debugging the kernel.
                          *
                          * Walks the mount list under mountlist_slock.  Each mount is busied
                          * with vfs_busy(..., LK_NOWAIT, ...); mounts that cannot be busied
                          * are skipped.  For every vnode on a busied mount's mnt_vnodelist
                          * for which VOP_ISLOCKED() is true, vprint() is called with no label.
                          *
                          * NOTE(review): the per-mount vnode list is walked here without
                          * taking mntvnode_slock, unlike sysctl_vnode() -- presumably tolerated
                          * because this is a debugging aid; confirm.
                   1857:  */
1.51      christos 1858: void
1.29      cgd      1859: printlockedvnodes()
                   1860: {
1.80      fvdl     1861:        struct mount *mp, *nmp;
                   1862:        struct vnode *vp;
1.29      cgd      1863:
1.57      christos 1864:        printf("Locked vnodes\n");
1.80      fvdl     1865:        simple_lock(&mountlist_slock);
                   1866:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
                   1867:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
                   1868:                        nmp = mp->mnt_list.cqe_next;
                   1869:                        continue;
                   1870:                }
1.30      mycroft  1871:                for (vp = mp->mnt_vnodelist.lh_first;
                   1872:                     vp != NULL;
1.80      fvdl     1873:                     vp = vp->v_mntvnodes.le_next) {
1.29      cgd      1874:                        if (VOP_ISLOCKED(vp))
                   1875:                                vprint((char *)0, vp);
1.80      fvdl     1876:                }
                                        /*
                                         * Retake the list lock (vfs_busy() presumably released it
                                         * on success -- confirm) and pick up the successor before
                                         * the mount is unbusied.
                                         */
                   1877:                simple_lock(&mountlist_slock);
                   1878:                nmp = mp->mnt_list.cqe_next;
                   1879:                vfs_unbusy(mp);
1.29      cgd      1880:        }
1.80      fvdl     1881:        simple_unlock(&mountlist_slock);
1.29      cgd      1882: }
                   1883: #endif
                   1884:
1.101     mrg      1885: /*
1.80      fvdl     1886:  * Top level filesystem related information gathering.
                          *
                          * name/namelen is the sysctl MIB below the vfs node; at least two
                          * components are required, otherwise ENOTDIR is returned.
                          *
                          * name[0] != VFS_GENERIC: name[0] is translated to a filesystem name
                          * through the CTL_VFS_NAMES table and the request is forwarded to
                          * that filesystem's vfs_sysctl hook with the first component
                          * stripped.  EOPNOTSUPP is returned when the id is out of range, has
                          * no name, the filesystem is not found, or it has no hook.
                          *
                          * name[0] == VFS_GENERIC: name[1] selects
                          *   VFS_USERMOUNT   read/write access to dovfsusermount;
                          *   VFS_MAXTYPENUM  read-only nmountcompatnames (compat kernels only);
                          *   VFS_CONF        read-only struct vfsconf for the filesystem whose
                          *                   compat type number is name[2] (compat kernels
                          *                   only; needs a third component, else ENOTDIR).
                          * Anything else yields EOPNOTSUPP.
                   1887:  */
                   1888: int
                   1889: vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
                   1890:        int *name;
                   1891:        u_int namelen;
                   1892:        void *oldp;
                   1893:        size_t *oldlenp;
                   1894:        void *newp;
                   1895:        size_t newlen;
                   1896:        struct proc *p;
                   1897: {
1.95      thorpej  1898: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1899:        struct vfsconf vfc;
1.154     jdolecek 1900:        extern const char * const mountcompatnames[];
                   1901:        extern int nmountcompatnames;
1.95      thorpej  1902: #endif
1.80      fvdl     1903:        struct vfsops *vfsp;
                   1904:
                   1905:        /* all sysctl names at this level are at least name and field */
                   1906:        if (namelen < 2)
                   1907:                return (ENOTDIR);               /* overloaded */
1.94      thorpej  1908:
                   1909:        /* Not generic: goes to file system. */
1.80      fvdl     1910:        if (name[0] != VFS_GENERIC) {
1.155     jdolecek 1911:                static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
1.154     jdolecek 1912:                const char *vfsname;
                   1913:
                   1914:                if (name[0] < 0 || name[0] > VFS_MAXID
                   1915:                    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
1.80      fvdl     1916:                        return (EOPNOTSUPP);
1.154     jdolecek 1917:
                   1918:                vfsp = vfs_getopsbyname(vfsname);
1.94      thorpej  1919:                if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
                   1920:                        return (EOPNOTSUPP);
1.80      fvdl     1921:                return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
                   1922:                    oldp, oldlenp, newp, newlen, p));
                   1923:        }
1.94      thorpej  1924:
                   1925:        /* The rest are generic vfs sysctls. */
1.80      fvdl     1926:        switch (name[1]) {
1.117     fvdl     1927:        case VFS_USERMOUNT:
                   1928:                return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
1.95      thorpej  1929: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1930:        case VFS_MAXTYPENUM:
1.94      thorpej  1931:                /*
                   1932:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1933:                 */
1.80      fvdl     1934:                return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
                   1935:        case VFS_CONF:
1.94      thorpej  1936:                /*
                   1937:                 * Special: a node, next is a file system name.
                   1938:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1939:                 */
1.80      fvdl     1940:                if (namelen < 3)
                   1941:                        return (ENOTDIR);       /* overloaded */
                   1942:                if (name[2] >= nmountcompatnames || name[2] < 0 ||
                   1943:                    mountcompatnames[name[2]] == NULL)
                   1944:                        return (EOPNOTSUPP);
                   1945:                vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
                   1946:                if (vfsp == NULL)
1.94      thorpej  1947:                        return (EOPNOTSUPP);
                                        /*
                                         * NOTE(review): vfc is not zeroed before it is filled in and
                                         * handed to sysctl_rdstruct(); if struct vfsconf has padding
                                         * or members not assigned below, stale stack bytes would be
                                         * exposed -- confirm.  strncpy() also leaves vfc_name
                                         * unterminated when the source is MFSNAMELEN bytes or longer.
                                         */
                   1948:                vfc.vfc_vfsops = vfsp;
1.80      fvdl     1949:                strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
                   1950:                vfc.vfc_typenum = name[2];
1.94      thorpej  1951:                vfc.vfc_refcount = vfsp->vfs_refcount;
1.80      fvdl     1952:                vfc.vfc_flags = 0;
                   1953:                vfc.vfc_mountroot = vfsp->vfs_mountroot;
                   1954:                vfc.vfc_next = NULL;
                   1955:                return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
                   1956:                    sizeof(struct vfsconf)));
1.95      thorpej  1957: #endif
                   1958:        default:
                   1959:                break;
1.80      fvdl     1960:        }
                   1961:        return (EOPNOTSUPP);
                   1962: }
                   1963:
1.29      cgd      1964: int kinfo_vdebug = 1;
                   1965: int kinfo_vgetfailed;
                   1966: #define KINFO_VNODESLOP        10
                   1967: /*
                   1968:  * Dump vnode list (via sysctl).
                   1969:  * Copyout address of vnode followed by vnode.
                          *
                          * kinfo_vdebug, when non-zero, enables the "kinfo: vp changed"
                          * message below.  KINFO_VNODESLOP is the number of extra entries
                          * added to the size estimate.
                          *
                          * where == NULL: only a size estimate is stored in *sizep,
                          * (numvnodes + KINFO_VNODESLOP) records, and 0 is returned.
                          *
                          * Otherwise `where' is a user buffer of *sizep bytes.  For every
                          * mount that can be busied without waiting, each vnode on the
                          * mount's vnode list is copied out as a (struct vnode *,
                          * struct vnode) pair.  If a vnode no longer belongs to the mount,
                          * the output position is rewound to where this mount started and
                          * the mount is rescanned.  On success *sizep is set to the number
                          * of bytes written and 0 is returned.
                          *
                          * Errors: ENOMEM when the next record would not fit (*sizep is
                          * then the number of bytes written so far), or the error from
                          * copyout().  The proc argument `p' is not used.
                          *
                          * NOTE(review): both error returns inside the vnode loop leave
                          * without calling vfs_unbusy(mp), so the busy reference taken by
                          * vfs_busy() looks leaked on those paths -- confirm.
                   1970:  */
                   1971: /* ARGSUSED */
1.50      christos 1972: int
1.80      fvdl     1973: sysctl_vnode(where, sizep, p)
1.29      cgd      1974:        char *where;
                   1975:        size_t *sizep;
1.80      fvdl     1976:        struct proc *p;
1.29      cgd      1977: {
1.80      fvdl     1978:        struct mount *mp, *nmp;
                   1979:        struct vnode *nvp, *vp;
                   1980:        char *bp = where, *savebp;
1.29      cgd      1981:        char *ewhere;
                   1982:        int error;
                   1983:
1.90      perry    1984: #define VPTRSZ sizeof(struct vnode *)
                   1985: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      1986:        if (where == NULL) {
                   1987:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   1988:                return (0);
                   1989:        }
                   1990:        ewhere = where + *sizep;
1.80      fvdl     1991:
                   1992:        simple_lock(&mountlist_slock);
1.38      mycroft  1993:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1.80      fvdl     1994:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
                   1995:                        nmp = mp->mnt_list.cqe_next;
1.29      cgd      1996:                        continue;
1.80      fvdl     1997:                }
                                        /* Remember where this mount's records start for a rescan. */
1.29      cgd      1998:                savebp = bp;
                   1999: again:
1.80      fvdl     2000:                simple_lock(&mntvnode_slock);
1.29      cgd      2001:                for (vp = mp->mnt_vnodelist.lh_first;
                   2002:                     vp != NULL;
1.80      fvdl     2003:                     vp = nvp) {
1.29      cgd      2004:                        /*
                   2005:                         * Check that the vp is still associated with
                   2006:                         * this filesystem.  RACE: could have been
                   2007:                         * recycled onto the same filesystem.
                   2008:                         */
                   2009:                        if (vp->v_mount != mp) {
1.80      fvdl     2010:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2011:                                if (kinfo_vdebug)
1.57      christos 2012:                                        printf("kinfo: vp changed\n");
1.29      cgd      2013:                                bp = savebp;
                   2014:                                goto again;
                   2015:                        }
                                                /* Successor is fetched while the list lock is still held. */
1.80      fvdl     2016:                        nvp = vp->v_mntvnodes.le_next;
1.29      cgd      2017:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     2018:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2019:                                *sizep = bp - where;
                   2020:                                return (ENOMEM);
                   2021:                        }
                                                /*
                                                 * The list lock is dropped around the copyout calls
                                                 * (presumably because copyout may sleep -- confirm)
                                                 * and retaken before the loop advances.
                                                 */
1.80      fvdl     2022:                        simple_unlock(&mntvnode_slock);
1.29      cgd      2023:                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                   2024:                           (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                   2025:                                return (error);
                   2026:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     2027:                        simple_lock(&mntvnode_slock);
1.29      cgd      2028:                }
1.80      fvdl     2029:                simple_unlock(&mntvnode_slock);
                   2030:                simple_lock(&mountlist_slock);
                   2031:                nmp = mp->mnt_list.cqe_next;
1.29      cgd      2032:                vfs_unbusy(mp);
                   2033:        }
1.80      fvdl     2034:        simple_unlock(&mountlist_slock);
1.29      cgd      2035:
                   2036:        *sizep = bp - where;
                   2037:        return (0);
1.30      mycroft  2038: }
                   2039:
                   2040: /*
                   2041:  * Check to see if a filesystem is mounted on a block device.
                          *
                          * Returns EBUSY when vp->v_specmountpoint is set, or when vp is
                          * marked VALIASED and any vnode on its hash chain with the same
                          * v_rdev and v_type has v_specmountpoint set.  Returns 0 otherwise.
                          * The alias chain is walked with spechash_slock held.
                   2042:  */
                   2043: int
                   2044: vfs_mountedon(vp)
1.80      fvdl     2045:        struct vnode *vp;
1.30      mycroft  2046: {
1.80      fvdl     2047:        struct vnode *vq;
                   2048:        int error = 0;
1.30      mycroft  2049:
1.113     fvdl     2050:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  2051:                return (EBUSY);
                   2052:        if (vp->v_flag & VALIASED) {
1.80      fvdl     2053:                simple_lock(&spechash_slock);
1.30      mycroft  2054:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   2055:                        if (vq->v_rdev != vp->v_rdev ||
                   2056:                            vq->v_type != vp->v_type)
                   2057:                                continue;
1.113     fvdl     2058:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     2059:                                error = EBUSY;
                   2060:                                break;
                   2061:                        }
1.30      mycroft  2062:                }
1.80      fvdl     2063:                simple_unlock(&spechash_slock);
1.30      mycroft  2064:        }
1.80      fvdl     2065:        return (error);
1.30      mycroft  2066: }
                   2067:
                   2068: /*
                   2069:  * Build hash lists of net addresses and hang them off the mount point.
                   2070:  * Called by ufs_mount() to set up the lists of export addresses.
                          *
                          * argp->ex_addrlen == 0 sets up the default export: EPERM is
                          * returned if the mount already has MNT_DEFEXPORTED, otherwise
                          * nep->ne_defexported is filled from argp and the flag is set.
                          *
                          * Otherwise a struct netcred is allocated with room for the address
                          * and mask behind it, both are copied in from user space, and the
                          * entry is added to the radix tree for the address family (the tree
                          * is attached on first use through the domain's dom_rtattach hook).
                          * If an entry for the address already exists, the new one is freed
                          * and the result is 0 when flags and anonymous credentials match the
                          * existing entry, EPERM when they differ.
                          *
                          * Returns 0 on success; EINVAL if ex_addrlen exceeds MLEN; the
                          * copyin() error; ENOBUFS if no radix tree could be attached; EPERM
                          * as described above.
                          *
                          * NOTE(review), to be confirmed:
                          *  - only ex_addrlen is bounded by MLEN; ex_masklen is not checked.
                          *  - sa_family comes from user space and indexes nep->ne_rtable[]
                          *    without a range check; vfs_free_addrlist() treats that table as
                          *    having AF_MAX + 1 slots.
                          *  - the memcmp() of cr_groups uses cr_ngroups, a count, as its byte
                          *    length, which looks like it compares fewer bytes than intended.
                   2071:  */
                   2072: static int
                   2073: vfs_hang_addrlist(mp, nep, argp)
                   2074:        struct mount *mp;
                   2075:        struct netexport *nep;
                   2076:        struct export_args *argp;
                   2077: {
1.123     augustss 2078:        struct netcred *np, *enp;
                   2079:        struct radix_node_head *rnh;
                   2080:        int i;
1.30      mycroft  2081:        struct radix_node *rn;
                   2082:        struct sockaddr *saddr, *smask = 0;
                   2083:        struct domain *dom;
                   2084:        int error;
                   2085:
                   2086:        if (argp->ex_addrlen == 0) {
                   2087:                if (mp->mnt_flag & MNT_DEFEXPORTED)
                   2088:                        return (EPERM);
                   2089:                np = &nep->ne_defexported;
                   2090:                np->netc_exflags = argp->ex_flags;
                   2091:                np->netc_anon = argp->ex_anon;
                   2092:                np->netc_anon.cr_ref = 1;
                   2093:                mp->mnt_flag |= MNT_DEFEXPORTED;
                   2094:                return (0);
                   2095:        }
1.156     jdolecek 2096:
                   2097:        if (argp->ex_addrlen > MLEN)
                   2098:                return (EINVAL);
                   2099:
                                /* One allocation: netcred, then the address, then the mask. */
1.30      mycroft  2100:        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
                   2101:        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91      perry    2102:        memset((caddr_t)np, 0, i);
1.30      mycroft  2103:        saddr = (struct sockaddr *)(np + 1);
1.50      christos 2104:        error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
                   2105:        if (error)
1.30      mycroft  2106:                goto out;
                                /* Clamp the user-supplied length to what was actually copied. */
                   2107:        if (saddr->sa_len > argp->ex_addrlen)
                   2108:                saddr->sa_len = argp->ex_addrlen;
                   2109:        if (argp->ex_masklen) {
                   2110:                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66      mycroft  2111:                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30      mycroft  2112:                if (error)
                   2113:                        goto out;
                   2114:                if (smask->sa_len > argp->ex_masklen)
                   2115:                        smask->sa_len = argp->ex_masklen;
                   2116:        }
                                /* From here on `i' is the address family, not a size. */
                   2117:        i = saddr->sa_family;
                   2118:        if ((rnh = nep->ne_rtable[i]) == 0) {
                   2119:                /*
                   2120:                 * Seems silly to initialize every AF when most are not
                   2121:                 * used, do so on demand here
                   2122:                 */
                   2123:                for (dom = domains; dom; dom = dom->dom_next)
                   2124:                        if (dom->dom_family == i && dom->dom_rtattach) {
                   2125:                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                   2126:                                        dom->dom_rtoffset);
                   2127:                                break;
                   2128:                        }
                   2129:                if ((rnh = nep->ne_rtable[i]) == 0) {
                   2130:                        error = ENOBUFS;
                   2131:                        goto out;
                   2132:                }
                   2133:        }
                   2134:        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
                   2135:                np->netc_rnodes);
                   2136:        if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1.72      fvdl     2137:                if (rn == 0) {
                   2138:                        enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
                   2139:                                smask, rnh);
                   2140:                        if (enp == 0) {
                   2141:                                error = EPERM;
                   2142:                                goto out;
                   2143:                        }
                   2144:                } else
                   2145:                        enp = (struct netcred *)rn;
                   2146:
                                        /* A duplicate is accepted only if it matches the old entry. */
                   2147:                if (enp->netc_exflags != argp->ex_flags ||
                   2148:                    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
                   2149:                    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
                   2150:                    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
1.91      perry    2151:                    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1.72      fvdl     2152:                        enp->netc_anon.cr_ngroups))
                   2153:                                error = EPERM;
                   2154:                else
                   2155:                        error = 0;
1.30      mycroft  2156:                goto out;
                   2157:        }
                   2158:        np->netc_exflags = argp->ex_flags;
                   2159:        np->netc_anon = argp->ex_anon;
                   2160:        np->netc_anon.cr_ref = 1;
                   2161:        return (0);
                   2162: out:
                                /* Reached with error == 0 too: the unused duplicate is freed. */
                   2163:        free(np, M_NETADDR);
                   2164:        return (error);
                   2165: }
                   2166:
                         /*
                          * Tree-walk callback used by vfs_free_addrlist().
                          *
                          * `w' is the struct radix_node_head being torn down.  The node is
                          * removed from that tree through rnh_deladdr using its own key and
                          * mask, and the memory at `rn' is then released with type M_NETADDR.
                          * vfs_hang_addrlist() allocates each struct netcred with M_NETADDR
                          * and compares the netcred pointer with the radix node pointer, so
                          * freeing `rn' presumably frees the whole netcred -- confirm that
                          * netc_rnodes is its first member.
                          *
                          * Always returns 0.
                          */
                   2167: /* ARGSUSED */
                   2168: static int
                   2169: vfs_free_netcred(rn, w)
                   2170:        struct radix_node *rn;
1.50      christos 2171:        void *w;
1.30      mycroft  2172: {
1.123     augustss 2173:        struct radix_node_head *rnh = (struct radix_node_head *)w;
1.30      mycroft  2174:
                   2175:        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
                   2176:        free((caddr_t)rn, M_NETADDR);
                   2177:        return (0);
                   2178: }
                   2179:
                   2180: /*
                   2181:  * Free the net address hash lists that are hanging off the mount points.
                          *
                          * For every address family slot 0..AF_MAX (inclusive) of
                          * nep->ne_rtable that is in use, each entry in the radix tree is
                          * released through vfs_free_netcred(), the tree head itself is
                          * freed with type M_RTABLE, and the slot is cleared.
                   2182:  */
                   2183: static void
                   2184: vfs_free_addrlist(nep)
                   2185:        struct netexport *nep;
                   2186: {
1.123     augustss 2187:        int i;
                   2188:        struct radix_node_head *rnh;
1.30      mycroft  2189:
                   2190:        for (i = 0; i <= AF_MAX; i++)
1.50      christos 2191:                if ((rnh = nep->ne_rtable[i]) != NULL) {
                                                /* The head is passed as the callback's `w' argument. */
                   2192:                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30      mycroft  2193:                        free((caddr_t)rnh, M_RTABLE);
                   2194:                        nep->ne_rtable[i] = 0;
                   2195:                }
                   2196: }
                   2197:
                         /*
                          * Update the export state of a mount point according to
                          * argp->ex_flags.
                          *
                          * MNT_DELEXPORT: the public (WebNFS) export is invalidated if this
                          * mount holds it, all address lists hanging off `nep' are freed, and
                          * MNT_EXPORTED and MNT_DEFEXPORTED are cleared from the mount.
                          *
                          * MNT_EXPORTED: if MNT_EXPUBLIC is also requested the mount is made
                          * the public filesystem first; then the address described by argp is
                          * added with vfs_hang_addrlist() and MNT_EXPORTED is set.
                          *
                          * Both flags may be given at once; deletion is processed first.
                          * Returns 0 on success or the first error from vfs_setpublicfs() or
                          * vfs_hang_addrlist().
                          *
                          * NOTE(review): if vfs_hang_addrlist() fails after vfs_setpublicfs()
                          * succeeded, MNT_EXPUBLIC stays set on the mount -- confirm that this
                          * is intended.
                          */
                   2198: int
                   2199: vfs_export(mp, nep, argp)
                   2200:        struct mount *mp;
                   2201:        struct netexport *nep;
                   2202:        struct export_args *argp;
                   2203: {
                   2204:        int error;
                   2205:
                   2206:        if (argp->ex_flags & MNT_DELEXPORT) {
1.71      fvdl     2207:                if (mp->mnt_flag & MNT_EXPUBLIC) {
                   2208:                        vfs_setpublicfs(NULL, NULL, NULL);
                   2209:                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                   2210:                }
1.30      mycroft  2211:                vfs_free_addrlist(nep);
                   2212:                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
                   2213:        }
                   2214:        if (argp->ex_flags & MNT_EXPORTED) {
1.71      fvdl     2215:                if (argp->ex_flags & MNT_EXPUBLIC) {
                   2216:                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                   2217:                                return (error);
                   2218:                        mp->mnt_flag |= MNT_EXPUBLIC;
                   2219:                }
1.50      christos 2220:                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30      mycroft  2221:                        return (error);
                   2222:                mp->mnt_flag |= MNT_EXPORTED;
                   2223:        }
1.71      fvdl     2224:        return (0);
                   2225: }
                   2226:
                   2227: /*
                   2228:  * Set the publicly exported filesystem (WebNFS). Currently, only
                   2229:  * one public filesystem is possible in the spec (RFC 2054 and 2055)
                   2230:  */
                   2231: int
                   2232: vfs_setpublicfs(mp, nep, argp)
                   2233:        struct mount *mp;
                   2234:        struct netexport *nep;
                   2235:        struct export_args *argp;
                   2236: {
                   2237:        int error;
                   2238:        struct vnode *rvp;
                   2239:        char *cp;
                   2240:
                   2241:        /*
                   2242:         * mp == NULL -> invalidate the current info, the FS is
                   2243:         * no longer exported. May be called from either vfs_export
                   2244:         * or unmount, so check if it hasn't already been done.
                   2245:         */
                   2246:        if (mp == NULL) {
                   2247:                if (nfs_pub.np_valid) {
                   2248:                        nfs_pub.np_valid = 0;
                   2249:                        if (nfs_pub.np_index != NULL) {
                   2250:                                FREE(nfs_pub.np_index, M_TEMP);
                   2251:                                nfs_pub.np_index = NULL;
                   2252:                        }
                   2253:                }
                   2254:                return (0);
                   2255:        }
                   2256:
                   2257:        /*
                   2258:         * Only one allowed at a time.
                   2259:         */
                   2260:        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                   2261:                return (EBUSY);
                   2262:
                   2263:        /*
                   2264:         * Get real filehandle for root of exported FS.
                   2265:         */
1.91      perry    2266:        memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.71      fvdl     2267:        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
                   2268:
                   2269:        if ((error = VFS_ROOT(mp, &rvp)))
                   2270:                return (error);
                   2271:
                   2272:        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                   2273:                return (error);
                   2274:
                   2275:        vput(rvp);
                   2276:
                   2277:        /*
                   2278:         * If an indexfile was specified, pull it in.
                   2279:         */
                   2280:        if (argp->ex_indexfile != NULL) {
                   2281:                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                   2282:                    M_WAITOK);
                   2283:                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                   2284:                    MAXNAMLEN, (size_t *)0);
                   2285:                if (!error) {
                   2286:                        /*
                   2287:                         * Check for illegal filenames.
                   2288:                         */
                   2289:                        for (cp = nfs_pub.np_index; *cp; cp++) {
                   2290:                                if (*cp == '/') {
                   2291:                                        error = EINVAL;
                   2292:                                        break;
                   2293:                                }
                   2294:                        }
                   2295:                }
                   2296:                if (error) {
                   2297:                        FREE(nfs_pub.np_index, M_TEMP);
                   2298:                        return (error);
                   2299:                }
                   2300:        }
                   2301:
                   2302:        nfs_pub.np_mount = mp;
                   2303:        nfs_pub.np_valid = 1;
1.30      mycroft  2304:        return (0);
                   2305: }
                   2306:
                   2307: struct netcred *
                   2308: vfs_export_lookup(mp, nep, nam)
1.123     augustss 2309:        struct mount *mp;
1.30      mycroft  2310:        struct netexport *nep;
                   2311:        struct mbuf *nam;
                   2312: {
1.123     augustss 2313:        struct netcred *np;
                   2314:        struct radix_node_head *rnh;
1.30      mycroft  2315:        struct sockaddr *saddr;
                   2316:
                   2317:        np = NULL;
                   2318:        if (mp->mnt_flag & MNT_EXPORTED) {
                   2319:                /*
                   2320:                 * Lookup in the export list first.
                   2321:                 */
                   2322:                if (nam != NULL) {
                   2323:                        saddr = mtod(nam, struct sockaddr *);
                   2324:                        rnh = nep->ne_rtable[saddr->sa_family];
                   2325:                        if (rnh != NULL) {
                   2326:                                np = (struct netcred *)
                   2327:                                        (*rnh->rnh_matchaddr)((caddr_t)saddr,
                   2328:                                                              rnh);
                   2329:                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                   2330:                                        np = NULL;
                   2331:                        }
                   2332:                }
                   2333:                /*
                   2334:                 * If no address match, use the default if it exists.
                   2335:                 */
                   2336:                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                   2337:                        np = &nep->ne_defexported;
                   2338:        }
                   2339:        return (np);
1.35      ws       2340: }
                   2341:
                   2342: /*
                   2343:  * Do the usual access checking.
                   2344:  * file_mode, uid and gid are from the vnode in question,
                   2345:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   2346:  */
1.41      mycroft  2347: int
1.68      mycroft  2348: vaccess(type, file_mode, uid, gid, acc_mode, cred)
                   2349:        enum vtype type;
1.35      ws       2350:        mode_t file_mode;
                   2351:        uid_t uid;
                   2352:        gid_t gid;
                   2353:        mode_t acc_mode;
                   2354:        struct ucred *cred;
                   2355: {
                   2356:        mode_t mask;
                   2357:
1.64      mycroft  2358:        /*
                   2359:         * Super-user always gets read/write access, but execute access depends
                   2360:         * on at least one execute bit being set.
                   2361:         */
                   2362:        if (cred->cr_uid == 0) {
1.69      mycroft  2363:                if ((acc_mode & VEXEC) && type != VDIR &&
1.68      mycroft  2364:                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64      mycroft  2365:                        return (EACCES);
1.68      mycroft  2366:                return (0);
1.64      mycroft  2367:        }
1.35      ws       2368:
                   2369:        mask = 0;
                   2370:
                   2371:        /* Otherwise, check the owner. */
                   2372:        if (cred->cr_uid == uid) {
1.68      mycroft  2373:                if (acc_mode & VEXEC)
1.35      ws       2374:                        mask |= S_IXUSR;
                   2375:                if (acc_mode & VREAD)
                   2376:                        mask |= S_IRUSR;
                   2377:                if (acc_mode & VWRITE)
                   2378:                        mask |= S_IWUSR;
1.64      mycroft  2379:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2380:        }
                   2381:
                   2382:        /* Otherwise, check the groups. */
1.44      jtc      2383:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68      mycroft  2384:                if (acc_mode & VEXEC)
1.35      ws       2385:                        mask |= S_IXGRP;
                   2386:                if (acc_mode & VREAD)
                   2387:                        mask |= S_IRGRP;
                   2388:                if (acc_mode & VWRITE)
                   2389:                        mask |= S_IWGRP;
1.64      mycroft  2390:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2391:        }
                   2392:
                   2393:        /* Otherwise, check everyone else. */
1.68      mycroft  2394:        if (acc_mode & VEXEC)
1.35      ws       2395:                mask |= S_IXOTH;
                   2396:        if (acc_mode & VREAD)
                   2397:                mask |= S_IROTH;
                   2398:        if (acc_mode & VWRITE)
                   2399:                mask |= S_IWOTH;
1.64      mycroft  2400:        return ((file_mode & mask) == mask ? 0 : EACCES);
1.39      mycroft  2401: }
                   2402:
                   2403: /*
                   2404:  * Unmount all file systems.
                   2405:  * We traverse the list in reverse order under the assumption that doing so
                   2406:  * will avoid needing to worry about dependencies.
                   2407:  */
                   2408: void
1.128     sommerfe 2409: vfs_unmountall(p)
                   2410:        struct proc *p;
1.39      mycroft  2411: {
1.123     augustss 2412:        struct mount *mp, *nmp;
1.40      mycroft  2413:        int allerror, error;
1.39      mycroft  2414:
                   2415:        for (allerror = 0,
                   2416:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   2417:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      2418: #ifdef DEBUG
1.57      christos 2419:                printf("unmounting %s (%s)...\n",
1.56      christos 2420:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      2421: #endif
1.149     thorpej  2422:                /*
                   2423:                 * XXX Freeze syncer.  Must do this before locking the
                   2424:                 * mount point.  See dounmount() for details.
                   2425:                 */
                   2426:                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                   2427:                if (vfs_busy(mp, 0, 0)) {
                   2428:                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60      fvdl     2429:                        continue;
1.149     thorpej  2430:                }
1.80      fvdl     2431:                if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1.57      christos 2432:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  2433:                            mp->mnt_stat.f_mntonname, error);
                   2434:                        allerror = 1;
                   2435:                }
1.39      mycroft  2436:        }
                   2437:        if (allerror)
1.57      christos 2438:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  2439: }
                   2440:
/*
 * Sync and unmount file systems before shutting down.
 *
 * Issues a sync, then repeatedly scans the buffer array until no
 * buffer is busy with a write any more.  If buffers are still busy
 * after the retry budget is used up, "giving up" is printed and the
 * function returns without unmounting anything.  File systems are
 * also left mounted when the system has panicked.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/*
	 * Wait for sync to finish.
	 *
	 * dcount bounds the total number of soft-updates buffers that
	 * are pushed out below.  iter only advances on a pass that did
	 * not reduce the busy count, so the loop ends after 20 such
	 * passes rather than after 20 passes in total.
	 */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			/* Busy, and neither invalid nor a read. */
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				/*
				 * Claim the delayed-write buffer and
				 * start an asynchronous write of it.
				 */
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		/* First pass with busy buffers: establish the baseline. */
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		/* Sleep one tick at first, longer as iter grows. */
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
		/*
		 * Reached by falling through with buffers still busy, or
		 * directly from the soft-updates branch above once dcount
		 * is exhausted.
		 */
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
                   2538:
                   2539: /*
                   2540:  * Mount the root file system.  If the operator didn't specify a
                   2541:  * file system to use, try all possible file systems until one
                   2542:  * succeeds.
                   2543:  */
                   2544: int
                   2545: vfs_mountroot()
                   2546: {
                   2547:        extern int (*mountroot) __P((void));
1.79      thorpej  2548:        struct vfsops *v;
1.58      thorpej  2549:
                   2550:        if (root_device == NULL)
                   2551:                panic("vfs_mountroot: root device unknown");
                   2552:
                   2553:        switch (root_device->dv_class) {
                   2554:        case DV_IFNET:
                   2555:                if (rootdev != NODEV)
                   2556:                        panic("vfs_mountroot: rootdev set for DV_IFNET");
                   2557:                break;
                   2558:
                   2559:        case DV_DISK:
                   2560:                if (rootdev == NODEV)
                   2561:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                   2562:                break;
                   2563:
                   2564:        default:
                   2565:                printf("%s: inappropriate for root file system\n",
                   2566:                    root_device->dv_xname);
                   2567:                return (ENODEV);
                   2568:        }
                   2569:
                   2570:        /*
                   2571:         * If user specified a file system, use it.
                   2572:         */
                   2573:        if (mountroot != NULL)
                   2574:                return ((*mountroot)());
                   2575:
                   2576:        /*
                   2577:         * Try each file system currently configured into the kernel.
                   2578:         */
1.79      thorpej  2579:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2580:                if (v->vfs_mountroot == NULL)
1.58      thorpej  2581:                        continue;
                   2582: #ifdef DEBUG
1.79      thorpej  2583:                printf("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  2584: #endif
1.79      thorpej  2585:                if ((*v->vfs_mountroot)() == 0) {
                   2586:                        printf("root file system type: %s\n", v->vfs_name);
                   2587:                        break;
1.58      thorpej  2588:                }
                   2589:        }
                   2590:
1.79      thorpej  2591:        if (v == NULL) {
                   2592:                printf("no file system for %s", root_device->dv_xname);
                   2593:                if (root_device->dv_class == DV_DISK)
                   2594:                        printf(" (dev 0x%x)", rootdev);
                   2595:                printf("\n");
                   2596:                return (EFTYPE);
                   2597:        }
                   2598:        return (0);
1.58      thorpej  2599: }
                   2600:
                   2601: /*
                   2602:  * Given a file system name, look up the vfsops for that
                   2603:  * file system, or return NULL if file system isn't present
                   2604:  * in the kernel.
                   2605:  */
                   2606: struct vfsops *
                   2607: vfs_getopsbyname(name)
                   2608:        const char *name;
                   2609: {
1.79      thorpej  2610:        struct vfsops *v;
                   2611:
                   2612:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2613:                if (strcmp(v->vfs_name, name) == 0)
                   2614:                        break;
                   2615:        }
                   2616:
                   2617:        return (v);
                   2618: }
                   2619:
                   2620: /*
                   2621:  * Establish a file system and initialize it.
                   2622:  */
                   2623: int
                   2624: vfs_attach(vfs)
                   2625:        struct vfsops *vfs;
                   2626: {
                   2627:        struct vfsops *v;
                   2628:        int error = 0;
                   2629:
1.58      thorpej  2630:
1.79      thorpej  2631:        /*
                   2632:         * Make sure this file system doesn't already exist.
                   2633:         */
1.157   ! chs      2634:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2635:                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                   2636:                        error = EEXIST;
                   2637:                        goto out;
                   2638:                }
                   2639:        }
                   2640:
                   2641:        /*
                   2642:         * Initialize the vnode operations for this file system.
                   2643:         */
                   2644:        vfs_opv_init(vfs->vfs_opv_descs);
                   2645:
                   2646:        /*
                   2647:         * Now initialize the file system itself.
                   2648:         */
                   2649:        (*vfs->vfs_init)();
                   2650:
                   2651:        /*
                   2652:         * ...and link it into the kernel's list.
                   2653:         */
                   2654:        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
                   2655:
                   2656:        /*
                   2657:         * Sanity: make sure the reference count is 0.
                   2658:         */
                   2659:        vfs->vfs_refcount = 0;
                   2660:
                   2661:  out:
                   2662:        return (error);
                   2663: }
                   2664:
                   2665: /*
                   2666:  * Remove a file system from the kernel.
                   2667:  */
                   2668: int
                   2669: vfs_detach(vfs)
                   2670:        struct vfsops *vfs;
                   2671: {
                   2672:        struct vfsops *v;
                   2673:
                   2674:        /*
                   2675:         * Make sure no one is using the filesystem.
                   2676:         */
                   2677:        if (vfs->vfs_refcount != 0)
                   2678:                return (EBUSY);
                   2679:
                   2680:        /*
                   2681:         * ...and remove it from the kernel's list.
                   2682:         */
1.157   ! chs      2683:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2684:                if (v == vfs) {
                   2685:                        LIST_REMOVE(v, vfs_list);
                   2686:                        break;
                   2687:                }
                   2688:        }
                   2689:
                   2690:        if (v == NULL)
                   2691:                return (ESRCH);
1.121     jdolecek 2692:
                   2693:        /*
                   2694:         * Now run the file system-specific cleanups.
                   2695:         */
                   2696:        (*vfs->vfs_done)();
1.79      thorpej  2697:
                   2698:        /*
                   2699:         * Free the vnode operations vector.
                   2700:         */
                   2701:        vfs_opv_free(vfs->vfs_opv_descs);
                   2702:        return (0);
1.157   ! chs      2703: }
        !          2704:
        !          2705: void
        !          2706: vfs_reinit(void)
        !          2707: {
        !          2708:        struct vfsops *vfs;
        !          2709:
        !          2710:        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
        !          2711:                if (vfs->vfs_reinit) {
        !          2712:                        (*vfs->vfs_reinit)();
        !          2713:                }
        !          2714:        }
1.29      cgd      2715: }
1.125     chs      2716:
                   2717: #ifdef DDB
/*
 * Flag-name table passed to bitmask_snprintf() by vfs_buf_print() to
 * decode bp->b_flags.
 * NOTE(review): the layout presumably follows the bitmask_snprintf(9)
 * convention (a leading \20 selecting hexadecimal output, followed by
 * "\<bit number><NAME>" pairs with bits counted from 1) -- confirm
 * against the manual page before adding or renumbering entries.
 */
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";
1.125     chs      2723:
                   2724: void
                   2725: vfs_buf_print(bp, full, pr)
                   2726:        struct buf *bp;
                   2727:        int full;
                   2728:        void (*pr) __P((const char *, ...));
                   2729: {
                   2730:        char buf[1024];
                   2731:
                   2732:        (*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
                   2733:                  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
                   2734:
                   2735:        bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
                   2736:        (*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
                   2737:
                   2738:        (*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
                   2739:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142     chs      2740:        (*pr)("  data %p saveaddr %p dep %p\n",
                   2741:                  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125     chs      2742:        (*pr)("  iodone %p\n", bp->b_iodone);
                   2743: }
                   2744:
                   2745:
/*
 * Flag-name table passed to bitmask_snprintf() by vfs_vnode_print() to
 * decode vp->v_flag.
 * NOTE(review): presumably in bitmask_snprintf(9) format (leading \20
 * for hexadecimal, then "\<bit number><NAME>" pairs, bits counted
 * from 1) -- confirm against the manual page.
 */
const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
1.125     chs      2749:
/*
 * Printable names for vnode types; vfs_vnode_print() indexes this
 * array with vp->v_type after range-checking it.
 * NOTE(review): the order presumably mirrors enum vtype -- keep the two
 * in step.
 */
const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};
                   2761:
/*
 * Printable names for vnode tags; vfs_vnode_print() indexes this
 * array with vp->v_tag after range-checking it.
 * NOTE(review): the order presumably mirrors the vnode tag enumeration
 * -- keep the two in step when a file system type is added.
 */
const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};
                   2787:
                   2788: void
                   2789: vfs_vnode_print(vp, full, pr)
                   2790:        struct vnode *vp;
                   2791:        int full;
                   2792:        void (*pr) __P((const char *, ...));
                   2793: {
1.142     chs      2794:        char buf[256];
1.125     chs      2795:
                   2796:        const char *vtype, *vtag;
                   2797:
                   2798:        uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
                   2799:        bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
                   2800:        (*pr)("\nVNODE flags %s\n", buf);
1.142     chs      2801:        (*pr)("mp %p nio %d size 0x%x rwlock 0x%x glock 0x%x\n",
                   2802:              vp->v_mount, vp->v_uvm.u_nio, (int)vp->v_uvm.u_size,
                   2803:              vp->v_vnlock ? lockstatus(vp->v_vnlock) : 0x999,
                   2804:              lockstatus(&vp->v_glock));
1.125     chs      2805:
                   2806:        (*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
                   2807:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   2808:              vp->v_holdcnt, vp->v_numoutput);
                   2809:
                   2810:        vtype = (vp->v_type >= 0 &&
                   2811:                 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
                   2812:                vnode_types[vp->v_type] : "UNKNOWN";
                   2813:        vtag = (vp->v_tag >= 0 &&
                   2814:                vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
                   2815:                vnode_tags[vp->v_tag] : "UNKNOWN";
                   2816:
                   2817:        (*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
                   2818:              vtype, vp->v_type, vtag, vp->v_tag,
                   2819:              vp->v_id, vp->v_mount, vp->v_mountedhere);
                   2820:        (*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
                   2821:              vp->v_lastr, vp->v_lastw, vp->v_lasta);
                   2822:        (*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
                   2823:              vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);
                   2824:
                   2825:        if (full) {
                   2826:                struct buf *bp;
                   2827:
                   2828:                (*pr)("clean bufs:\n");
1.142     chs      2829:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   2830:                        (*pr)(" bp %p\n", bp);
1.125     chs      2831:                        vfs_buf_print(bp, full, pr);
                   2832:                }
                   2833:
                   2834:                (*pr)("dirty bufs:\n");
1.142     chs      2835:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   2836:                        (*pr)(" bp %p\n", bp);
1.125     chs      2837:                        vfs_buf_print(bp, full, pr);
                   2838:                }
                   2839:        }
                   2840: }
                   2841: #endif

CVSweb <webmaster@jp.NetBSD.org>