[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.177

1.177   ! matt        1: /*     $NetBSD: vfs_subr.c,v 1.176 2002/08/26 01:26:29 thorpej Exp $   */
1.74      thorpej     2:
                      3: /*-
1.79      thorpej     4:  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.32      cgd        39:
1.29      cgd        40: /*
1.30      mycroft    41:  * Copyright (c) 1989, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        43:  * (c) UNIX System Laboratories, Inc.
                     44:  * All or some portions of this file are derived from material licensed
                     45:  * to the University of California by American Telephone and Telegraph
                     46:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     47:  * the permission of UNIX System Laboratories, Inc.
                     48:  *
                     49:  * Redistribution and use in source and binary forms, with or without
                     50:  * modification, are permitted provided that the following conditions
                     51:  * are met:
                     52:  * 1. Redistributions of source code must retain the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer.
                     54:  * 2. Redistributions in binary form must reproduce the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer in the
                     56:  *    documentation and/or other materials provided with the distribution.
                     57:  * 3. All advertising materials mentioning features or use of this software
                     58:  *    must display the following acknowledgement:
                     59:  *     This product includes software developed by the University of
                     60:  *     California, Berkeley and its contributors.
                     61:  * 4. Neither the name of the University nor the names of its contributors
                     62:  *    may be used to endorse or promote products derived from this software
                     63:  *    without specific prior written permission.
                     64:  *
                     65:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     66:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     67:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     68:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     69:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     70:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     71:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     72:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     73:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     74:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     75:  * SUCH DAMAGE.
                     76:  *
1.32      cgd        77:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        78:  */
                     79:
                     80: /*
                     81:  * External virtual filesystem routines
                     82:  */
1.162     lukem      83:
                     84: #include <sys/cdefs.h>
1.177   ! matt       85: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.176 2002/08/26 01:26:29 thorpej Exp $");
1.78      mrg        86:
1.125     chs        87: #include "opt_ddb.h"
1.95      thorpej    88: #include "opt_compat_netbsd.h"
1.97      christos   89: #include "opt_compat_43.h"
1.29      cgd        90:
                     91: #include <sys/param.h>
1.30      mycroft    92: #include <sys/systm.h>
1.29      cgd        93: #include <sys/proc.h>
1.138     bouyer     94: #include <sys/kernel.h>
1.29      cgd        95: #include <sys/mount.h>
                     96: #include <sys/time.h>
1.46      mycroft    97: #include <sys/fcntl.h>
1.29      cgd        98: #include <sys/vnode.h>
1.30      mycroft    99: #include <sys/stat.h>
1.29      cgd       100: #include <sys/namei.h>
                    101: #include <sys/ucred.h>
                    102: #include <sys/buf.h>
                    103: #include <sys/errno.h>
                    104: #include <sys/malloc.h>
1.30      mycroft   105: #include <sys/domain.h>
                    106: #include <sys/mbuf.h>
1.51      christos  107: #include <sys/syscallargs.h>
1.58      thorpej   108: #include <sys/device.h>
1.71      fvdl      109: #include <sys/dirent.h>
1.50      christos  110:
1.30      mycroft   111: #include <miscfs/specfs/specdev.h>
1.113     fvdl      112: #include <miscfs/genfs/genfs.h>
                    113: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   114:
1.125     chs       115: #include <uvm/uvm.h>
                    116: #include <uvm/uvm_ddb.h>
1.129     mrg       117:
                    118: #include <sys/sysctl.h>
1.77      mrg       119:
/*
 * iftovt_tab: 16 vnode types (enum vtype).
 * NOTE(review): presumably indexed by the file-type nibble of a mode,
 * i.e. (mode & S_IFMT) >> 12, via a macro in vnode.h -- confirm there.
 * Slots with no corresponding file type hold VNON; the last holds VBAD.
 */
1.30      mycroft   120: enum vtype iftovt_tab[16] = {
                     121:        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
                     122:        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
                     123: };
/*
 * vttoif_tab: nine S_IF* mode-type values, starting with 0.
 * NOTE(review): presumably indexed by enum vtype (the reverse of the
 * table above) -- confirm against the enum's declaration order.
 */
1.146     jdolecek  124: const int      vttoif_tab[9] = {
1.30      mycroft   125:        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
                     126:        S_IFSOCK, S_IFIFO, S_IFMT,
                     127: };
                    128:
1.31      mycroft   129: int doforce = 1;               /* 1 => permit forcible unmounting */
                    130: int prtactive = 0;             /* 1 => print out reclaim of active vnodes */
1.29      cgd       131:
1.117     fvdl      132: extern int dovfsusermount;     /* 1 => permit any user to mount filesystems */
                    133:
1.29      cgd       134: /*
                    135:  * Insq/Remq for the vnode usage lists.
                    136:  */
/* bufinsvn: link buffer bp at the head of list dp through its b_vnbufs entry. */
                     137: #define        bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
/*
 * bufremvn: unlink bp from whatever b_vnbufs list it is on and mark it
 * as being on no list by storing NOLIST in its le_next field.
 * The expansion is a bare brace block (not do { } while (0)), so
 * "if (x) bufremvn(bp); else ..." will not parse; use it only as a
 * stand-alone statement.
 */
                     138: #define        bufremvn(bp) {                                                  \
                     139:        LIST_REMOVE(bp, b_vnbufs);                                      \
                     140:        (bp)->b_vnbufs.le_next = NOLIST;                                \
                     141: }
1.113     fvdl      142: /* struct freelst is a TAILQ_HEAD(freelst, vnode); the type lives in vnode.h */
/* First list getnewvnode() looks at when it recycles a vnode. */
                     143: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
/* Second choice for getnewvnode(): used only when vnode_free_list is empty. */
1.114     enami     144: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113     fvdl      145:
1.55      cgd       146: struct mntlist mountlist =                     /* mounted filesystem list */
                     147:     CIRCLEQ_HEAD_INITIALIZER(mountlist);
1.79      thorpej   148: struct vfs_list_head vfs_list =                        /* vfs list */
1.118     mycroft   149:     LIST_HEAD_INITIALIZER(vfs_list);
1.79      thorpej   150:
1.71      fvdl      151: struct nfs_public nfs_pub;                     /* publicly exported FS */
1.58      thorpej   152:
/* Held by vfs_getvfs() while it walks mountlist. */
1.135     sommerfe  153: struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
/* Held by vfs_getnewfsid() for the whole of its fsid search. */
                     154: static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
/* NOTE(review): not taken in the part of the file reviewed here; by name, guards per-mount vnode lists -- confirm. */
                     155: struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
/* Held by getnewvnode() while it inspects and unlinks from the two free lists. */
                     156: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
/* NOTE(review): not taken in the part of the file reviewed here; by name, guards the special-device hash -- confirm. */
                     157: struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
1.80      fvdl      158:
1.79      thorpej   159: /*
                    160:  * These define the root filesystem and device.
                    161:  */
                    162: struct mount *rootfs;
                    163: struct vnode *rootvnode;
1.80      fvdl      164: struct device *root_device;                    /* root device */
1.79      thorpej   165:
1.93      thorpej   166: struct pool vnode_pool;                                /* memory pool for vnodes */
                    167:
1.89      kleink    168: /*
                    169:  * Local declarations.
                    170:  */
1.50      christos  171: void insmntque __P((struct vnode *, struct mount *));
                    172: int getdevvp __P((dev_t, struct vnode **, enum vtype));
                    173: void vgoneall __P((struct vnode *));
                    174:
                    175: static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
                    176:                                  struct export_args *));
                    177: static int vfs_free_netcred __P((struct radix_node *, void *));
                    178: static void vfs_free_addrlist __P((struct netexport *));
1.51      christos  179:
                    180: #ifdef DEBUG
                    181: void printlockedvnodes __P((void));
                    182: #endif
                    183:
1.29      cgd       184: /*
1.30      mycroft   185:  * Initialize the vnode management data structures.
1.29      cgd       186:  */
1.50      christos  187: void
1.30      mycroft   188: vntblinit()
1.29      cgd       189: {
1.93      thorpej   190:
                    191:        pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
1.171     thorpej   192:            &pool_allocator_nointr);
1.113     fvdl      193:
                    194:        /*
                    195:         * Initialize the filesystem syncer.
                    196:         */
                    197:        vn_initialize_syncerd();
1.29      cgd       198: }
                    199:
                    200: /*
1.80      fvdl      201:  * Mark a mount point as busy. Used to synchronize access and to delay
                    202:  * unmounting. Interlock is not released on failure.
1.29      cgd       203:  */
1.50      christos  204: int
1.80      fvdl      205: vfs_busy(mp, flags, interlkp)
                    206:        struct mount *mp;
                    207:        int flags;
                    208:        struct simplelock *interlkp;
1.29      cgd       209: {
1.80      fvdl      210:        int lkflags;
1.29      cgd       211:
1.103     sommerfe  212:        while (mp->mnt_flag & MNT_UNMOUNT) {
                    213:                int gone;
                    214:
1.80      fvdl      215:                if (flags & LK_NOWAIT)
                    216:                        return (ENOENT);
1.113     fvdl      217:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
                    218:                    && mp->mnt_unmounter == curproc)
                    219:                        return (EDEADLK);
1.80      fvdl      220:                if (interlkp)
                    221:                        simple_unlock(interlkp);
                    222:                /*
                    223:                 * Since all busy locks are shared except the exclusive
                    224:                 * lock granted when unmounting, the only place that a
                    225:                 * wakeup needs to be done is at the release of the
                    226:                 * exclusive lock at the end of dounmount.
1.103     sommerfe  227:                 *
1.106     sommerfe  228:                 * XXX MP: add spinlock protecting mnt_wcnt here once you
                    229:                 * can atomically unlock-and-sleep.
1.80      fvdl      230:                 */
1.103     sommerfe  231:                mp->mnt_wcnt++;
1.113     fvdl      232:                tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
1.103     sommerfe  233:                mp->mnt_wcnt--;
                    234:                gone = mp->mnt_flag & MNT_GONE;
                    235:
                    236:                if (mp->mnt_wcnt == 0)
                    237:                        wakeup(&mp->mnt_wcnt);
1.80      fvdl      238:                if (interlkp)
                    239:                        simple_lock(interlkp);
1.103     sommerfe  240:                if (gone)
                    241:                        return (ENOENT);
1.80      fvdl      242:        }
                    243:        lkflags = LK_SHARED;
                    244:        if (interlkp)
                    245:                lkflags |= LK_INTERLOCK;
                    246:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                    247:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       248:        return (0);
                    249: }
                    250:
                    251: /*
1.80      fvdl      252:  * Free a busy filesystem.
1.29      cgd       253:  */
                    254: void
1.80      fvdl      255: vfs_unbusy(mp)
                    256:        struct mount *mp;
1.29      cgd       257: {
                    258:
1.80      fvdl      259:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       260: }
                    261:
                    262: /*
1.80      fvdl      263:  * Lookup a filesystem type, and if found allocate and initialize
                    264:  * a mount structure for it.
                    265:  *
                    266:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       267:  */
1.50      christos  268: int
1.80      fvdl      269: vfs_rootmountalloc(fstypename, devname, mpp)
                    270:        char *fstypename;
                    271:        char *devname;
                    272:        struct mount **mpp;
1.29      cgd       273: {
1.80      fvdl      274:        struct vfsops *vfsp = NULL;
                    275:        struct mount *mp;
1.29      cgd       276:
1.152     jdolecek  277:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80      fvdl      278:                if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
                    279:                        break;
                    280:
                    281:        if (vfsp == NULL)
                    282:                return (ENODEV);
                    283:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     284:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      285:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
                    286:        (void)vfs_busy(mp, LK_NOWAIT, 0);
                    287:        LIST_INIT(&mp->mnt_vnodelist);
                    288:        mp->mnt_op = vfsp;
                    289:        mp->mnt_flag = MNT_RDONLY;
                    290:        mp->mnt_vnodecovered = NULLVP;
                    291:        vfsp->vfs_refcount++;
                    292:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
                    293:        mp->mnt_stat.f_mntonname[0] = '/';
                    294:        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    295:        *mpp = mp;
1.29      cgd       296:        return (0);
                    297: }
                    298:
                    299: /*
                    300:  * Lookup a mount point by filesystem identifier.
                    301:  */
                    302: struct mount *
1.80      fvdl      303: vfs_getvfs(fsid)
1.29      cgd       304:        fsid_t *fsid;
                    305: {
1.123     augustss  306:        struct mount *mp;
1.29      cgd       307:
1.80      fvdl      308:        simple_lock(&mountlist_slock);
1.177   ! matt      309:        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
1.29      cgd       310:                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
1.80      fvdl      311:                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
                    312:                        simple_unlock(&mountlist_slock);
1.29      cgd       313:                        return (mp);
1.80      fvdl      314:                }
                    315:        }
                    316:        simple_unlock(&mountlist_slock);
1.29      cgd       317:        return ((struct mount *)0);
                    318: }
                    319:
                    320: /*
                    321:  * Get a new unique fsid
                    322:  */
                    323: void
1.127     assar     324: vfs_getnewfsid(mp)
1.29      cgd       325:        struct mount *mp;
                    326: {
                    327:        static u_short xxxfs_mntid;
                    328:        fsid_t tfsid;
1.80      fvdl      329:        int mtype;
1.29      cgd       330:
1.80      fvdl      331:        simple_lock(&mntid_slock);
1.127     assar     332:        mtype = makefstype(mp->mnt_op->vfs_name);
1.80      fvdl      333:        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
1.29      cgd       334:        mp->mnt_stat.f_fsid.val[1] = mtype;
                    335:        if (xxxfs_mntid == 0)
                    336:                ++xxxfs_mntid;
1.33      deraadt   337:        tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
1.29      cgd       338:        tfsid.val[1] = mtype;
1.177   ! matt      339:        if (!CIRCLEQ_EMPTY(&mountlist)) {
1.80      fvdl      340:                while (vfs_getvfs(&tfsid)) {
1.29      cgd       341:                        tfsid.val[0]++;
                    342:                        xxxfs_mntid++;
                    343:                }
                    344:        }
                    345:        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
1.80      fvdl      346:        simple_unlock(&mntid_slock);
1.29      cgd       347: }
                    348:
                    349: /*
1.30      mycroft   350:  * Make a 'unique' number from a mount type name.
1.29      cgd       351:  */
                    352: long
                    353: makefstype(type)
1.127     assar     354:        const char *type;
1.29      cgd       355: {
                    356:        long rv;
                    357:
                    358:        for (rv = 0; *type; type++) {
                    359:                rv <<= 2;
                    360:                rv ^= *type;
                    361:        }
                    362:        return rv;
                    363: }
1.30      mycroft   364:
1.80      fvdl      365:
1.30      mycroft   366: /*
                    367:  * Set vnode attributes to VNOVAL
                    368:  */
                    369: void
                    370: vattr_null(vap)
1.123     augustss  371:        struct vattr *vap;
1.30      mycroft   372: {
                    373:
                    374:        vap->va_type = VNON;
1.75      enami     375:
                    376:        /*
                    377:         * Assign individually so that it is safe even if size and
                    378:         * sign of each member are varied.
                    379:         */
                    380:        vap->va_mode = VNOVAL;
                    381:        vap->va_nlink = VNOVAL;
                    382:        vap->va_uid = VNOVAL;
                    383:        vap->va_gid = VNOVAL;
                    384:        vap->va_fsid = VNOVAL;
                    385:        vap->va_fileid = VNOVAL;
1.30      mycroft   386:        vap->va_size = VNOVAL;
1.75      enami     387:        vap->va_blocksize = VNOVAL;
1.76      christos  388:        vap->va_atime.tv_sec =
                    389:            vap->va_mtime.tv_sec =
                    390:            vap->va_ctime.tv_sec = VNOVAL;
                    391:        vap->va_atime.tv_nsec =
                    392:            vap->va_mtime.tv_nsec =
                    393:            vap->va_ctime.tv_nsec = VNOVAL;
1.75      enami     394:        vap->va_gen = VNOVAL;
                    395:        vap->va_flags = VNOVAL;
                    396:        vap->va_rdev = VNOVAL;
1.30      mycroft   397:        vap->va_bytes = VNOVAL;
                    398:        vap->va_vaflags = 0;
                    399: }
                    400:
                    401: /*
                    402:  * Routines having to do with the management of the vnode table.
                    403:  */
/* NOTE(review): defined elsewhere; by name, the vnode operations vector used for dead vnodes -- confirm. */
1.50      christos  404: extern int (**dead_vnodeop_p) __P((void *));
/*
 * Count of vnodes allocated from vnode_pool.  getnewvnode() bumps it
 * for every vnode it newly allocates and compares it against
 * desiredvnodes when choosing between allocating and recycling.
 */
1.30      mycroft   405: long numvnodes;
                    406:
1.29      cgd       407: /*
                    408:  * Return the next vnode from the free list.
                    409:  */
1.50      christos  410: int
1.29      cgd       411: getnewvnode(tag, mp, vops, vpp)
                    412:        enum vtagtype tag;
                    413:        struct mount *mp;
1.50      christos  414:        int (**vops) __P((void *));
1.29      cgd       415:        struct vnode **vpp;
                    416: {
1.142     chs       417:        extern struct uvm_pagerops uvm_vnodeops;
                    418:        struct uvm_object *uobj;
1.80      fvdl      419:        struct proc *p = curproc;       /* XXX */
1.113     fvdl      420:        struct freelst *listhd;
                    421:        static int toggle;
1.80      fvdl      422:        struct vnode *vp;
1.153     thorpej   423:        int error = 0, tryalloc;
1.158     chs       424:
1.159     enami     425:  try_again:
1.103     sommerfe  426:        if (mp) {
                    427:                /*
1.106     sommerfe  428:                 * Mark filesystem busy while we're creating a vnode.
                    429:                 * If unmount is in progress, this will wait; if the
                    430:                 * unmount succeeds (only if umount -f), this will
                    431:                 * return an error.  If the unmount fails, we'll keep
                    432:                 * going afterwards.
                    433:                 * (This puts the per-mount vnode list logically under
                    434:                 * the protection of the vfs_busy lock).
1.103     sommerfe  435:                 */
1.113     fvdl      436:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                    437:                if (error && error != EDEADLK)
1.103     sommerfe  438:                        return error;
                    439:        }
1.29      cgd       440:
1.113     fvdl      441:        /*
                    442:         * We must choose whether to allocate a new vnode or recycle an
                    443:         * existing one. The criterion for allocating a new one is that
                    444:         * the total number of vnodes is less than the number desired or
                    445:         * there are no vnodes on either free list. Generally we only
                    446:         * want to recycle vnodes that have no buffers associated with
                    447:         * them, so we look first on the vnode_free_list. If it is empty,
                    448:         * we next consider vnodes with referencing buffers on the
                    449:         * vnode_hold_list. The toggle ensures that half the time we
                    450:         * will use a buffer from the vnode_hold_list, and half the time
                    451:         * we will allocate a new one unless the list has grown to twice
                    452:         * the desired size. We are reticent to recycle vnodes from the
                    453:         * vnode_hold_list because we will lose the identity of all its
                    454:         * referencing buffers.
                    455:         */
1.142     chs       456:
1.153     thorpej   457:        vp = NULL;
                    458:
                    459:        simple_lock(&vnode_free_list_slock);
                    460:
1.113     fvdl      461:        toggle ^= 1;
                    462:        if (numvnodes > 2 * desiredvnodes)
                    463:                toggle = 0;
                    464:
1.153     thorpej   465:        tryalloc = numvnodes < desiredvnodes ||
1.159     enami     466:            (TAILQ_FIRST(&vnode_free_list) == NULL &&
                    467:             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153     thorpej   468:
                    469:        if (tryalloc &&
                    470:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.80      fvdl      471:                simple_unlock(&vnode_free_list_slock);
1.142     chs       472:                memset(vp, 0, sizeof(*vp));
1.158     chs       473:                simple_lock_init(&vp->v_interlock);
                    474:                uobj = &vp->v_uobj;
                    475:                uobj->pgops = &uvm_vnodeops;
                    476:                uobj->uo_npages = 0;
                    477:                TAILQ_INIT(&uobj->memq);
1.29      cgd       478:                numvnodes++;
                    479:        } else {
1.159     enami     480:                if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
                    481:                        vp = TAILQ_FIRST(listhd = &vnode_hold_list);
                    482:                for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
1.105     wrstuden  483:                        if (simple_lock_try(&vp->v_interlock)) {
1.158     chs       484:                                if ((vp->v_flag & VLAYER) == 0) {
1.105     wrstuden  485:                                        break;
1.158     chs       486:                                }
1.166     chs       487:                                if (VOP_ISLOCKED(vp) == 0)
                    488:                                        break;
                    489:                                else
                    490:                                        simple_unlock(&vp->v_interlock);
1.105     wrstuden  491:                        }
1.80      fvdl      492:                }
                    493:                /*
                    494:                 * Unless this is a bad time of the month, at most
                    495:                 * the first NCPUS items on the free list are
                    496:                 * locked, so this is close enough to being empty.
                    497:                 */
                    498:                if (vp == NULLVP) {
                    499:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl      500:                        if (mp && error != EDEADLK)
                    501:                                vfs_unbusy(mp);
1.153     thorpej   502:                        if (tryalloc) {
                    503:                                printf("WARNING: unable to allocate new "
                    504:                                    "vnode, retrying...\n");
                    505:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                    506:                                goto try_again;
                    507:                        }
1.132     jdolecek  508:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       509:                        *vpp = 0;
                    510:                        return (ENFILE);
                    511:                }
1.80      fvdl      512:                if (vp->v_usecount)
1.125     chs       513:                        panic("free vnode isn't, vp %p", vp);
1.113     fvdl      514:                TAILQ_REMOVE(listhd, vp, v_freelist);
1.30      mycroft   515:                /* see comment on why 0xdeadb is set at end of vgone (below) */
1.29      cgd       516:                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
1.80      fvdl      517:                simple_unlock(&vnode_free_list_slock);
1.30      mycroft   518:                vp->v_lease = NULL;
1.158     chs       519:
1.29      cgd       520:                if (vp->v_type != VBAD)
1.80      fvdl      521:                        vgonel(vp, p);
                    522:                else
                    523:                        simple_unlock(&vp->v_interlock);
1.30      mycroft   524: #ifdef DIAGNOSTIC
1.158     chs       525:                if (vp->v_data || vp->v_uobj.uo_npages ||
                    526:                    TAILQ_FIRST(&vp->v_uobj.memq))
1.125     chs       527:                        panic("cleaned vnode isn't, vp %p", vp);
1.30      mycroft   528:                if (vp->v_numoutput)
1.125     chs       529:                        panic("clean vnode has pending I/O's, vp %p", vp);
1.30      mycroft   530: #endif
1.166     chs       531:                KASSERT((vp->v_flag & VONWORKLST) == 0);
1.29      cgd       532:                vp->v_flag = 0;
1.158     chs       533:                vp->v_socket = NULL;
1.29      cgd       534:        }
                    535:        vp->v_type = VNON;
1.104     wrstuden  536:        vp->v_vnlock = &vp->v_lock;
                    537:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd       538:        cache_purge(vp);
                    539:        vp->v_tag = tag;
                    540:        vp->v_op = vops;
                    541:        insmntque(vp, mp);
1.30      mycroft   542:        *vpp = vp;
1.29      cgd       543:        vp->v_usecount = 1;
1.30      mycroft   544:        vp->v_data = 0;
1.158     chs       545:        simple_lock_init(&vp->v_uobj.vmobjlock);
1.142     chs       546:
                    547:        /*
                    548:         * initialize uvm_object within vnode.
                    549:         */
                    550:
1.158     chs       551:        uobj = &vp->v_uobj;
                    552:        KASSERT(uobj->pgops == &uvm_vnodeops);
                    553:        KASSERT(uobj->uo_npages == 0);
                    554:        KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
                    555:        vp->v_size = VSIZENOTSET;
1.142     chs       556:
1.113     fvdl      557:        if (mp && error != EDEADLK)
                    558:                vfs_unbusy(mp);
1.29      cgd       559:        return (0);
1.130     fvdl      560: }
                    561:
                    562: /*
                    563:  * This is really just the reverse of getnewvnode(). Needed for
                    564:  * VFS_VGET functions who may need to push back a vnode in case
                    565:  * of a locking race.
                          *
                          * vp is expected to hold exactly one reference; under DIAGNOSTIC
                          * any other v_usecount panics.  The reference is dropped, the vnode
                          * is detached from its mount's vnode list, marked VBAD and put at
                          * the head of vnode_hold_list (v_holdcnt > 0) or vnode_free_list.
                    566:  */
                    567: void
1.131     fvdl      568: ungetnewvnode(vp)
1.130     fvdl      569:        struct vnode *vp;
                    570: {
                    571: #ifdef DIAGNOSTIC
                    572:        if (vp->v_usecount != 1)
1.131     fvdl      573:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      574: #endif
                    575:        vp->v_usecount--;
                                /* mp == NULL: only unlink from the current mount's list. */
                    576:        insmntque(vp, NULL);
                                /*
                                 * getnewvnode() skips vgonel() for VBAD vnodes it takes off
                                 * the free list, so VBAD marks this vnode as needing no cleaning.
                                 */
                    577:        vp->v_type = VBAD;
                    578:
                                /* Lock order here: vnode interlock, then the free list lock. */
                    579:        simple_lock(&vp->v_interlock);
                    580:        /*
                    581:         * Insert at head of LRU list
                    582:         */
                    583:        simple_lock(&vnode_free_list_slock);
                    584:        if (vp->v_holdcnt > 0)
                    585:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    586:        else
                    587:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                    588:        simple_unlock(&vnode_free_list_slock);
                    589:        simple_unlock(&vp->v_interlock);
1.29      cgd       590: }
                    591:
                    592: /*
                    593:  * Move a vnode from one mount queue to another.
                          *
                          * vp is unlinked from the vnode list of its current mount (when
                          * v_mount is non-NULL), v_mount is set to mp and, when mp is
                          * non-NULL, vp is inserted at the head of mp->mnt_vnodelist.
                          * Passing mp == NULL therefore only detaches the vnode.  Both list
                          * operations are performed under mntvnode_slock.
                    594:  */
1.50      christos  595: void
1.29      cgd       596: insmntque(vp, mp)
1.123     augustss  597:        struct vnode *vp;
                    598:        struct mount *mp;
1.29      cgd       599: {
                    600:
1.103     sommerfe  601: #ifdef DIAGNOSTIC
                                /*
                                 * Catch insertion into a mount flagged MNT_UNMOUNT.  Mounts
                                 * flagged MNT_SOFTDEP and vnodes tagged VT_VFS are exempt.
                                 */
                    602:        if ((mp != NULL) &&
1.113     fvdl      603:            (mp->mnt_flag & MNT_UNMOUNT) &&
                    604:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    605:            vp->v_tag != VT_VFS) {
1.103     sommerfe  606:                panic("insmntque into dying filesystem");
                    607:        }
                    608: #endif
                    609:
1.80      fvdl      610:        simple_lock(&mntvnode_slock);
1.29      cgd       611:        /*
                    612:         * Delete from old mount point vnode list, if on one.
                    613:         */
                    614:        if (vp->v_mount != NULL)
                    615:                LIST_REMOVE(vp, v_mntvnodes);
                    616:        /*
                    617:         * Insert into list of vnodes for the new mount point, if available.
                    618:         */
1.80      fvdl      619:        if ((vp->v_mount = mp) != NULL)
                    620:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    621:        simple_unlock(&mntvnode_slock);
1.29      cgd       622: }
                    623:
                    624: /*
                    625:  * Update outstanding I/O count and do wakeup if requested.
                          *
                          * Decrements v_numoutput of the vnode bp is attached to; a buffer
                          * with b_vp == NULL is ignored.  A negative count panics.  When
                          * VBWAIT is set and the count has reached zero, VBWAIT is cleared
                          * and sleepers on &vp->v_numoutput are woken (vflushbuf() sleeps
                          * on that channel after setting VBWAIT).
                    626:  */
1.50      christos  627: void
1.29      cgd       628: vwakeup(bp)
1.123     augustss  629:        struct buf *bp;
1.29      cgd       630: {
1.123     augustss  631:        struct vnode *vp;
1.29      cgd       632:
1.50      christos  633:        if ((vp = bp->b_vp) != NULL) {
1.30      mycroft   634:                if (--vp->v_numoutput < 0)
1.125     chs       635:                        panic("vwakeup: neg numoutput, vp %p", vp);
                                        /* Last pending output done and somebody asked to be told. */
1.29      cgd       636:                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                    637:                        vp->v_flag &= ~VBWAIT;
                    638:                        wakeup((caddr_t)&vp->v_numoutput);
                    639:                }
                    640:        }
                    641: }
                    642:
                    643: /*
                    644:  * Flush out and invalidate all buffers associated with a vnode.
1.126     mycroft   645:  * Called with the underlying vnode locked, which should prevent new dirty
                    646:  * buffers from being queued.
                          *
                          * flags:    with V_SAVE, dirty data is written out first (PGO_CLEANIT
                          *           is added to the page flush, VOP_FSYNC() is called, and a
                          *           buffer still marked B_DELWRI is written with VOP_BWRITE());
                          *           without it buffers are simply invalidated.
                          * cred, p:  only passed through to VOP_FSYNC().
                          * slpflag, slptimeo: priority flags and timeout handed to tsleep()
                          *           when waiting for a busy buffer.
                          *
                          * Returns 0 on success, otherwise the error from VOP_PUTPAGES(),
                          * VOP_FSYNC() or an interrupted/timed-out tsleep().
1.29      cgd       647:  */
1.30      mycroft   648: int
                    649: vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1.123     augustss  650:        struct vnode *vp;
1.30      mycroft   651:        int flags;
                    652:        struct ucred *cred;
                    653:        struct proc *p;
                    654:        int slpflag, slptimeo;
1.29      cgd       655: {
1.126     mycroft   656:        struct buf *bp, *nbp;
1.158     chs       657:        int s, error;
1.166     chs       658:        int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
1.142     chs       659:                (flags & V_SAVE ? PGO_CLEANIT : 0);
                    660:
                    661:        /* XXXUBC this doesn't look at flags or slp* */
                                /*
                                 * The interlock is taken here and never released in this
                                 * function, on the error path either; presumably
                                 * VOP_PUTPAGES() consumes it -- TODO confirm against its contract.
                                 */
1.166     chs       662:        simple_lock(&vp->v_interlock);
                    663:        error = VOP_PUTPAGES(vp, 0, 0, flushflags);
                    664:        if (error) {
                    665:                return error;
1.142     chs       666:        }
1.166     chs       667:
1.30      mycroft   668:        if (flags & V_SAVE) {
1.140     fvdl      669:                error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
1.126     mycroft   670:                if (error)
1.122     fvdl      671:                        return (error);
1.126     mycroft   672: #ifdef DIAGNOSTIC
                                        /* After a waited-for fsync nothing may be dirty or in flight. */
1.122     fvdl      673:                s = splbio();
1.126     mycroft   674:                if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125     chs       675:                        panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113     fvdl      676:                splx(s);
1.126     mycroft   677: #endif
1.30      mycroft   678:        }
1.113     fvdl      679:
                                /* The buffer lists are walked at splbio() until the final splx(). */
1.115     fvdl      680:        s = splbio();
                    681:
1.126     mycroft   682: restart:
                                /*
                                 * Pass 1: clean buffers.  A busy buffer is waited for and the
                                 * whole scan restarted, since the lists may have changed while
                                 * asleep; nbp is fetched before bp is released.
                                 */
                    683:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    684:                nbp = LIST_NEXT(bp, b_vnbufs);
                    685:                if (bp->b_flags & B_BUSY) {
                    686:                        bp->b_flags |= B_WANTED;
                    687:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    688:                            "vinvalbuf", slptimeo);
                    689:                        if (error) {
                    690:                                splx(s);
                    691:                                return (error);
                    692:                        }
                    693:                        goto restart;
1.113     fvdl      694:                }
                                        /* Claim the buffer, mark it invalid and hand it to brelse(). */
1.126     mycroft   695:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    696:                brelse(bp);
                    697:        }
1.30      mycroft   698:
                                /* Pass 2: dirty buffers, same protocol as above. */
1.126     mycroft   699:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    700:                nbp = LIST_NEXT(bp, b_vnbufs);
                    701:                if (bp->b_flags & B_BUSY) {
                    702:                        bp->b_flags |= B_WANTED;
                    703:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    704:                            "vinvalbuf", slptimeo);
                    705:                        if (error) {
                    706:                                splx(s);
                    707:                                return (error);
1.29      cgd       708:                        }
1.126     mycroft   709:                        goto restart;
                    710:                }
                    711:                /*
                    712:                 * XXX Since there are no node locks for NFS, I believe
                    713:                 * there is a slight chance that a delayed write will
                    714:                 * occur while sleeping just above, so check for it.
                    715:                 */
                    716:                if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                    717: #ifdef DEBUG
                    718:                        printf("buffer still DELWRI\n");
                    719: #endif
                                                /* Write it instead of discarding it, then rescan. */
1.63      mycroft   720:                        bp->b_flags |= B_BUSY | B_VFLUSH;
1.126     mycroft   721:                        VOP_BWRITE(bp);
                    722:                        goto restart;
                    723:                }
                    724:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    725:                brelse(bp);
                    726:        }
                    727:
                    728: #ifdef DIAGNOSTIC
                    729:        if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
                    730:                panic("vinvalbuf: flush failed, vp %p", vp);
1.113     fvdl      731: #endif
1.126     mycroft   732:
                    733:        splx(s);
                    734:
                    735:        return (0);
                    736: }
                    737:
                    738: /*
                    739:  * Destroy any in core blocks past the truncation length.
                    740:  * Called with the underlying vnode locked, which should prevent new dirty
                    741:  * buffers from being queued.
                          *
                          * lbn:      first logical block to discard; buffers whose b_lblkno
                          *           is below lbn are left alone.
                          * slpflag, slptimeo: priority flags and timeout handed to tsleep()
                          *           when waiting for a busy buffer.
                          *
                          * Returns 0 on success, otherwise the error from VOP_PUTPAGES() or
                          * from an interrupted/timed-out tsleep().  Unlike vinvalbuf() no
                          * buffer is written out here; dirty buffers at or past lbn are
                          * invalidated.
                    742:  */
                    743: int
                    744: vtruncbuf(vp, lbn, slpflag, slptimeo)
                    745:        struct vnode *vp;
                    746:        daddr_t lbn;
                    747:        int slpflag, slptimeo;
                    748: {
                    749:        struct buf *bp, *nbp;
1.158     chs       750:        int s, error;
1.166     chs       751:        voff_t off;
                    752:
                                /*
                                 * Byte offset of block lbn, rounded up to a page boundary.
                                 * NOTE(review): vp->v_mount is dereferenced without a NULL
                                 * check -- confirm callers never pass an unmounted vnode.
                                 */
                    753:        off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
                                /*
                                 * The interlock is not released in this function; presumably
                                 * VOP_PUTPAGES() consumes it -- TODO confirm.
                                 */
                    754:        simple_lock(&vp->v_interlock);
                    755:        error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
                    756:        if (error) {
                    757:                return error;
                    758:        }
1.126     mycroft   759:
                    760:        s = splbio();
                    761:
                    762: restart:
                                /*
                                 * Clean list first.  After sleeping on a busy buffer the scan
                                 * restarts from the top because the lists may have changed.
                                 */
                    763:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    764:                nbp = LIST_NEXT(bp, b_vnbufs);
                    765:                if (bp->b_lblkno < lbn)
                    766:                        continue;
                    767:                if (bp->b_flags & B_BUSY) {
                    768:                        bp->b_flags |= B_WANTED;
1.142     chs       769:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   770:                            "vtruncbuf", slptimeo);
                    771:                        if (error) {
                    772:                                splx(s);
                    773:                                return (error);
1.29      cgd       774:                        }
1.126     mycroft   775:                        goto restart;
1.29      cgd       776:                }
1.126     mycroft   777:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    778:                brelse(bp);
1.29      cgd       779:        }
1.115     fvdl      780:
                                /* Then the dirty list, with the same protocol. */
1.126     mycroft   781:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    782:                nbp = LIST_NEXT(bp, b_vnbufs);
                    783:                if (bp->b_lblkno < lbn)
                    784:                        continue;
                    785:                if (bp->b_flags & B_BUSY) {
                    786:                        bp->b_flags |= B_WANTED;
1.142     chs       787:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   788:                            "vtruncbuf", slptimeo);
                    789:                        if (error) {
                    790:                                splx(s);
                    791:                                return (error);
                    792:                        }
                    793:                        goto restart;
                    794:                }
                    795:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    796:                brelse(bp);
                    797:        }
1.115     fvdl      798:
                    799:        splx(s);
                    800:
1.30      mycroft   801:        return (0);
                    802: }
                    803:
                    804: void
                    805: vflushbuf(vp, sync)
1.123     augustss  806:        struct vnode *vp;
1.30      mycroft   807:        int sync;
                    808: {
                                /*
                                 * Start writes for the vnode's dirty pages and every non-busy
                                 * buffer on its dirty list.  With sync == 0 the function returns
                                 * once the writes have been started; with sync != 0 it then
                                 * sleeps until v_numoutput drains and repeats while the dirty
                                 * list is non-empty.  The VOP_PUTPAGES() result is ignored.
                                 */
1.123     augustss  809:        struct buf *bp, *nbp;
1.166     chs       810:        int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
1.30      mycroft   811:        int s;
                    812:
                                /* Interlock is not released here; presumably consumed by the VOP -- TODO confirm. */
1.166     chs       813:        simple_lock(&vp->v_interlock);
                    814:        (void) VOP_PUTPAGES(vp, 0, 0, flags);
1.142     chs       815:
1.30      mycroft   816: loop:
                    817:        s = splbio();
1.126     mycroft   818:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    819:                nbp = LIST_NEXT(bp, b_vnbufs);
                                        /* Busy buffers are skipped, not waited for. */
1.30      mycroft   820:                if ((bp->b_flags & B_BUSY))
                    821:                        continue;
                    822:                if ((bp->b_flags & B_DELWRI) == 0)
1.125     chs       823:                        panic("vflushbuf: not dirty, bp %p", bp);
1.63      mycroft   824:                bp->b_flags |= B_BUSY | B_VFLUSH;
                                        /* The write is issued below splbio; hence the rescan via "goto loop". */
1.30      mycroft   825:                splx(s);
                    826:                /*
                    827:                 * Wait for I/O associated with indirect blocks to complete,
                    828:                 * since there is no way to quickly wait for them below.
                    829:                 */
                                        /*
                                         * I.e. buffers owned by this vnode (or any buffer when not
                                         * syncing) go out with bawrite(); a buffer whose b_vp is a
                                         * different vnode is written with bwrite() when sync is set.
                                         */
                    830:                if (bp->b_vp == vp || sync == 0)
                    831:                        (void) bawrite(bp);
                    832:                else
                    833:                        (void) bwrite(bp);
                    834:                goto loop;
                    835:        }
                    836:        if (sync == 0) {
                    837:                splx(s);
                    838:                return;
                    839:        }
                                /* vwakeup() clears VBWAIT and wakes this channel when the count hits zero. */
                    840:        while (vp->v_numoutput) {
                    841:                vp->v_flag |= VBWAIT;
                    842:                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
                    843:        }
                    844:        splx(s);
                                /* Buffers skipped as busy, or dirtied meanwhile: report and go again. */
1.126     mycroft   845:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30      mycroft   846:                vprint("vflushbuf: dirty", vp);
                    847:                goto loop;
                    848:        }
1.29      cgd       849: }
                    850:
                    851: /*
                    852:  * Associate a buffer with a vnode.
                          *
                          * Panics if bp already has a vnode.  Takes a hold on vp (VHOLD),
                          * records it in bp->b_vp, sets bp->b_dev to the vnode's v_rdev for
                          * VBLK/VCHR vnodes and to NODEV otherwise, and inserts bp on the
                          * vnode's clean buffer list.  The buffer fields and the list are
                          * updated at splbio().
                    853:  */
1.50      christos  854: void
1.29      cgd       855: bgetvp(vp, bp)
1.123     augustss  856:        struct vnode *vp;
                    857:        struct buf *bp;
1.29      cgd       858: {
1.115     fvdl      859:        int s;
1.29      cgd       860:
                    861:        if (bp->b_vp)
1.125     chs       862:                panic("bgetvp: not free, bp %p", bp);
                                /* Released again by HOLDRELE() in brelvp(). */
1.29      cgd       863:        VHOLD(vp);
1.115     fvdl      864:        s = splbio();
1.29      cgd       865:        bp->b_vp = vp;
                    866:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                    867:                bp->b_dev = vp->v_rdev;
                    868:        else
                    869:                bp->b_dev = NODEV;
                    870:        /*
                    871:         * Insert onto list for new vnode.
                    872:         */
                    873:        bufinsvn(bp, &vp->v_cleanblkhd);
1.115     fvdl      874:        splx(s);
1.29      cgd       875: }
                    876:
                    877: /*
                    878:  * Disassociate a buffer from a vnode.
                          *
                          * Panics if bp has no vnode.  At splbio(): removes bp from the
                          * vnode buffer list it is on (if any), takes the vnode off the
                          * syncer worklist when it no longer has pages or dirty buffers,
                          * clears bp->b_vp and drops the hold taken in bgetvp().
                    879:  */
1.50      christos  880: void
1.29      cgd       881: brelvp(bp)
1.123     augustss  882:        struct buf *bp;
1.29      cgd       883: {
                    884:        struct vnode *vp;
1.115     fvdl      885:        int s;
1.29      cgd       886:
1.125     chs       887:        if (bp->b_vp == NULL)
                    888:                panic("brelvp: vp NULL, bp %p", bp);
1.115     fvdl      889:
                    890:        s = splbio();
1.113     fvdl      891:        vp = bp->b_vp;
1.29      cgd       892:        /*
                    893:         * Delete from old vnode list, if on one.
                    894:         */
                                /* A next pointer equal to NOLIST is treated as "not on any list". */
1.177   ! matt      895:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       896:                bufremvn(bp);
1.142     chs       897:
                                /*
                                 * Nothing left for the syncer: no pages in the vnode's
                                 * uvm_object and an empty dirty buffer list.
                                 */
1.158     chs       898:        if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
1.142     chs       899:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.113     fvdl      900:                vp->v_flag &= ~VONWORKLST;
                    901:                LIST_REMOVE(vp, v_synclist);
                    902:        }
1.142     chs       903:
                    904:        bp->b_vp = NULL;
1.29      cgd       905:        HOLDRELE(vp);
1.115     fvdl      906:        splx(s);
1.29      cgd       907: }
                    908:
                    909: /*
                    910:  * Reassign a buffer from one vnode to another.
                    911:  * Used to assign file specific control information
                    912:  * (indirect blocks) to the vnode to which they belong.
1.115     fvdl      913:  *
                    914:  * This function must be called at splbio().
                          *
                          * bp is taken off whatever vnode buffer list it is on and inserted
                          * on newvp's dirty list if B_DELWRI is set, otherwise on its clean
                          * list.  newvp's syncer worklist membership is adjusted to match.
                          * bp->b_vp itself is not modified here.
1.29      cgd       915:  */
1.50      christos  916: void
1.29      cgd       917: reassignbuf(bp, newvp)
1.113     fvdl      918:        struct buf *bp;
                    919:        struct vnode *newvp;
1.29      cgd       920: {
1.113     fvdl      921:        struct buflists *listheadp;
                    922:        int delay;
1.29      cgd       923:
                    924:        /*
                    925:         * Delete from old vnode list, if on one.
                    926:         */
1.177   ! matt      927:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       928:                bufremvn(bp);
                    929:        /*
                    930:         * If dirty, put on list of dirty buffers;
                    931:         * otherwise insert onto list of clean buffers.
                    932:         */
1.113     fvdl      933:        if ((bp->b_flags & B_DELWRI) == 0) {
                    934:                listheadp = &newvp->v_cleanblkhd;
                                        /*
                                         * bp was already unlinked above, so an empty dirty list
                                         * (and no pages) means the syncer has nothing to do.
                                         */
1.158     chs       935:                if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1.142     chs       936:                    (newvp->v_flag & VONWORKLST) &&
1.113     fvdl      937:                    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
                    938:                        newvp->v_flag &= ~VONWORKLST;
                    939:                        LIST_REMOVE(newvp, v_synclist);
                    940:                }
                    941:        } else {
1.29      cgd       942:                listheadp = &newvp->v_dirtyblkhd;
                                        /*
                                         * First dirty buffer for a vnode not yet on the worklist:
                                         * pick the sync delay by vnode type.  Directories use
                                         * dirdelay, block devices with v_specmountpoint set use
                                         * metadelay, everything else filedelay.
                                         */
1.113     fvdl      943:                if ((newvp->v_flag & VONWORKLST) == 0) {
                    944:                        switch (newvp->v_type) {
                    945:                        case VDIR:
                    946:                                delay = dirdelay;
                    947:                                break;
                    948:                        case VBLK:
                    949:                                if (newvp->v_specmountpoint != NULL) {
                    950:                                        delay = metadelay;
                    951:                                        break;
                    952:                                }
                    953:                                /* fall through */
                    954:                        default:
1.118     mycroft   955:                                delay = filedelay;
                    956:                                break;
1.113     fvdl      957:                        }
                                                /* Vnodes on MNT_ASYNC mounts are not queued for the syncer. */
1.118     mycroft   958:                        if (!newvp->v_mount ||
                    959:                            (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
                    960:                                vn_syncer_add_to_worklist(newvp, delay);
1.113     fvdl      961:                }
                    962:        }
1.29      cgd       963:        bufinsvn(bp, listheadp);
                    964: }
                    965:
                    966: /*
                    967:  * Create a vnode for a block device.
1.59      thorpej   968:  * Used for root filesystem and swap areas.
1.29      cgd       969:  * Also used for memory file system special devices.
                          *
                          * Thin wrapper around getdevvp(dev, vpp, VBLK); the result vnode is
                          * stored in *vpp and getdevvp()'s return value is passed through.
                    970:  */
1.50      christos  971: int
1.29      cgd       972: bdevvp(dev, vpp)
                    973:        dev_t dev;
                    974:        struct vnode **vpp;
                    975: {
1.30      mycroft   976:
                    977:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd       978: }
                    979:
                    980: /*
                    981:  * Create a vnode for a character device.
                    982:  * Used for kernfs and some console handling.
                          *
                          * Thin wrapper around getdevvp(dev, vpp, VCHR); the result vnode is
                          * stored in *vpp and getdevvp()'s return value is passed through.
                    983:  */
1.50      christos  984: int
1.29      cgd       985: cdevvp(dev, vpp)
                    986:        dev_t dev;
                    987:        struct vnode **vpp;
                    988: {
1.30      mycroft   989:
                    990:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd       991: }
                    992:
                    993: /*
                    994:  * Create a vnode for a device.
                    995:  * Used by bdevvp (block device) for root file system etc.,
                    996:  * and by cdevvp (character device) for console and kernfs.
                          *
                          * dev:   device number; for NODEV, *vpp is set to NULLVP and 0 is
                          *        returned.
                          * vpp:   receives the vnode, or NULLVP on failure.
                          * type:  vnode type to assign (the callers here pass VBLK or VCHR).
                          *
                          * Returns 0 on success or the error from getnewvnode().
                    997:  */
1.50      christos  998: int
1.29      cgd       999: getdevvp(dev, vpp, type)
                   1000:        dev_t dev;
                   1001:        struct vnode **vpp;
                   1002:        enum vtype type;
                   1003: {
1.123     augustss 1004:        struct vnode *vp;
1.29      cgd      1005:        struct vnode *nvp;
                   1006:        int error;
                   1007:
1.80      fvdl     1008:        if (dev == NODEV) {
                   1009:                *vpp = NULLVP;
1.29      cgd      1010:                return (0);
1.80      fvdl     1011:        }
                                /* New vnode tagged VT_NON, on no mount, using the spec vnode ops. */
1.50      christos 1012:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1013:        if (error) {
                   1014:                *vpp = NULLVP;
                   1015:                return (error);
                   1016:        }
                   1017:        vp = nvp;
                   1018:        vp->v_type = type;
                                /*
                                 * If checkalias() hands back an existing vnode for this
                                 * device, use that one and give up the vnode just allocated.
                                 * NOTE(review): vput() is used although nothing visible here
                                 * locked vp -- confirm this is intended.
                                 */
1.50      christos 1019:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd      1020:                vput(vp);
                   1021:                vp = nvp;
                   1022:        }
                   1023:        *vpp = vp;
                   1024:        return (0);
                   1025: }
                   1026:
                   1027: /*
                   1028:  * Check to see if the new vnode represents a special device
                   1029:  * for which we already have a vnode (either because of
                   1030:  * bdevvp() or because of a different vnode representing
                   1031:  * the same block device). If such an alias exists, deallocate
                   1032:  * the existing contents and return the aliased vnode. The
                   1033:  * caller is responsible for filling it with its new contents.
                          *
                          * nvp:       the new vnode; only VBLK and VCHR vnodes are examined.
                          * nvp_rdev:  device number nvp stands for.
                          * mp:        mount the returned alias is moved to via insmntque().
                          *
                          * Returns NULLVP when nvp itself is to be used: either it is not a
                          * device vnode, or no reusable alias exists, in which case nvp is
                          * given a specinfo and entered in the spec hash chain.  Otherwise
                          * returns the existing vnode, cleaned with vclean() and given nvp's
                          * v_op and v_tag; nvp's type is reset to VNON.  Only an alias that
                          * is a VBLK vnode tagged VT_NON is reused this way.
                   1034:  */
                   1035: struct vnode *
                   1036: checkalias(nvp, nvp_rdev, mp)
1.123     augustss 1037:        struct vnode *nvp;
1.29      cgd      1038:        dev_t nvp_rdev;
                   1039:        struct mount *mp;
                   1040: {
1.80      fvdl     1041:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1042:        struct vnode *vp;
1.29      cgd      1043:        struct vnode **vpp;
                   1044:
                   1045:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                   1046:                return (NULLVP);
                   1047:
                   1048:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                   1049: loop:
1.80      fvdl     1050:        simple_lock(&spechash_slock);
                                /* Look for a vnode with the same device number and type. */
1.29      cgd      1051:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                   1052:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                   1053:                        continue;
                   1054:                /*
                   1055:                 * Alias, but not in use, so flush it out.
                   1056:                 */
1.80      fvdl     1057:                simple_lock(&vp->v_interlock);
1.29      cgd      1058:                if (vp->v_usecount == 0) {
                                                /* vgonel() is entered with the interlock held; rescan afterwards. */
1.80      fvdl     1059:                        simple_unlock(&spechash_slock);
                   1060:                        vgonel(vp, p);
1.29      cgd      1061:                        goto loop;
                   1062:                }
                                        /*
                                         * In use: take a reference and the exclusive vnode lock.
                                         * LK_INTERLOCK tells vget() the interlock is already held.
                                         * On failure start over.
                                         */
1.80      fvdl     1063:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
                   1064:                        simple_unlock(&spechash_slock);
1.29      cgd      1065:                        goto loop;
1.80      fvdl     1066:                }
1.29      cgd      1067:                break;
                   1068:        }
                                /*
                                 * spechash_slock is still held here.  No alias, or an alias that
                                 * cannot be taken over: set nvp up as a device vnode of its own.
                                 */
1.34      cgd      1069:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd      1070:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej  1071:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                   1072:                /* XXX Erg. */
                                        /*
                                         * NOTE(review): when vp != NULL this retry happens with the
                                         * reference and exclusive lock from vget() still held, and
                                         * the rescan will vget() the same vnode again -- looks like
                                         * a leak or self-deadlock on this path; confirm.
                                         */
                   1073:                if (nvp->v_specinfo == NULL) {
                   1074:                        simple_unlock(&spechash_slock);
                   1075:                        uvm_wait("checkalias");
                   1076:                        goto loop;
                   1077:                }
                   1078:
1.29      cgd      1079:                nvp->v_rdev = nvp_rdev;
                   1080:                nvp->v_hashchain = vpp;
                   1081:                nvp->v_specnext = *vpp;
1.113     fvdl     1082:                nvp->v_specmountpoint = NULL;
1.80      fvdl     1083:                simple_unlock(&spechash_slock);
1.62      kleink   1084:                nvp->v_speclockf = NULL;
                                        /*
                                         * NOTE(review): the chain head is stored after
                                         * spechash_slock was dropped -- confirm this is safe.
                                         */
1.29      cgd      1085:                *vpp = nvp;
                                        /* Both vnodes now name the same device; mark them and let go of vp. */
1.80      fvdl     1086:                if (vp != NULLVP) {
1.29      cgd      1087:                        nvp->v_flag |= VALIASED;
                   1088:                        vp->v_flag |= VALIASED;
                   1089:                        vput(vp);
                   1090:                }
                   1091:                return (NULLVP);
                   1092:        }
                                /*
                                 * Reuse the existing VT_NON block device vnode: clean it and
                                 * give it nvp's operations vector and tag.  There is no
                                 * vrele()/vput() on this path, so the reference taken by vget()
                                 * goes to the caller.
                                 */
1.80      fvdl     1093:        simple_unlock(&spechash_slock);
                   1094:        VOP_UNLOCK(vp, 0);
                   1095:        simple_lock(&vp->v_interlock);
                   1096:        vclean(vp, 0, p);
1.29      cgd      1097:        vp->v_op = nvp->v_op;
                   1098:        vp->v_tag = nvp->v_tag;
1.104     wrstuden 1099:        vp->v_vnlock = &vp->v_lock;
                   1100:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd      1101:        nvp->v_type = VNON;
                   1102:        insmntque(vp, mp);
                   1103:        return (vp);
                   1104: }
                   1105:
                   1106: /*
                   1107:  * Grab a particular vnode from the free list, increment its
1.83      fvdl     1108:  * reference count and lock it. If the vnode lock bit is set the
                   1109:  * vnode is being eliminated in vgone. In that case, we can not
                   1110:  * grab the vnode, so the process is awakened when the transition is
                   1111:  * completed, and an error returned to indicate that the vnode is no
                   1112:  * longer usable (possibly having been changed to a new file system type).
                          *
                          * The caller passes LK_INTERLOCK in flags if it already holds
                          * vp->v_interlock; otherwise the interlock is taken here.
                          *
                          * Returns 0 on success.  If VXLOCK is set, returns EBUSY at once
                          * when LK_NOWAIT is given, or else sleeps until woken and then
                          * returns ENOENT.  If flags contains a lock type (LK_TYPE_MASK) and
                          * vn_lock() fails, the reference taken here is dropped again without
                          * calling VOP_INACTIVE and the error from vn_lock() is returned.
1.29      cgd      1113:  */
1.30      mycroft  1114: int
1.80      fvdl     1115: vget(vp, flags)
                   1116:        struct vnode *vp;
                   1117:        int flags;
1.29      cgd      1118: {
1.175     perseant 1119:        int error;
1.29      cgd      1120:
1.30      mycroft  1121:        /*
                   1122:         * If the vnode is in the process of being cleaned out for
                   1123:         * another use, we wait for the cleaning to finish and then
1.80      fvdl     1124:         * return failure. Cleaning is determined by checking that
                   1125:         * the VXLOCK flag is set.
                   1126:         */
1.142     chs      1127:
1.80      fvdl     1128:        if ((flags & LK_INTERLOCK) == 0)
                   1129:                simple_lock(&vp->v_interlock);
                   1130:        if (vp->v_flag & VXLOCK) {
1.142     chs      1131:                if (flags & LK_NOWAIT) {
1.143     sommerfe 1132:                        simple_unlock(&vp->v_interlock);
1.142     chs      1133:                        return EBUSY;
                   1134:                }
                                                /* Ask to be woken when VXLOCK is cleared, then sleep. */
1.29      cgd      1135:                vp->v_flag |= VXWANT;
1.158     chs      1136:                ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80      fvdl     1137:                return (ENOENT);
1.29      cgd      1138:        }
                                        /* First use reference: unlink vp from the hold or free list. */
1.80      fvdl     1139:        if (vp->v_usecount == 0) {
                   1140:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1141:                if (vp->v_holdcnt > 0)
                   1142:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1143:                else
                   1144:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1145:                simple_unlock(&vnode_free_list_slock);
                   1146:        }
1.29      cgd      1147:        vp->v_usecount++;
                                        /* A zero count right after the increment means it wrapped. */
1.112     mycroft  1148: #ifdef DIAGNOSTIC
                   1149:        if (vp->v_usecount == 0) {
                   1150:                vprint("vget", vp);
1.125     chs      1151:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft  1152:        }
                   1153: #endif
1.80      fvdl     1154:        if (flags & LK_TYPE_MASK) {
1.113     fvdl     1155:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
                   1156:                        /*
                   1157:                         * must expand vrele here because we do not want
                   1158:                         * to call VOP_INACTIVE if the reference count
                   1159:                         * drops back to zero since it was never really
                   1160:                         * active. We must remove it from the free list
                   1161:                         * before sleeping so that multiple processes do
                   1162:                         * not try to recycle it.
                   1163:                         */
                   1164:                        simple_lock(&vp->v_interlock);
                   1165:                        vp->v_usecount--;
                   1166:                        if (vp->v_usecount > 0) {
                   1167:                                simple_unlock(&vp->v_interlock);
                   1168:                                return (error);
                   1169:                        }
                   1170:                        /*
                   1171:                         * insert at tail of LRU list
                   1172:                         */
                   1173:                        simple_lock(&vnode_free_list_slock);
                   1174:                        if (vp->v_holdcnt > 0)
                   1175:                                TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
                   1176:                                    v_freelist);
                   1177:                        else
                   1178:                                TAILQ_INSERT_TAIL(&vnode_free_list, vp,
                   1179:                                    v_freelist);
                   1180:                        simple_unlock(&vnode_free_list_slock);
                   1181:                        simple_unlock(&vp->v_interlock);
                   1182:                }
                                                /* error is vn_lock()'s result: 0 on success. */
1.80      fvdl     1183:                return (error);
                   1184:        }
                   1185:        simple_unlock(&vp->v_interlock);
1.29      cgd      1186:        return (0);
                   1187: }
                   1188:
                   1189: /*
                   1190:  * vput(), just unlock and vrele()
                          *
                          * Drops one use reference.  While other references remain, the
                          * vnode is simply unlocked with VOP_UNLOCK().  When the last
                          * reference goes away the vnode is appended to the hold list
                          * (v_holdcnt > 0) or the free list, VTEXT and VEXECMAP are cleared,
                          * and VOP_INACTIVE() is called without a preceding VOP_UNLOCK().
                          * NOTE(review): presumably the caller holds the vnode lock and
                          * VOP_INACTIVE() releases it -- confirm against the VOP contract.
                   1191:  */
                   1192: void
                   1193: vput(vp)
1.80      fvdl     1194:        struct vnode *vp;
1.29      cgd      1195: {
1.80      fvdl     1196:        struct proc *p = curproc;       /* XXX */
1.30      mycroft  1197:
1.111     mycroft  1198: #ifdef DIAGNOSTIC
1.80      fvdl     1199:        if (vp == NULL)
                   1200:                panic("vput: null vp");
                   1201: #endif
                   1202:        simple_lock(&vp->v_interlock);
                   1203:        vp->v_usecount--;
                                        /* Still referenced elsewhere: unlock and leave. */
                   1204:        if (vp->v_usecount > 0) {
                   1205:                simple_unlock(&vp->v_interlock);
                   1206:                VOP_UNLOCK(vp, 0);
                   1207:                return;
                   1208:        }
                   1209: #ifdef DIAGNOSTIC
                   1210:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1211:                vprint("vput: bad ref count", vp);
                   1212:                panic("vput: ref cnt");
                   1213:        }
                   1214: #endif
                   1215:        /*
1.87      pk       1216:         * Insert at tail of LRU list.
1.80      fvdl     1217:         */
                   1218:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1219:        if (vp->v_holdcnt > 0)
                   1220:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1221:        else
                   1222:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1223:        simple_unlock(&vnode_free_list_slock);
                                        /* Move this vnode's pages from the exec total to the file total. */
1.161     thorpej  1224:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1225:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1226:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1227:        }
1.161     thorpej  1228:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.80      fvdl     1229:        simple_unlock(&vp->v_interlock);
                   1230:        VOP_INACTIVE(vp, p);
1.29      cgd      1231: }
                   1232:
                   1233: /*
                   1234:  * Vnode release.
                   1235:  * If count drops to zero, call inactive routine and return to freelist.
                          *
                          * Drops one use reference under vp->v_interlock.  On the last
                          * reference the vnode is appended to the hold list (v_holdcnt > 0)
                          * or the free list, VTEXT and VEXECMAP are cleared, and the vnode
                          * is locked with vn_lock(LK_EXCLUSIVE | LK_INTERLOCK) before
                          * VOP_INACTIVE() is called.  If that lock attempt fails,
                          * VOP_INACTIVE() is not called.
                   1236:  */
                   1237: void
                   1238: vrele(vp)
1.80      fvdl     1239:        struct vnode *vp;
1.29      cgd      1240: {
1.80      fvdl     1241:        struct proc *p = curproc;       /* XXX */
1.29      cgd      1242:
                   1243: #ifdef DIAGNOSTIC
                   1244:        if (vp == NULL)
                   1245:                panic("vrele: null vp");
                   1246: #endif
1.80      fvdl     1247:        simple_lock(&vp->v_interlock);
1.29      cgd      1248:        vp->v_usecount--;
                                        /* Still referenced elsewhere: nothing more to do. */
1.80      fvdl     1249:        if (vp->v_usecount > 0) {
                   1250:                simple_unlock(&vp->v_interlock);
1.29      cgd      1251:                return;
1.80      fvdl     1252:        }
1.29      cgd      1253: #ifdef DIAGNOSTIC
1.80      fvdl     1254:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd      1255:                vprint("vrele: bad ref count", vp);
1.142     chs      1256:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd      1257:        }
                   1258: #endif
1.30      mycroft  1259:        /*
1.87      pk       1260:         * Insert at tail of LRU list.
1.30      mycroft  1261:         */
1.80      fvdl     1262:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1263:        if (vp->v_holdcnt > 0)
                   1264:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1265:        else
                   1266:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1267:        simple_unlock(&vnode_free_list_slock);
                                        /* Move this vnode's pages from the exec total to the file total. */
1.161     thorpej  1268:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1269:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1270:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1271:        }
1.161     thorpej  1272:        vp->v_flag &= ~(VTEXT|VEXECMAP);
                                        /* The interlock is passed to vn_lock(); skip VOP_INACTIVE on failure. */
1.80      fvdl     1273:        if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
                   1274:                VOP_INACTIVE(vp, p);
1.29      cgd      1275: }
                   1276:
1.80      fvdl     1277: #ifdef DIAGNOSTIC
1.29      cgd      1278: /*
                   1279:  * Page or buffer structure gets a reference.
                          *
                          * Increments v_holdcnt under vp->v_interlock, first moving the vnode
                          * from the free list to the hold list when this is its first hold
                          * and it has no use references.  This function form sits inside an
                          * #ifdef DIAGNOSTIC region.
                   1280:  */
1.30      mycroft  1281: void
1.29      cgd      1282: vhold(vp)
1.123     augustss 1283:        struct vnode *vp;
1.29      cgd      1284: {
                   1285:
1.113     fvdl     1286:        /*
                   1287:         * If it is on the freelist and the hold count is currently
                   1288:         * zero, move it to the hold list. The test of the back
                   1289:         * pointer and the use reference count of zero is because
                   1290:         * it will be removed from a free list by getnewvnode,
                   1291:         * but will not have its reference count incremented until
                   1292:         * after calling vgone. If the reference count were
                   1293:         * incremented first, vgone would (incorrectly) try to
                   1294:         * close the previous instance of the underlying object.
                   1295:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1296:         * getnewvnode after removing it from a freelist to ensure
                   1297:         * that we do not try to move it here.
                   1298:         */
                   1299:        simple_lock(&vp->v_interlock);
                   1300:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1301:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1302:                simple_lock(&vnode_free_list_slock);
                   1303:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1304:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1305:                simple_unlock(&vnode_free_list_slock);
                   1306:        }
1.29      cgd      1307:        vp->v_holdcnt++;
1.80      fvdl     1308:        simple_unlock(&vp->v_interlock);
1.29      cgd      1309: }
                   1310:
                   1311: /*
                   1312:  * Page or buffer structure frees a reference.
                          *
                          * Decrements v_holdcnt under vp->v_interlock and panics if the count
                          * was not positive.  When the last hold goes away and the vnode has
                          * no use references, it is moved from the hold list to the tail of
                          * the free list.
                   1313:  */
1.30      mycroft  1314: void
1.29      cgd      1315: holdrele(vp)
1.123     augustss 1316:        struct vnode *vp;
1.29      cgd      1317: {
                   1318:
1.80      fvdl     1319:        simple_lock(&vp->v_interlock);
1.29      cgd      1320:        if (vp->v_holdcnt <= 0)
1.125     chs      1321:                panic("holdrele: holdcnt vp %p", vp);
1.29      cgd      1322:        vp->v_holdcnt--;
1.142     chs      1323:
1.113     fvdl     1324:        /*
                   1325:         * If it is on the holdlist and the hold count drops to
                   1326:         * zero, move it to the free list. The test of the back
                   1327:         * pointer and the use reference count of zero is because
                   1328:         * it will be removed from a free list by getnewvnode,
                   1329:         * but will not have its reference count incremented until
                   1330:         * after calling vgone. If the reference count were
                   1331:         * incremented first, vgone would (incorrectly) try to
                   1332:         * close the previous instance of the underlying object.
                   1333:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1334:         * getnewvnode after removing it from a freelist to ensure
                   1335:         * that we do not try to move it here.
                   1336:         */
1.142     chs      1337:
1.113     fvdl     1338:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1339:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1340:                simple_lock(&vnode_free_list_slock);
                   1341:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1342:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1343:                simple_unlock(&vnode_free_list_slock);
                   1344:        }
1.81      ross     1345:        simple_unlock(&vp->v_interlock);
                   1346: }
                   1347:
                   1348: /*
                   1349:  * Vnode reference.
                          *
                          * Adds one use reference, under vp->v_interlock, to a vnode that
                          * already has at least one.  It is a panic to call this with
                          * v_usecount <= 0; vget() is required in that case.
                   1350:  */
                   1351: void
                   1352: vref(vp)
                   1353:        struct vnode *vp;
                   1354: {
                   1355:
                   1356:        simple_lock(&vp->v_interlock);
                   1357:        if (vp->v_usecount <= 0)
1.125     chs      1358:                panic("vref used where vget required, vp %p", vp);
1.81      ross     1359:        vp->v_usecount++;
                                        /* A zero count right after the increment means it wrapped. */
1.112     mycroft  1360: #ifdef DIAGNOSTIC
                   1361:        if (vp->v_usecount == 0) {
                   1362:                vprint("vref", vp);
1.125     chs      1363:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft  1364:        }
                   1365: #endif
1.80      fvdl     1366:        simple_unlock(&vp->v_interlock);
1.29      cgd      1367: }
1.80      fvdl     1368: #endif /* DIAGNOSTIC */
1.29      cgd      1369:
                   1370: /*
                   1371:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1372:  *
                   1373:  * If MNT_NOFORCE is specified, there should not be any active ones,
                   1374:  * return error if any are found (nb: this is a user error, not a
                   1375:  * system error). If MNT_FORCE is specified, detach any active vnodes
                   1376:  * that are found.
                          *
                          * skipvp, if it is on the list, is left untouched.  flags may
                          * contain SKIPSYSTEM (leave VSYSTEM vnodes alone), WRITECLOSE (only
                          * consider regular files with a non-zero v_writecount) and
                          * FORCECLOSE (tear down vnodes that still have use references).
                          * Returns EBUSY if any vnode was left in use, 0 otherwise.
                          * NOTE(review): the MNT_NOFORCE/MNT_FORCE names above do not appear
                          * in the code below, which tests FORCECLOSE -- presumably older
                          * naming; confirm.
                   1377:  */
1.30      mycroft  1378: #ifdef DEBUG
                   1379: int busyprt = 0;       /* print out busy vnodes */
                   1380: struct ctldebug debug1 = { "busyprt", &busyprt };
                   1381: #endif
1.29      cgd      1382:
1.50      christos 1383: int
1.29      cgd      1384: vflush(mp, skipvp, flags)
                   1385:        struct mount *mp;
                   1386:        struct vnode *skipvp;
                   1387:        int flags;
                   1388: {
1.80      fvdl     1389:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1390:        struct vnode *vp, *nvp;
1.29      cgd      1391:        int busy = 0;
                   1392:
1.80      fvdl     1393:        simple_lock(&mntvnode_slock);
1.29      cgd      1394: loop:
1.177   ! matt     1395:        for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
                                                /* vp is no longer on mp: start over from the head of the list. */
1.29      cgd      1396:                if (vp->v_mount != mp)
                   1397:                        goto loop;
                                                /* Fetch the successor first; vp may leave this list below. */
1.177   ! matt     1398:                nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      1399:                /*
                   1400:                 * Skip over a selected vnode.
                   1401:                 */
                   1402:                if (vp == skipvp)
                   1403:                        continue;
1.80      fvdl     1404:                simple_lock(&vp->v_interlock);
1.29      cgd      1405:                /*
                   1406:                 * Skip over vnodes marked VSYSTEM.
                   1407:                 */
1.80      fvdl     1408:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                   1409:                        simple_unlock(&vp->v_interlock);
1.29      cgd      1410:                        continue;
1.80      fvdl     1411:                }
1.29      cgd      1412:                /*
1.30      mycroft  1413:                 * If WRITECLOSE is set, only flush out regular file
                   1414:                 * vnodes open for writing.
                   1415:                 */
                   1416:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1417:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                   1418:                        simple_unlock(&vp->v_interlock);
1.30      mycroft  1419:                        continue;
1.92      thorpej  1420:                }
1.30      mycroft  1421:                /*
1.29      cgd      1422:                 * With v_usecount == 0, all we need to do is clear
                   1423:                 * out the vnode data structures and we are done.
                          *
                          * The list lock is dropped around vgonel(), which is
                          * entered with vp->v_interlock still held.
                   1424:                 */
                   1425:                if (vp->v_usecount == 0) {
1.80      fvdl     1426:                        simple_unlock(&mntvnode_slock);
                   1427:                        vgonel(vp, p);
                   1428:                        simple_lock(&mntvnode_slock);
1.29      cgd      1429:                        continue;
                   1430:                }
                   1431:                /*
1.30      mycroft  1432:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1433:                 * For block or character devices, revert to an
                   1434:                 * anonymous device. For all other files, just kill them.
                   1435:                 */
                   1436:                if (flags & FORCECLOSE) {
1.80      fvdl     1437:                        simple_unlock(&mntvnode_slock);
1.29      cgd      1438:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.80      fvdl     1439:                                vgonel(vp, p);
1.29      cgd      1440:                        } else {
1.80      fvdl     1441:                                vclean(vp, 0, p);
1.30      mycroft  1442:                                vp->v_op = spec_vnodeop_p;
1.29      cgd      1443:                                insmntque(vp, (struct mount *)0);
                   1444:                        }
1.80      fvdl     1445:                        simple_lock(&mntvnode_slock);
1.29      cgd      1446:                        continue;
                   1447:                }
                                                /* In use and not forced: count it as busy and leave it alone. */
1.30      mycroft  1448: #ifdef DEBUG
1.29      cgd      1449:                if (busyprt)
                   1450:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1451: #endif
1.80      fvdl     1452:                simple_unlock(&vp->v_interlock);
1.29      cgd      1453:                busy++;
                   1454:        }
1.80      fvdl     1455:        simple_unlock(&mntvnode_slock);
1.29      cgd      1456:        if (busy)
                   1457:                return (EBUSY);
                   1458:        return (0);
                   1459: }
                   1460:
                   1461: /*
                   1462:  * Disassociate the underlying file system from a vnode.
                          *
                          * Entered with vp->v_interlock held (asserted below); the interlock
                          * is passed to VOP_LOCK() via LK_INTERLOCK and is not held on
                          * return.  VXLOCK is set for the duration, and it is a panic to
                          * find it already set.  With DOCLOSE in flags, buffers are flushed
                          * with vinvalbuf() and a vnode that had use references is closed
                          * with VOP_CLOSE().  On the way out the vnode is given
                          * dead_vnodeop_p and VT_NON, VXLOCK is cleared and any VXWANT
                          * sleeper is woken.
                   1463:  */
                   1464: void
1.80      fvdl     1465: vclean(vp, flags, p)
1.123     augustss 1466:        struct vnode *vp;
1.29      cgd      1467:        int flags;
1.80      fvdl     1468:        struct proc *p;
1.29      cgd      1469: {
                                        /* v_usecount as sampled on entry; non-zero means vp was in use. */
1.175     perseant 1470:        int active;
1.29      cgd      1471:
1.166     chs      1472:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1473:
1.29      cgd      1474:        /*
                   1475:         * Check to see if the vnode is in use.
                   1476:         * If so we have to reference it before we clean it out
                   1477:         * so that its count cannot fall to zero and generate a
                   1478:         * race against ourselves to recycle it.
                   1479:         */
1.166     chs      1480:
1.112     mycroft  1481:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1482:                vp->v_usecount++;
1.112     mycroft  1483: #ifdef DIAGNOSTIC
                   1484:                if (vp->v_usecount == 0) {
                   1485:                        vprint("vclean", vp);
                   1486:                        panic("vclean: usecount overflow");
                   1487:                }
                   1488: #endif
                   1489:        }
1.87      pk       1490:
1.29      cgd      1491:        /*
                   1492:         * Prevent the vnode from being recycled or
                   1493:         * brought into use while we clean it out.
                   1494:         */
                   1495:        if (vp->v_flag & VXLOCK)
1.125     chs      1496:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1497:        vp->v_flag |= VXLOCK;
                                        /* Move this vnode's pages from the exec total to the file total. */
1.161     thorpej  1498:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1499:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1500:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1501:        }
1.161     thorpej  1502:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142     chs      1503:
1.29      cgd      1504:        /*
1.80      fvdl     1505:         * Even if the count is zero, the VOP_INACTIVE routine may still
                   1506:         * have the object locked while it cleans it out. The VOP_LOCK
                   1507:         * ensures that the VOP_INACTIVE routine is done with its work.
                   1508:         * For active vnodes, it ensures that no other activity can
                   1509:         * occur while the underlying object is being cleaned out.
                   1510:         */
                   1511:        VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
                   1512:
1.98      wrstuden 1513:        /*
1.142     chs      1514:         * Clean out any cached data associated with the vnode.
1.29      cgd      1515:         */
1.166     chs      1516:        if (flags & DOCLOSE) {
1.80      fvdl     1517:                vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1.166     chs      1518:                KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1519:        }
                   1520:        LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80      fvdl     1521:
1.29      cgd      1522:        /*
1.30      mycroft  1523:         * If purging an active vnode, it must be closed and
1.80      fvdl     1524:         * deactivated before being reclaimed. Note that the
                   1525:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1526:         */
                   1527:        if (active) {
                   1528:                if (flags & DOCLOSE)
1.86      pk       1529:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1.80      fvdl     1530:                VOP_INACTIVE(vp, p);
                   1531:        } else {
                   1532:                /*
                   1533:                 * Any other processes trying to obtain this lock must first
                   1534:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1535:                 */
                   1536:                VOP_UNLOCK(vp, 0);
1.29      cgd      1537:        }
                   1538:        /*
                   1539:         * Reclaim the vnode.
                   1540:         */
1.80      fvdl     1541:        if (VOP_RECLAIM(vp, p))
1.125     chs      1542:                panic("vclean: cannot reclaim, vp %p", vp);
1.87      pk       1543:        if (active) {
                   1544:                /*
                   1545:                 * Inline copy of vrele() since VOP_INACTIVE
                   1546:                 * has already been called.
                          *
                          * This drops the extra reference taken at the top
                          * of this function.
                   1547:                 */
                   1548:                simple_lock(&vp->v_interlock);
                   1549:                if (--vp->v_usecount <= 0) {
                   1550: #ifdef DIAGNOSTIC
                   1551:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1552:                                vprint("vclean: bad ref count", vp);
                   1553:                                panic("vclean: ref cnt");
                   1554:                        }
                   1555: #endif
                   1556:                        /*
                   1557:                         * Insert at tail of LRU list.
                   1558:                         */
1.142     chs      1559:
1.113     fvdl     1560:                        simple_unlock(&vp->v_interlock);
1.87      pk       1561:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1562: #ifdef DIAGNOSTIC
1.113     fvdl     1563:                        if (vp->v_holdcnt > 0)
1.125     chs      1564:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1565: #endif
1.87      pk       1566:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1567:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1568:                } else
                   1569:                        simple_unlock(&vp->v_interlock);
1.87      pk       1570:        }
1.30      mycroft  1571:
1.169     chs      1572:        KASSERT(vp->v_uobj.uo_npages == 0);
1.80      fvdl     1573:        cache_purge(vp);
                   1574:
1.29      cgd      1575:        /*
1.30      mycroft  1576:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1577:         */
1.30      mycroft  1578:        vp->v_op = dead_vnodeop_p;
                   1579:        vp->v_tag = VT_NON;
1.139     enami    1580:        simple_lock(&vp->v_interlock);
1.29      cgd      1581:        vp->v_flag &= ~VXLOCK;
                                        /* Wake anyone who set VXWANT while waiting for VXLOCK to clear. */
                   1582:        if (vp->v_flag & VXWANT) {
                   1583:                vp->v_flag &= ~VXWANT;
1.139     enami    1584:                simple_unlock(&vp->v_interlock);
1.29      cgd      1585:                wakeup((caddr_t)vp);
1.139     enami    1586:        } else
                   1587:                simple_unlock(&vp->v_interlock);
1.29      cgd      1588: }
                   1589:
                   1590: /*
1.80      fvdl     1591:  * Recycle an unused vnode to the front of the free list.
                   1592:  * Release the passed interlock if the vnode will be recycled.
                          *
                          * Returns 1 if vp had no use references and was handed to vgonel(),
                          * 0 if it was in use (in which case inter_lkp is not touched).
                          * inter_lkp may be NULL, in which case nothing is released.
1.29      cgd      1593:  */
1.80      fvdl     1594: int
                   1595: vrecycle(vp, inter_lkp, p)
                   1596:        struct vnode *vp;
                   1597:        struct simplelock *inter_lkp;
                   1598:        struct proc *p;
                   1599: {
                   1600:
                   1601:        simple_lock(&vp->v_interlock);
                   1602:        if (vp->v_usecount == 0) {
                   1603:                if (inter_lkp)
                   1604:                        simple_unlock(inter_lkp);
                                                /* vgonel() is entered with vp->v_interlock still held. */
                   1605:                vgonel(vp, p);
                   1606:                return (1);
1.29      cgd      1607:        }
1.80      fvdl     1608:        simple_unlock(&vp->v_interlock);
                   1609:        return (0);
1.29      cgd      1610: }
                   1611:
                   1612: /*
                   1613:  * Eliminate all activity associated with a vnode
                   1614:  * in preparation for reuse.
                          *
                          * Takes vp->v_interlock and hands off to vgonel(), using the
                          * current process (curproc) as the process argument.  The
                          * interlock is not released in this function itself.
                   1615:  */
                   1616: void
                   1617: vgone(vp)
1.80      fvdl     1618:        struct vnode *vp;
                   1619: {
                   1620:        struct proc *p = curproc;       /* XXX */
                   1621:
                   1622:        simple_lock(&vp->v_interlock);
                   1623:        vgonel(vp, p);
                   1624: }
                   1625:
                   1626: /*
                   1627:  * vgone, with the vp interlock held.
                   1628:  */
                   1629: void
                   1630: vgonel(vp, p)
1.123     augustss 1631:        struct vnode *vp;
1.80      fvdl     1632:        struct proc *p;
1.29      cgd      1633: {
1.80      fvdl     1634:        struct vnode *vq;
1.29      cgd      1635:        struct vnode *vx;
                   1636:
1.166     chs      1637:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1638:
1.29      cgd      1639:        /*
                   1640:         * If a vgone (or vclean) is already in progress,
                   1641:         * wait until it is done and return.
                   1642:         */
1.166     chs      1643:
1.29      cgd      1644:        if (vp->v_flag & VXLOCK) {
                   1645:                vp->v_flag |= VXWANT;
1.166     chs      1646:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29      cgd      1647:                return;
                   1648:        }
1.166     chs      1649:
1.29      cgd      1650:        /*
                   1651:         * Clean out the filesystem specific data.
                   1652:         */
1.166     chs      1653:
1.80      fvdl     1654:        vclean(vp, DOCLOSE, p);
1.166     chs      1655:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1656:
1.29      cgd      1657:        /*
                   1658:         * Delete from old mount point vnode list, if on one.
                   1659:         */
1.166     chs      1660:
1.80      fvdl     1661:        if (vp->v_mount != NULL)
                   1662:                insmntque(vp, (struct mount *)0);
1.166     chs      1663:
1.29      cgd      1664:        /*
                   1665:         * If special device, remove it from special device alias list.
1.80      fvdl     1666:         * if it is on one.
1.29      cgd      1667:         */
1.166     chs      1668:
1.80      fvdl     1669:        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
                   1670:                simple_lock(&spechash_slock);
1.110     wrstuden 1671:                if (vp->v_hashchain != NULL) {
                   1672:                        if (*vp->v_hashchain == vp) {
                   1673:                                *vp->v_hashchain = vp->v_specnext;
                   1674:                        } else {
                   1675:                                for (vq = *vp->v_hashchain; vq;
                   1676:                                                        vq = vq->v_specnext) {
                   1677:                                        if (vq->v_specnext != vp)
                   1678:                                                continue;
                   1679:                                        vq->v_specnext = vp->v_specnext;
                   1680:                                        break;
                   1681:                                }
                   1682:                                if (vq == NULL)
                   1683:                                        panic("missing bdev");
1.29      cgd      1684:                        }
1.110     wrstuden 1685:                        if (vp->v_flag & VALIASED) {
                   1686:                                vx = NULL;
                   1687:                                for (vq = *vp->v_hashchain; vq;
                   1688:                                                        vq = vq->v_specnext) {
                   1689:                                        if (vq->v_rdev != vp->v_rdev ||
                   1690:                                            vq->v_type != vp->v_type)
                   1691:                                                continue;
                   1692:                                        if (vx)
                   1693:                                                break;
                   1694:                                        vx = vq;
                   1695:                                }
                   1696:                                if (vx == NULL)
                   1697:                                        panic("missing alias");
                   1698:                                if (vq == NULL)
                   1699:                                        vx->v_flag &= ~VALIASED;
                   1700:                                vp->v_flag &= ~VALIASED;
1.29      cgd      1701:                        }
                   1702:                }
1.80      fvdl     1703:                simple_unlock(&spechash_slock);
1.29      cgd      1704:                FREE(vp->v_specinfo, M_VNODE);
                   1705:                vp->v_specinfo = NULL;
                   1706:        }
1.166     chs      1707:
1.29      cgd      1708:        /*
1.30      mycroft  1709:         * If it is on the freelist and not already at the head,
                   1710:         * move it to the head of the list. The test of the back
                   1711:         * pointer and the reference count of zero is because
                   1712:         * it will be removed from the free list by getnewvnode,
                   1713:         * but will not have its reference count incremented until
                   1714:         * after calling vgone. If the reference count were
                   1715:         * incremented first, vgone would (incorrectly) try to
                   1716:         * close the previous instance of the underlying object.
                   1717:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1718:         * getnewvnode after removing it from the freelist to ensure
                   1719:         * that we do not try to move it here.
1.29      cgd      1720:         */
1.166     chs      1721:
1.80      fvdl     1722:        if (vp->v_usecount == 0) {
                   1723:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1724:                if (vp->v_holdcnt > 0)
1.125     chs      1725:                        panic("vgonel: not clean, vp %p", vp);
1.80      fvdl     1726:                if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1.113     fvdl     1727:                    TAILQ_FIRST(&vnode_free_list) != vp) {
1.80      fvdl     1728:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1729:                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                   1730:                }
                   1731:                simple_unlock(&vnode_free_list_slock);
1.29      cgd      1732:        }
                   1733:        vp->v_type = VBAD;
                   1734: }
                   1735:
                   1736: /*
                   1737:  * Lookup a vnode by device number.
                   1738:  */
1.50      christos 1739: int
1.29      cgd      1740: vfinddev(dev, type, vpp)
                   1741:        dev_t dev;
                   1742:        enum vtype type;
                   1743:        struct vnode **vpp;
                   1744: {
1.80      fvdl     1745:        struct vnode *vp;
                   1746:        int rc = 0;
1.29      cgd      1747:
1.80      fvdl     1748:        simple_lock(&spechash_slock);
1.29      cgd      1749:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1750:                if (dev != vp->v_rdev || type != vp->v_type)
                   1751:                        continue;
                   1752:                *vpp = vp;
1.80      fvdl     1753:                rc = 1;
                   1754:                break;
1.29      cgd      1755:        }
1.80      fvdl     1756:        simple_unlock(&spechash_slock);
                   1757:        return (rc);
1.96      thorpej  1758: }
                   1759:
                   1760: /*
                   1761:  * Revoke all the vnodes corresponding to the specified minor number
                   1762:  * range (endpoints inclusive) of the specified major.
                   1763:  */
                   1764: void
                   1765: vdevgone(maj, minl, minh, type)
                   1766:        int maj, minl, minh;
                   1767:        enum vtype type;
                   1768: {
                   1769:        struct vnode *vp;
                   1770:        int mn;
                   1771:
                   1772:        for (mn = minl; mn <= minh; mn++)
                   1773:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1774:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1775: }
                   1776:
                   1777: /*
                   1778:  * Calculate the total number of references to a special device.
                   1779:  */
1.30      mycroft  1780: int
1.29      cgd      1781: vcount(vp)
1.123     augustss 1782:        struct vnode *vp;
1.29      cgd      1783: {
1.123     augustss 1784:        struct vnode *vq, *vnext;
1.29      cgd      1785:        int count;
                   1786:
                   1787: loop:
                   1788:        if ((vp->v_flag & VALIASED) == 0)
                   1789:                return (vp->v_usecount);
1.80      fvdl     1790:        simple_lock(&spechash_slock);
1.30      mycroft  1791:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1792:                vnext = vq->v_specnext;
1.29      cgd      1793:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1794:                        continue;
                   1795:                /*
                   1796:                 * Alias, but not in use, so flush it out.
                   1797:                 */
1.151     wrstuden 1798:                if (vq->v_usecount == 0 && vq != vp &&
                   1799:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1800:                        simple_unlock(&spechash_slock);
1.29      cgd      1801:                        vgone(vq);
                   1802:                        goto loop;
                   1803:                }
                   1804:                count += vq->v_usecount;
                   1805:        }
1.80      fvdl     1806:        simple_unlock(&spechash_slock);
1.29      cgd      1807:        return (count);
                   1808: }
                   1809:
                   1810: /*
                   1811:  * Print out a description of a vnode.
                   1812:  */
1.146     jdolecek 1813: static const char * const typename[] =
1.29      cgd      1814:    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
                   1815:
                   1816: void
                   1817: vprint(label, vp)
                   1818:        char *label;
1.123     augustss 1819:        struct vnode *vp;
1.29      cgd      1820: {
1.161     thorpej  1821:        char buf[96];
1.29      cgd      1822:
                   1823:        if (label != NULL)
1.57      christos 1824:                printf("%s: ", label);
1.142     chs      1825:        printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1.113     fvdl     1826:            vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1.56      christos 1827:            vp->v_holdcnt);
1.29      cgd      1828:        buf[0] = '\0';
                   1829:        if (vp->v_flag & VROOT)
                   1830:                strcat(buf, "|VROOT");
                   1831:        if (vp->v_flag & VTEXT)
                   1832:                strcat(buf, "|VTEXT");
1.161     thorpej  1833:        if (vp->v_flag & VEXECMAP)
                   1834:                strcat(buf, "|VEXECMAP");
1.29      cgd      1835:        if (vp->v_flag & VSYSTEM)
                   1836:                strcat(buf, "|VSYSTEM");
                   1837:        if (vp->v_flag & VXLOCK)
                   1838:                strcat(buf, "|VXLOCK");
                   1839:        if (vp->v_flag & VXWANT)
                   1840:                strcat(buf, "|VXWANT");
                   1841:        if (vp->v_flag & VBWAIT)
                   1842:                strcat(buf, "|VBWAIT");
                   1843:        if (vp->v_flag & VALIASED)
                   1844:                strcat(buf, "|VALIASED");
                   1845:        if (buf[0] != '\0')
1.57      christos 1846:                printf(" flags (%s)", &buf[1]);
1.30      mycroft  1847:        if (vp->v_data == NULL) {
1.57      christos 1848:                printf("\n");
1.30      mycroft  1849:        } else {
1.57      christos 1850:                printf("\n\t");
1.30      mycroft  1851:                VOP_PRINT(vp);
                   1852:        }
1.29      cgd      1853: }
                   1854:
                   1855: #ifdef DEBUG
                   1856: /*
                   1857:  * List all of the locked vnodes in the system.
                   1858:  * Called when debugging the kernel.
                   1859:  */
1.51      christos 1860: void
1.29      cgd      1861: printlockedvnodes()
                   1862: {
1.80      fvdl     1863:        struct mount *mp, *nmp;
                   1864:        struct vnode *vp;
1.29      cgd      1865:
1.57      christos 1866:        printf("Locked vnodes\n");
1.80      fvdl     1867:        simple_lock(&mountlist_slock);
1.177   ! matt     1868:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
        !          1869:             mp = nmp) {
1.80      fvdl     1870:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177   ! matt     1871:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1872:                        continue;
                   1873:                }
1.158     chs      1874:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29      cgd      1875:                        if (VOP_ISLOCKED(vp))
1.158     chs      1876:                                vprint(NULL, vp);
1.80      fvdl     1877:                }
                   1878:                simple_lock(&mountlist_slock);
1.177   ! matt     1879:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1880:                vfs_unbusy(mp);
1.29      cgd      1881:        }
1.80      fvdl     1882:        simple_unlock(&mountlist_slock);
1.29      cgd      1883: }
                   1884: #endif
                   1885:
1.101     mrg      1886: /*
1.80      fvdl     1887:  * Top level filesystem related information gathering.
                   1888:  */
                   1889: int
                   1890: vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
                   1891:        int *name;
                   1892:        u_int namelen;
                   1893:        void *oldp;
                   1894:        size_t *oldlenp;
                   1895:        void *newp;
                   1896:        size_t newlen;
                   1897:        struct proc *p;
                   1898: {
1.95      thorpej  1899: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1900:        struct vfsconf vfc;
1.154     jdolecek 1901:        extern const char * const mountcompatnames[];
                   1902:        extern int nmountcompatnames;
1.95      thorpej  1903: #endif
1.80      fvdl     1904:        struct vfsops *vfsp;
                   1905:
                   1906:        /* all sysctl names at this level are at least name and field */
                   1907:        if (namelen < 2)
                   1908:                return (ENOTDIR);               /* overloaded */
1.94      thorpej  1909:
                   1910:        /* Not generic: goes to file system. */
1.80      fvdl     1911:        if (name[0] != VFS_GENERIC) {
1.155     jdolecek 1912:                static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
1.154     jdolecek 1913:                const char *vfsname;
                   1914:
                   1915:                if (name[0] < 0 || name[0] > VFS_MAXID
                   1916:                    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
1.80      fvdl     1917:                        return (EOPNOTSUPP);
1.154     jdolecek 1918:
                   1919:                vfsp = vfs_getopsbyname(vfsname);
1.94      thorpej  1920:                if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
                   1921:                        return (EOPNOTSUPP);
1.80      fvdl     1922:                return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
                   1923:                    oldp, oldlenp, newp, newlen, p));
                   1924:        }
1.94      thorpej  1925:
                   1926:        /* The rest are generic vfs sysctls. */
1.80      fvdl     1927:        switch (name[1]) {
1.117     fvdl     1928:        case VFS_USERMOUNT:
                   1929:                return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
1.95      thorpej  1930: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1931:        case VFS_MAXTYPENUM:
1.94      thorpej  1932:                /*
                   1933:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1934:                 */
1.80      fvdl     1935:                return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
                   1936:        case VFS_CONF:
1.94      thorpej  1937:                /*
                   1938:                 * Special: a node, next is a file system name.
                   1939:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1940:                 */
1.80      fvdl     1941:                if (namelen < 3)
                   1942:                        return (ENOTDIR);       /* overloaded */
                   1943:                if (name[2] >= nmountcompatnames || name[2] < 0 ||
                   1944:                    mountcompatnames[name[2]] == NULL)
                   1945:                        return (EOPNOTSUPP);
                   1946:                vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
                   1947:                if (vfsp == NULL)
1.94      thorpej  1948:                        return (EOPNOTSUPP);
                   1949:                vfc.vfc_vfsops = vfsp;
1.80      fvdl     1950:                strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
                   1951:                vfc.vfc_typenum = name[2];
1.94      thorpej  1952:                vfc.vfc_refcount = vfsp->vfs_refcount;
1.80      fvdl     1953:                vfc.vfc_flags = 0;
                   1954:                vfc.vfc_mountroot = vfsp->vfs_mountroot;
                   1955:                vfc.vfc_next = NULL;
                   1956:                return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
                   1957:                    sizeof(struct vfsconf)));
1.95      thorpej  1958: #endif
                   1959:        default:
                   1960:                break;
1.80      fvdl     1961:        }
                   1962:        return (EOPNOTSUPP);
                   1963: }
                   1964:
1.29      cgd      1965: int kinfo_vdebug = 1;
                   1966: int kinfo_vgetfailed;
                   1967: #define KINFO_VNODESLOP        10
                   1968: /*
                   1969:  * Dump vnode list (via sysctl).
                   1970:  * Copyout address of vnode followed by vnode.
                   1971:  */
                   1972: /* ARGSUSED */
1.50      christos 1973: int
1.80      fvdl     1974: sysctl_vnode(where, sizep, p)
1.29      cgd      1975:        char *where;
                   1976:        size_t *sizep;
1.80      fvdl     1977:        struct proc *p;
1.29      cgd      1978: {
1.80      fvdl     1979:        struct mount *mp, *nmp;
                   1980:        struct vnode *nvp, *vp;
                   1981:        char *bp = where, *savebp;
1.29      cgd      1982:        char *ewhere;
                   1983:        int error;
                   1984:
1.90      perry    1985: #define VPTRSZ sizeof(struct vnode *)
                   1986: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      1987:        if (where == NULL) {
                   1988:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   1989:                return (0);
                   1990:        }
                   1991:        ewhere = where + *sizep;
1.80      fvdl     1992:
                   1993:        simple_lock(&mountlist_slock);
1.177   ! matt     1994:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
        !          1995:             mp = nmp) {
1.80      fvdl     1996:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177   ! matt     1997:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      1998:                        continue;
1.80      fvdl     1999:                }
1.29      cgd      2000:                savebp = bp;
                   2001: again:
1.80      fvdl     2002:                simple_lock(&mntvnode_slock);
1.177   ! matt     2003:                for (vp = LIST_FIRST(&mp->mnt_vnodelist);
1.29      cgd      2004:                     vp != NULL;
1.80      fvdl     2005:                     vp = nvp) {
1.29      cgd      2006:                        /*
                   2007:                         * Check that the vp is still associated with
                   2008:                         * this filesystem.  RACE: could have been
                   2009:                         * recycled onto the same filesystem.
                   2010:                         */
                   2011:                        if (vp->v_mount != mp) {
1.80      fvdl     2012:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2013:                                if (kinfo_vdebug)
1.57      christos 2014:                                        printf("kinfo: vp changed\n");
1.29      cgd      2015:                                bp = savebp;
                   2016:                                goto again;
                   2017:                        }
1.177   ! matt     2018:                        nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      2019:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     2020:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2021:                                *sizep = bp - where;
                   2022:                                return (ENOMEM);
                   2023:                        }
1.80      fvdl     2024:                        simple_unlock(&mntvnode_slock);
1.29      cgd      2025:                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                   2026:                           (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                   2027:                                return (error);
                   2028:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     2029:                        simple_lock(&mntvnode_slock);
1.29      cgd      2030:                }
1.80      fvdl     2031:                simple_unlock(&mntvnode_slock);
                   2032:                simple_lock(&mountlist_slock);
1.177   ! matt     2033:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      2034:                vfs_unbusy(mp);
                   2035:        }
1.80      fvdl     2036:        simple_unlock(&mountlist_slock);
1.29      cgd      2037:
                   2038:        *sizep = bp - where;
                   2039:        return (0);
1.30      mycroft  2040: }
                   2041:
                   2042: /*
                   2043:  * Check to see if a filesystem is mounted on a block device.
                   2044:  */
                   2045: int
                   2046: vfs_mountedon(vp)
1.80      fvdl     2047:        struct vnode *vp;
1.30      mycroft  2048: {
1.80      fvdl     2049:        struct vnode *vq;
                   2050:        int error = 0;
1.30      mycroft  2051:
1.113     fvdl     2052:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  2053:                return (EBUSY);
                   2054:        if (vp->v_flag & VALIASED) {
1.80      fvdl     2055:                simple_lock(&spechash_slock);
1.30      mycroft  2056:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   2057:                        if (vq->v_rdev != vp->v_rdev ||
                   2058:                            vq->v_type != vp->v_type)
                   2059:                                continue;
1.113     fvdl     2060:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     2061:                                error = EBUSY;
                   2062:                                break;
                   2063:                        }
1.30      mycroft  2064:                }
1.80      fvdl     2065:                simple_unlock(&spechash_slock);
1.30      mycroft  2066:        }
1.80      fvdl     2067:        return (error);
1.30      mycroft  2068: }
                   2069:
                   2070: /*
                   2071:  * Build hash lists of net addresses and hang them off the mount point.
                   2072:  * Called by ufs_mount() to set up the lists of export addresses.
                   2073:  */
                   2074: static int
                   2075: vfs_hang_addrlist(mp, nep, argp)
                   2076:        struct mount *mp;
                   2077:        struct netexport *nep;
                   2078:        struct export_args *argp;
                   2079: {
1.123     augustss 2080:        struct netcred *np, *enp;
                   2081:        struct radix_node_head *rnh;
                   2082:        int i;
1.30      mycroft  2083:        struct radix_node *rn;
                   2084:        struct sockaddr *saddr, *smask = 0;
                   2085:        struct domain *dom;
                   2086:        int error;
                   2087:
                   2088:        if (argp->ex_addrlen == 0) {
                   2089:                if (mp->mnt_flag & MNT_DEFEXPORTED)
                   2090:                        return (EPERM);
                   2091:                np = &nep->ne_defexported;
                   2092:                np->netc_exflags = argp->ex_flags;
1.163     christos 2093:                crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2094:                np->netc_anon.cr_ref = 1;
                   2095:                mp->mnt_flag |= MNT_DEFEXPORTED;
                   2096:                return (0);
                   2097:        }
1.156     jdolecek 2098:
                   2099:        if (argp->ex_addrlen > MLEN)
                   2100:                return (EINVAL);
                   2101:
1.30      mycroft  2102:        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
                   2103:        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91      perry    2104:        memset((caddr_t)np, 0, i);
1.30      mycroft  2105:        saddr = (struct sockaddr *)(np + 1);
1.50      christos 2106:        error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
                   2107:        if (error)
1.30      mycroft  2108:                goto out;
                   2109:        if (saddr->sa_len > argp->ex_addrlen)
                   2110:                saddr->sa_len = argp->ex_addrlen;
                   2111:        if (argp->ex_masklen) {
                   2112:                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66      mycroft  2113:                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30      mycroft  2114:                if (error)
                   2115:                        goto out;
                   2116:                if (smask->sa_len > argp->ex_masklen)
                   2117:                        smask->sa_len = argp->ex_masklen;
                   2118:        }
                   2119:        i = saddr->sa_family;
                   2120:        if ((rnh = nep->ne_rtable[i]) == 0) {
                   2121:                /*
                   2122:                 * Seems silly to initialize every AF when most are not
                   2123:                 * used, do so on demand here
                   2124:                 */
                   2125:                for (dom = domains; dom; dom = dom->dom_next)
                   2126:                        if (dom->dom_family == i && dom->dom_rtattach) {
                   2127:                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                   2128:                                        dom->dom_rtoffset);
                   2129:                                break;
                   2130:                        }
                   2131:                if ((rnh = nep->ne_rtable[i]) == 0) {
                   2132:                        error = ENOBUFS;
                   2133:                        goto out;
                   2134:                }
                   2135:        }
                   2136:        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
                   2137:                np->netc_rnodes);
                   2138:        if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1.72      fvdl     2139:                if (rn == 0) {
                   2140:                        enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
                   2141:                                smask, rnh);
                   2142:                        if (enp == 0) {
                   2143:                                error = EPERM;
                   2144:                                goto out;
                   2145:                        }
                   2146:                } else
                   2147:                        enp = (struct netcred *)rn;
                   2148:
                   2149:                if (enp->netc_exflags != argp->ex_flags ||
                   2150:                    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
                   2151:                    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
1.176     thorpej  2152:                    enp->netc_anon.cr_ngroups !=
                   2153:                                        (uint32_t) argp->ex_anon.cr_ngroups ||
1.91      perry    2154:                    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1.72      fvdl     2155:                        enp->netc_anon.cr_ngroups))
                   2156:                                error = EPERM;
                   2157:                else
                   2158:                        error = 0;
1.30      mycroft  2159:                goto out;
                   2160:        }
                   2161:        np->netc_exflags = argp->ex_flags;
1.163     christos 2162:        crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2163:        np->netc_anon.cr_ref = 1;
                   2164:        return (0);
                   2165: out:
                   2166:        free(np, M_NETADDR);
                   2167:        return (error);
                   2168: }
                   2169:
                   2170: /* ARGSUSED */
                   2171: static int
                   2172: vfs_free_netcred(rn, w)
                   2173:        struct radix_node *rn;
1.50      christos 2174:        void *w;
1.30      mycroft  2175: {
1.123     augustss 2176:        struct radix_node_head *rnh = (struct radix_node_head *)w;
1.30      mycroft  2177:
                   2178:        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
                   2179:        free((caddr_t)rn, M_NETADDR);
                   2180:        return (0);
                   2181: }
                   2182:
                   2183: /*
                   2184:  * Free the net address hash lists that are hanging off the mount points.
                   2185:  */
                   2186: static void
                   2187: vfs_free_addrlist(nep)
                   2188:        struct netexport *nep;
                   2189: {
1.123     augustss 2190:        int i;
                   2191:        struct radix_node_head *rnh;
1.30      mycroft  2192:
                   2193:        for (i = 0; i <= AF_MAX; i++)
1.50      christos 2194:                if ((rnh = nep->ne_rtable[i]) != NULL) {
                   2195:                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30      mycroft  2196:                        free((caddr_t)rnh, M_RTABLE);
                   2197:                        nep->ne_rtable[i] = 0;
                   2198:                }
                   2199: }
                   2200:
                   2201: int
                   2202: vfs_export(mp, nep, argp)
                   2203:        struct mount *mp;
                   2204:        struct netexport *nep;
                   2205:        struct export_args *argp;
                   2206: {
                   2207:        int error;
                   2208:
                   2209:        if (argp->ex_flags & MNT_DELEXPORT) {
1.71      fvdl     2210:                if (mp->mnt_flag & MNT_EXPUBLIC) {
                   2211:                        vfs_setpublicfs(NULL, NULL, NULL);
                   2212:                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                   2213:                }
1.30      mycroft  2214:                vfs_free_addrlist(nep);
                   2215:                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
                   2216:        }
                   2217:        if (argp->ex_flags & MNT_EXPORTED) {
1.71      fvdl     2218:                if (argp->ex_flags & MNT_EXPUBLIC) {
                   2219:                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                   2220:                                return (error);
                   2221:                        mp->mnt_flag |= MNT_EXPUBLIC;
                   2222:                }
1.50      christos 2223:                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30      mycroft  2224:                        return (error);
                   2225:                mp->mnt_flag |= MNT_EXPORTED;
                   2226:        }
1.71      fvdl     2227:        return (0);
                   2228: }
                   2229:
                   2230: /*
                   2231:  * Set the publicly exported filesystem (WebNFS). Currently, only
                   2232:  * one public filesystem is possible in the spec (RFC 2054 and 2055)
                   2233:  */
                   2234: int
                   2235: vfs_setpublicfs(mp, nep, argp)
                   2236:        struct mount *mp;
                   2237:        struct netexport *nep;
                   2238:        struct export_args *argp;
                   2239: {
                   2240:        int error;
                   2241:        struct vnode *rvp;
                   2242:        char *cp;
                   2243:
                   2244:        /*
                   2245:         * mp == NULL -> invalidate the current info, the FS is
                   2246:         * no longer exported. May be called from either vfs_export
                   2247:         * or unmount, so check if it hasn't already been done.
                   2248:         */
                   2249:        if (mp == NULL) {
                   2250:                if (nfs_pub.np_valid) {
                   2251:                        nfs_pub.np_valid = 0;
                   2252:                        if (nfs_pub.np_index != NULL) {
                   2253:                                FREE(nfs_pub.np_index, M_TEMP);
                   2254:                                nfs_pub.np_index = NULL;
                   2255:                        }
                   2256:                }
                   2257:                return (0);
                   2258:        }
                   2259:
                   2260:        /*
                   2261:         * Only one allowed at a time.
                   2262:         */
                   2263:        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                   2264:                return (EBUSY);
                   2265:
                   2266:        /*
                   2267:         * Get real filehandle for root of exported FS.
                   2268:         */
1.91      perry    2269:        memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.71      fvdl     2270:        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
                   2271:
                   2272:        if ((error = VFS_ROOT(mp, &rvp)))
                   2273:                return (error);
                   2274:
                   2275:        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                   2276:                return (error);
                   2277:
                   2278:        vput(rvp);
                   2279:
                   2280:        /*
                   2281:         * If an indexfile was specified, pull it in.
                   2282:         */
                   2283:        if (argp->ex_indexfile != NULL) {
                   2284:                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                   2285:                    M_WAITOK);
                   2286:                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                   2287:                    MAXNAMLEN, (size_t *)0);
                   2288:                if (!error) {
                   2289:                        /*
                   2290:                         * Check for illegal filenames.
                   2291:                         */
                   2292:                        for (cp = nfs_pub.np_index; *cp; cp++) {
                   2293:                                if (*cp == '/') {
                   2294:                                        error = EINVAL;
                   2295:                                        break;
                   2296:                                }
                   2297:                        }
                   2298:                }
                   2299:                if (error) {
                   2300:                        FREE(nfs_pub.np_index, M_TEMP);
                   2301:                        return (error);
                   2302:                }
                   2303:        }
                   2304:
                   2305:        nfs_pub.np_mount = mp;
                   2306:        nfs_pub.np_valid = 1;
1.30      mycroft  2307:        return (0);
                   2308: }
                   2309:
                   2310: struct netcred *
                   2311: vfs_export_lookup(mp, nep, nam)
1.123     augustss 2312:        struct mount *mp;
1.30      mycroft  2313:        struct netexport *nep;
                   2314:        struct mbuf *nam;
                   2315: {
1.123     augustss 2316:        struct netcred *np;
                   2317:        struct radix_node_head *rnh;
1.30      mycroft  2318:        struct sockaddr *saddr;
                   2319:
                   2320:        np = NULL;
                   2321:        if (mp->mnt_flag & MNT_EXPORTED) {
                   2322:                /*
                   2323:                 * Lookup in the export list first.
                   2324:                 */
                   2325:                if (nam != NULL) {
                   2326:                        saddr = mtod(nam, struct sockaddr *);
                   2327:                        rnh = nep->ne_rtable[saddr->sa_family];
                   2328:                        if (rnh != NULL) {
                   2329:                                np = (struct netcred *)
                   2330:                                        (*rnh->rnh_matchaddr)((caddr_t)saddr,
                   2331:                                                              rnh);
                   2332:                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                   2333:                                        np = NULL;
                   2334:                        }
                   2335:                }
                   2336:                /*
                   2337:                 * If no address match, use the default if it exists.
                   2338:                 */
                   2339:                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                   2340:                        np = &nep->ne_defexported;
                   2341:        }
                   2342:        return (np);
1.35      ws       2343: }
                   2344:
                   2345: /*
                   2346:  * Do the usual access checking.
                   2347:  * file_mode, uid and gid are from the vnode in question,
                   2348:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   2349:  */
1.41      mycroft  2350: int
1.68      mycroft  2351: vaccess(type, file_mode, uid, gid, acc_mode, cred)
                   2352:        enum vtype type;
1.35      ws       2353:        mode_t file_mode;
                   2354:        uid_t uid;
                   2355:        gid_t gid;
                   2356:        mode_t acc_mode;
                   2357:        struct ucred *cred;
                   2358: {
                   2359:        mode_t mask;
                   2360:
1.64      mycroft  2361:        /*
                   2362:         * Super-user always gets read/write access, but execute access depends
                   2363:         * on at least one execute bit being set.
                   2364:         */
                   2365:        if (cred->cr_uid == 0) {
1.69      mycroft  2366:                if ((acc_mode & VEXEC) && type != VDIR &&
1.68      mycroft  2367:                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64      mycroft  2368:                        return (EACCES);
1.68      mycroft  2369:                return (0);
1.64      mycroft  2370:        }
1.35      ws       2371:
                   2372:        mask = 0;
                   2373:
                   2374:        /* Otherwise, check the owner. */
                   2375:        if (cred->cr_uid == uid) {
1.68      mycroft  2376:                if (acc_mode & VEXEC)
1.35      ws       2377:                        mask |= S_IXUSR;
                   2378:                if (acc_mode & VREAD)
                   2379:                        mask |= S_IRUSR;
                   2380:                if (acc_mode & VWRITE)
                   2381:                        mask |= S_IWUSR;
1.64      mycroft  2382:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2383:        }
                   2384:
                   2385:        /* Otherwise, check the groups. */
1.44      jtc      2386:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68      mycroft  2387:                if (acc_mode & VEXEC)
1.35      ws       2388:                        mask |= S_IXGRP;
                   2389:                if (acc_mode & VREAD)
                   2390:                        mask |= S_IRGRP;
                   2391:                if (acc_mode & VWRITE)
                   2392:                        mask |= S_IWGRP;
1.64      mycroft  2393:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2394:        }
                   2395:
                   2396:        /* Otherwise, check everyone else. */
1.68      mycroft  2397:        if (acc_mode & VEXEC)
1.35      ws       2398:                mask |= S_IXOTH;
                   2399:        if (acc_mode & VREAD)
                   2400:                mask |= S_IROTH;
                   2401:        if (acc_mode & VWRITE)
                   2402:                mask |= S_IWOTH;
1.64      mycroft  2403:        return ((file_mode & mask) == mask ? 0 : EACCES);
1.39      mycroft  2404: }
                   2405:
                   2406: /*
                   2407:  * Unmount all file systems.
                   2408:  * We traverse the list in reverse order under the assumption that doing so
                   2409:  * will avoid needing to worry about dependencies.
                   2410:  */
                   2411: void
1.128     sommerfe 2412: vfs_unmountall(p)
                   2413:        struct proc *p;
1.39      mycroft  2414: {
1.123     augustss 2415:        struct mount *mp, *nmp;
1.40      mycroft  2416:        int allerror, error;
1.39      mycroft  2417:
                   2418:        for (allerror = 0,
                   2419:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   2420:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      2421: #ifdef DEBUG
1.57      christos 2422:                printf("unmounting %s (%s)...\n",
1.56      christos 2423:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      2424: #endif
1.149     thorpej  2425:                /*
                   2426:                 * XXX Freeze syncer.  Must do this before locking the
                   2427:                 * mount point.  See dounmount() for details.
                   2428:                 */
                   2429:                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                   2430:                if (vfs_busy(mp, 0, 0)) {
                   2431:                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60      fvdl     2432:                        continue;
1.149     thorpej  2433:                }
1.80      fvdl     2434:                if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1.57      christos 2435:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  2436:                            mp->mnt_stat.f_mntonname, error);
                   2437:                        allerror = 1;
                   2438:                }
1.39      mycroft  2439:        }
                   2440:        if (allerror)
1.57      christos 2441:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  2442: }
                   2443:
                   2444: /*
                   2445:  * Sync and unmount file systems before shutting down.
                   2446:  */
                   2447: void
                   2448: vfs_shutdown()
                   2449: {
1.123     augustss 2450:        struct buf *bp;
1.138     bouyer   2451:        int iter, nbusy, nbusy_prev = 0, dcount, s;
1.128     sommerfe 2452:        struct proc *p = curproc;
1.40      mycroft  2453:
1.128     sommerfe 2454:        /* XXX we're certainly not running in proc0's context! */
                   2455:        if (p == NULL)
                   2456:                p = &proc0;
                   2457:
1.70      cgd      2458:        printf("syncing disks... ");
                   2459:
1.138     bouyer   2460:        /* remove user process from run queue */
                   2461:        suspendsched();
1.40      mycroft  2462:        (void) spl0();
                   2463:
1.128     sommerfe 2464:        /* avoid coming back this way again if we panic. */
                   2465:        doing_shutdown = 1;
                   2466:
1.142     chs      2467:        sys_sync(p, NULL, NULL);
1.40      mycroft  2468:
                   2469:        /* Wait for sync to finish. */
1.113     fvdl     2470:        dcount = 10000;
1.138     bouyer   2471:        for (iter = 0; iter < 20;) {
1.40      mycroft  2472:                nbusy = 0;
1.113     fvdl     2473:                for (bp = &buf[nbuf]; --bp >= buf; ) {
1.133     fvdl     2474:                        if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1.40      mycroft  2475:                                nbusy++;
1.113     fvdl     2476:                        /*
                   2477:                         * With soft updates, some buffers that are
                   2478:                         * written will be remarked as dirty until other
                   2479:                         * buffers are written.
                   2480:                         */
1.116     perseant 2481:                        if (bp->b_vp && bp->b_vp->v_mount
                   2482:                            && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
                   2483:                            && (bp->b_flags & B_DELWRI)) {
1.113     fvdl     2484:                                s = splbio();
                   2485:                                bremfree(bp);
                   2486:                                bp->b_flags |= B_BUSY;
                   2487:                                splx(s);
                   2488:                                nbusy++;
                   2489:                                bawrite(bp);
                   2490:                                if (dcount-- <= 0) {
                   2491:                                        printf("softdep ");
                   2492:                                        goto fail;
                   2493:                                }
                   2494:                        }
                   2495:                }
1.40      mycroft  2496:                if (nbusy == 0)
                   2497:                        break;
1.138     bouyer   2498:                if (nbusy_prev == 0)
                   2499:                        nbusy_prev = nbusy;
1.57      christos 2500:                printf("%d ", nbusy);
1.138     bouyer   2501:                tsleep(&nbusy, PRIBIO, "bflush",
                   2502:                    (iter == 0) ? 1 : hz / 25 * iter);
                   2503:                if (nbusy >= nbusy_prev) /* we didn't flush anything */
                   2504:                        iter++;
                   2505:                else
                   2506:                        nbusy_prev = nbusy;
1.40      mycroft  2507:        }
1.73      thorpej  2508:        if (nbusy) {
1.113     fvdl     2509: fail:
1.124     augustss 2510: #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
1.108     simonb   2511:                printf("giving up\nPrinting vnodes for busy buffers\n");
                   2512:                for (bp = &buf[nbuf]; --bp >= buf; )
1.133     fvdl     2513:                        if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1.109     thorpej  2514:                                vprint(NULL, bp->b_vp);
1.124     augustss 2515:
                   2516: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
                   2517:                Debugger();
                   2518: #endif
                   2519:
                   2520: #else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
1.57      christos 2521:                printf("giving up\n");
1.124     augustss 2522: #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
1.84      thorpej  2523:                return;
1.73      thorpej  2524:        } else
1.57      christos 2525:                printf("done\n");
1.73      thorpej  2526:
1.84      thorpej  2527:        /*
                   2528:         * If we've panic'd, don't make the situation potentially
                   2529:         * worse by unmounting the file systems.
                   2530:         */
                   2531:        if (panicstr != NULL)
                   2532:                return;
                   2533:
                   2534:        /* Release inodes held by texts before update. */
1.73      thorpej  2535: #ifdef notdef
1.84      thorpej  2536:        vnshutdown();
1.73      thorpej  2537: #endif
1.84      thorpej  2538:        /* Unmount file systems. */
1.128     sommerfe 2539:        vfs_unmountall(p);
1.58      thorpej  2540: }
                   2541:
                   2542: /*
                   2543:  * Mount the root file system.  If the operator didn't specify a
                   2544:  * file system to use, try all possible file systems until one
                   2545:  * succeeds.
                   2546:  */
                   2547: int
                   2548: vfs_mountroot()
                   2549: {
1.79      thorpej  2550:        struct vfsops *v;
1.58      thorpej  2551:
                   2552:        if (root_device == NULL)
                   2553:                panic("vfs_mountroot: root device unknown");
                   2554:
                   2555:        switch (root_device->dv_class) {
                   2556:        case DV_IFNET:
                   2557:                if (rootdev != NODEV)
1.173     thorpej  2558:                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                   2559:                            "(0x%08x -> %d,%d)", rootdev,
                   2560:                            major(rootdev), minor(rootdev));
1.58      thorpej  2561:                break;
                   2562:
                   2563:        case DV_DISK:
                   2564:                if (rootdev == NODEV)
                   2565:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                   2566:                break;
                   2567:
                   2568:        default:
                   2569:                printf("%s: inappropriate for root file system\n",
                   2570:                    root_device->dv_xname);
                   2571:                return (ENODEV);
                   2572:        }
                   2573:
                   2574:        /*
                   2575:         * If user specified a file system, use it.
                   2576:         */
                   2577:        if (mountroot != NULL)
                   2578:                return ((*mountroot)());
                   2579:
                   2580:        /*
                   2581:         * Try each file system currently configured into the kernel.
                   2582:         */
1.79      thorpej  2583:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2584:                if (v->vfs_mountroot == NULL)
1.58      thorpej  2585:                        continue;
                   2586: #ifdef DEBUG
1.79      thorpej  2587:                printf("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  2588: #endif
1.79      thorpej  2589:                if ((*v->vfs_mountroot)() == 0) {
                   2590:                        printf("root file system type: %s\n", v->vfs_name);
                   2591:                        break;
1.58      thorpej  2592:                }
                   2593:        }
                   2594:
1.79      thorpej  2595:        if (v == NULL) {
                   2596:                printf("no file system for %s", root_device->dv_xname);
                   2597:                if (root_device->dv_class == DV_DISK)
                   2598:                        printf(" (dev 0x%x)", rootdev);
                   2599:                printf("\n");
                   2600:                return (EFTYPE);
                   2601:        }
                   2602:        return (0);
1.58      thorpej  2603: }
                   2604:
                   2605: /*
                   2606:  * Given a file system name, look up the vfsops for that
                   2607:  * file system, or return NULL if file system isn't present
                   2608:  * in the kernel.
                   2609:  */
                   2610: struct vfsops *
                   2611: vfs_getopsbyname(name)
                   2612:        const char *name;
                   2613: {
1.79      thorpej  2614:        struct vfsops *v;
                   2615:
                   2616:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2617:                if (strcmp(v->vfs_name, name) == 0)
                   2618:                        break;
                   2619:        }
                   2620:
                   2621:        return (v);
                   2622: }
                   2623:
                   2624: /*
                   2625:  * Establish a file system and initialize it.
                   2626:  */
                   2627: int
                   2628: vfs_attach(vfs)
                   2629:        struct vfsops *vfs;
                   2630: {
                   2631:        struct vfsops *v;
                   2632:        int error = 0;
                   2633:
1.58      thorpej  2634:
1.79      thorpej  2635:        /*
                   2636:         * Make sure this file system doesn't already exist.
                   2637:         */
1.157     chs      2638:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2639:                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                   2640:                        error = EEXIST;
                   2641:                        goto out;
                   2642:                }
                   2643:        }
                   2644:
                   2645:        /*
                   2646:         * Initialize the vnode operations for this file system.
                   2647:         */
                   2648:        vfs_opv_init(vfs->vfs_opv_descs);
                   2649:
                   2650:        /*
                   2651:         * Now initialize the file system itself.
                   2652:         */
                   2653:        (*vfs->vfs_init)();
                   2654:
                   2655:        /*
                   2656:         * ...and link it into the kernel's list.
                   2657:         */
                   2658:        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
                   2659:
                   2660:        /*
                   2661:         * Sanity: make sure the reference count is 0.
                   2662:         */
                   2663:        vfs->vfs_refcount = 0;
                   2664:
                   2665:  out:
                   2666:        return (error);
                   2667: }
                   2668:
                   2669: /*
                   2670:  * Remove a file system from the kernel.
                   2671:  */
                   2672: int
                   2673: vfs_detach(vfs)
                   2674:        struct vfsops *vfs;
                   2675: {
                   2676:        struct vfsops *v;
                   2677:
                   2678:        /*
                   2679:         * Make sure no one is using the filesystem.
                   2680:         */
                   2681:        if (vfs->vfs_refcount != 0)
                   2682:                return (EBUSY);
                   2683:
                   2684:        /*
                   2685:         * ...and remove it from the kernel's list.
                   2686:         */
1.157     chs      2687:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2688:                if (v == vfs) {
                   2689:                        LIST_REMOVE(v, vfs_list);
                   2690:                        break;
                   2691:                }
                   2692:        }
                   2693:
                   2694:        if (v == NULL)
                   2695:                return (ESRCH);
1.121     jdolecek 2696:
                   2697:        /*
                   2698:         * Now run the file system-specific cleanups.
                   2699:         */
                   2700:        (*vfs->vfs_done)();
1.79      thorpej  2701:
                   2702:        /*
                   2703:         * Free the vnode operations vector.
                   2704:         */
                   2705:        vfs_opv_free(vfs->vfs_opv_descs);
                   2706:        return (0);
1.157     chs      2707: }
                   2708:
                   2709: void
                   2710: vfs_reinit(void)
                   2711: {
                   2712:        struct vfsops *vfs;
                   2713:
                   2714:        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                   2715:                if (vfs->vfs_reinit) {
                   2716:                        (*vfs->vfs_reinit)();
                   2717:                }
                   2718:        }
1.29      cgd      2719: }
1.125     chs      2720:
                   2721: #ifdef DDB
                   2722: const char buf_flagbits[] =
1.142     chs      2723:        "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
1.125     chs      2724:        "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
1.142     chs      2725:        "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
                   2726:        "\32XXX\33VFLUSH";
1.125     chs      2727:
                   2728: void
                   2729: vfs_buf_print(bp, full, pr)
                   2730:        struct buf *bp;
                   2731:        int full;
                   2732:        void (*pr) __P((const char *, ...));
                   2733: {
                   2734:        char buf[1024];
                   2735:
                   2736:        (*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
                   2737:                  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
                   2738:
                   2739:        bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
                   2740:        (*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
                   2741:
1.164     msaitoh  2742:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1.125     chs      2743:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142     chs      2744:        (*pr)("  data %p saveaddr %p dep %p\n",
                   2745:                  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125     chs      2746:        (*pr)("  iodone %p\n", bp->b_iodone);
                   2747: }
                   2748:
                   2749:
                   2750: const char vnode_flagbits[] =
1.168     chs      2751:        "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
                   2752:        "\11XLOCK\12XWANT\13BWAIT\14ALIASED"
1.148     enami    2753:        "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
1.125     chs      2754:
                   2755: const char *vnode_types[] = {
                   2756:        "VNON",
                   2757:        "VREG",
                   2758:        "VDIR",
                   2759:        "VBLK",
                   2760:        "VCHR",
                   2761:        "VLNK",
                   2762:        "VSOCK",
                   2763:        "VFIFO",
                   2764:        "VBAD",
                   2765: };
                   2766:
                   2767: const char *vnode_tags[] = {
                   2768:        "VT_NON",
                   2769:        "VT_UFS",
                   2770:        "VT_NFS",
                   2771:        "VT_MFS",
                   2772:        "VT_MSDOSFS",
                   2773:        "VT_LFS",
                   2774:        "VT_LOFS",
                   2775:        "VT_FDESC",
                   2776:        "VT_PORTAL",
                   2777:        "VT_NULL",
                   2778:        "VT_UMAP",
                   2779:        "VT_KERNFS",
                   2780:        "VT_PROCFS",
                   2781:        "VT_AFS",
                   2782:        "VT_ISOFS",
                   2783:        "VT_UNION",
                   2784:        "VT_ADOSFS",
                   2785:        "VT_EXT2FS",
                   2786:        "VT_CODA",
                   2787:        "VT_FILECORE",
                   2788:        "VT_NTFS",
                   2789:        "VT_VFS",
                   2790:        "VT_OVERLAY"
                   2791: };
                   2792:
                   2793: void
                   2794: vfs_vnode_print(vp, full, pr)
                   2795:        struct vnode *vp;
                   2796:        int full;
                   2797:        void (*pr) __P((const char *, ...));
                   2798: {
1.142     chs      2799:        char buf[256];
1.125     chs      2800:        const char *vtype, *vtag;
1.172     bjh21    2801:        int tmp;
1.125     chs      2802:
1.158     chs      2803:        uvm_object_printit(&vp->v_uobj, full, pr);
1.125     chs      2804:        bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
                   2805:        (*pr)("\nVNODE flags %s\n", buf);
1.158     chs      2806:        (*pr)("mp %p numoutput %d size 0x%llx\n",
                   2807:              vp->v_mount, vp->v_numoutput, vp->v_size);
1.125     chs      2808:
1.164     msaitoh  2809:        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
1.125     chs      2810:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   2811:              vp->v_holdcnt, vp->v_numoutput);
                   2812:
1.172     bjh21    2813:        vtype = ((tmp = vp->v_type) >= 0 &&
1.125     chs      2814:                 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
                   2815:                vnode_types[vp->v_type] : "UNKNOWN";
1.172     bjh21    2816:        vtag = ((tmp = vp->v_tag) >= 0 &&
1.125     chs      2817:                vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
                   2818:                vnode_tags[vp->v_tag] : "UNKNOWN";
                   2819:
1.165     msaitoh  2820:        (*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n",
1.125     chs      2821:              vtype, vp->v_type, vtag, vp->v_tag,
                   2822:              vp->v_id, vp->v_mount, vp->v_mountedhere);
                   2823:
                   2824:        if (full) {
                   2825:                struct buf *bp;
                   2826:
                   2827:                (*pr)("clean bufs:\n");
1.142     chs      2828:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   2829:                        (*pr)(" bp %p\n", bp);
1.125     chs      2830:                        vfs_buf_print(bp, full, pr);
                   2831:                }
                   2832:
                   2833:                (*pr)("dirty bufs:\n");
1.142     chs      2834:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   2835:                        (*pr)(" bp %p\n", bp);
1.125     chs      2836:                        vfs_buf_print(bp, full, pr);
                   2837:                }
                   2838:        }
                   2839: }
                   2840: #endif

CVSweb <webmaster@jp.NetBSD.org>