[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.155.2.7

1.155.2.7! jdolecek    1: /*     $NetBSD: vfs_subr.c,v 1.155.2.6 2002/03/15 19:21:26 jdolecek Exp $      */
1.74      thorpej     2:
                      3: /*-
1.79      thorpej     4:  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.32      cgd        39:
1.29      cgd        40: /*
1.30      mycroft    41:  * Copyright (c) 1989, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        43:  * (c) UNIX System Laboratories, Inc.
                     44:  * All or some portions of this file are derived from material licensed
                     45:  * to the University of California by American Telephone and Telegraph
                     46:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     47:  * the permission of UNIX System Laboratories, Inc.
                     48:  *
                     49:  * Redistribution and use in source and binary forms, with or without
                     50:  * modification, are permitted provided that the following conditions
                     51:  * are met:
                     52:  * 1. Redistributions of source code must retain the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer.
                     54:  * 2. Redistributions in binary form must reproduce the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer in the
                     56:  *    documentation and/or other materials provided with the distribution.
                     57:  * 3. All advertising materials mentioning features or use of this software
                     58:  *    must display the following acknowledgement:
                     59:  *     This product includes software developed by the University of
                     60:  *     California, Berkeley and its contributors.
                     61:  * 4. Neither the name of the University nor the names of its contributors
                     62:  *    may be used to endorse or promote products derived from this software
                     63:  *    without specific prior written permission.
                     64:  *
                     65:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     66:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     67:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     68:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     69:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     70:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     71:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     72:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     73:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     74:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     75:  * SUCH DAMAGE.
                     76:  *
1.32      cgd        77:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        78:  */
                     79:
                     80: /*
                     81:  * External virtual filesystem routines
                     82:  */
1.78      mrg        83:
1.155.2.4  thorpej    84: #include <sys/cdefs.h>
1.155.2.7! jdolecek   85: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.155.2.6 2002/03/15 19:21:26 jdolecek Exp $");
1.155.2.4  thorpej    86:
1.125     chs        87: #include "opt_ddb.h"
1.95      thorpej    88: #include "opt_compat_netbsd.h"
1.97      christos   89: #include "opt_compat_43.h"
1.29      cgd        90:
                     91: #include <sys/param.h>
1.30      mycroft    92: #include <sys/systm.h>
1.29      cgd        93: #include <sys/proc.h>
1.138     bouyer     94: #include <sys/kernel.h>
1.29      cgd        95: #include <sys/mount.h>
                     96: #include <sys/time.h>
1.155.2.1  lukem      97: #include <sys/event.h>
1.46      mycroft    98: #include <sys/fcntl.h>
1.29      cgd        99: #include <sys/vnode.h>
1.30      mycroft   100: #include <sys/stat.h>
1.29      cgd       101: #include <sys/namei.h>
                    102: #include <sys/ucred.h>
                    103: #include <sys/buf.h>
                    104: #include <sys/errno.h>
                    105: #include <sys/malloc.h>
1.30      mycroft   106: #include <sys/domain.h>
                    107: #include <sys/mbuf.h>
1.51      christos  108: #include <sys/syscallargs.h>
1.58      thorpej   109: #include <sys/device.h>
1.71      fvdl      110: #include <sys/dirent.h>
1.50      christos  111:
1.30      mycroft   112: #include <miscfs/specfs/specdev.h>
1.113     fvdl      113: #include <miscfs/genfs/genfs.h>
                    114: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   115:
1.125     chs       116: #include <uvm/uvm.h>
                    117: #include <uvm/uvm_ddb.h>
1.129     mrg       118:
                    119: #include <sys/sysctl.h>
1.77      mrg       120:
/*
 * Translation tables between two file-type encodings.
 *
 * iftovt_tab: 16 enum vtype values.  NOTE(review): presumably indexed by
 * the S_IFMT bits of a mode shifted down to the range 0..15; the macro
 * that uses it is not visible in this file -- confirm in <sys/vnode.h>.
 *
 * vttoif_tab: 9 S_IF* mode values (0 in the first slot, S_IFMT in the
 * last).  NOTE(review): presumably indexed by enum vtype -- confirm.
 */
1.30      mycroft   121: enum vtype iftovt_tab[16] = {
                     122:        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
                     123:        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
                     124: };
1.146     jdolecek  125: const int      vttoif_tab[9] = {
1.30      mycroft   126:        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
                     127:        S_IFSOCK, S_IFIFO, S_IFMT,
                     128: };
                    129:
1.31      mycroft   130: int doforce = 1;               /* 1 => permit forcible unmounting */
                    131: int prtactive = 0;             /* 1 => print out reclaim of active vnodes */
1.29      cgd       132:
1.117     fvdl      133: extern int dovfsusermount;     /* 1 => permit any user to mount filesystems */
                    134:
/*
 * Insert/remove helpers for the buffer lists hanging off a vnode.
 *
 * bufinsvn(bp, dp): link buffer bp at the head of list dp through its
 *	b_vnbufs entry.
 * bufremvn(bp): unlink bp from the list its b_vnbufs entry is on, then
 *	store NOLIST in b_vnbufs.le_next so the buffer is recognisably on
 *	no list.
 *
 * bufremvn() is wrapped in do { } while (0) so that it expands to exactly
 * one statement; with bare braces, "if (x) bufremvn(bp); else ..." would
 * not parse because of the stray ';' after the closing brace.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/*CONSTCOND*/ 0)
1.113     fvdl      143: /* TAILQ_HEAD(freelst, vnode) vnode_free_list =        vnode free list (in vnode.h) */
                    144: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114     enami     145: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113     fvdl      146:
1.55      cgd       147: struct mntlist mountlist =                     /* mounted filesystem list */
                    148:     CIRCLEQ_HEAD_INITIALIZER(mountlist);
1.79      thorpej   149: struct vfs_list_head vfs_list =                        /* vfs list */
1.118     mycroft   150:     LIST_HEAD_INITIALIZER(vfs_list);
1.79      thorpej   151:
1.71      fvdl      152: struct nfs_public nfs_pub;                     /* publicly exported FS */
1.58      thorpej   153:
1.135     sommerfe  154: struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
                    155: static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
                    156: struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
                    157: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
                    158: struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
1.80      fvdl      159:
1.79      thorpej   160: /*
                    161:  * These define the root filesystem and device.
                    162:  */
                    163: struct mount *rootfs;
                    164: struct vnode *rootvnode;
1.80      fvdl      165: struct device *root_device;                    /* root device */
1.79      thorpej   166:
1.93      thorpej   167: struct pool vnode_pool;                                /* memory pool for vnodes */
                    168:
1.89      kleink    169: /*
                    170:  * Local declarations.
                    171:  */
1.50      christos  172: void insmntque __P((struct vnode *, struct mount *));
                    173: int getdevvp __P((dev_t, struct vnode **, enum vtype));
                    174: void vgoneall __P((struct vnode *));
                    175:
                    176: static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
                    177:                                  struct export_args *));
                    178: static int vfs_free_netcred __P((struct radix_node *, void *));
                    179: static void vfs_free_addrlist __P((struct netexport *));
1.51      christos  180:
                    181: #ifdef DEBUG
                    182: void printlockedvnodes __P((void));
                    183: #endif
                    184:
1.29      cgd       185: /*
1.30      mycroft   186:  * Initialize the vnode management data structures.
1.29      cgd       187:  */
1.50      christos  188: void
1.30      mycroft   189: vntblinit()
1.29      cgd       190: {
1.93      thorpej   191:
                    192:        pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
1.155.2.7! jdolecek  193:            &pool_allocator_nointr);
1.113     fvdl      194:
                    195:        /*
                    196:         * Initialize the filesystem syncer.
                    197:         */
                    198:        vn_initialize_syncerd();
1.29      cgd       199: }
                    200:
                     201: /*
1.80      fvdl      202:  * Mark a mount point as busy. Used to synchronize access and to delay
                     203:  * unmounting. Interlock is not released on failure.
                           *
                           * mp:       mount point to mark busy.
                           * flags:    LK_NOWAIT - return ENOENT instead of sleeping while
                           *           MNT_UNMOUNT is set on mp.
                           *           LK_RECURSEFAIL - return EDEADLK if MNT_UNMOUNT is set
                           *           and mp->mnt_unmounter is curproc.
                           * interlkp: optional simple lock held by the caller.  It is
                           *           dropped around the sleep and re-taken afterwards; on
                           *           the success path it is handed to lockmgr() together
                           *           with LK_INTERLOCK.
                           *
                           * Returns 0 once the LK_SHARED request on mp->mnt_lock succeeds,
                           * ENOENT/EDEADLK as described above, or ENOENT if MNT_GONE is
                           * found set after being woken up.  Panics if lockmgr() fails.
1.29      cgd       204:  */
1.50      christos  205: int
1.80      fvdl      206: vfs_busy(mp, flags, interlkp)
                     207:        struct mount *mp;
                     208:        int flags;
                     209:        struct simplelock *interlkp;
1.29      cgd       210: {
1.80      fvdl      211:        int lkflags;
1.29      cgd       212:
                                /* Loop for as long as an unmount of mp is in progress. */
1.103     sommerfe  213:        while (mp->mnt_flag & MNT_UNMOUNT) {
                     214:                int gone;
                     215:
1.80      fvdl      216:                if (flags & LK_NOWAIT)
                     217:                        return (ENOENT);
1.113     fvdl      218:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
                     219:                    && mp->mnt_unmounter == curproc)
                     220:                        return (EDEADLK);
1.80      fvdl      221:                if (interlkp)
                     222:                        simple_unlock(interlkp);
                     223:                /*
                     224:                 * Since all busy locks are shared except the exclusive
                     225:                 * lock granted when unmounting, the only place that a
                     226:                 * wakeup needs to be done is at the release of the
                     227:                 * exclusive lock at the end of dounmount.
1.103     sommerfe  228:                 *
1.106     sommerfe  229:                 * XXX MP: add spinlock protecting mnt_wcnt here once you
                     230:                 * can atomically unlock-and-sleep.
1.80      fvdl      231:                 */
1.103     sommerfe  232:                mp->mnt_wcnt++;
1.113     fvdl      233:                tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
1.103     sommerfe  234:                mp->mnt_wcnt--;
                                        /*
                                         * Sample MNT_GONE before anything else: once the
                                         * waiter count reaches zero and the wakeup below
                                         * is issued, mp is not touched again on the
                                         * failure path.  NOTE(review): presumably the
                                         * unmount code sleeps on &mp->mnt_wcnt before
                                         * freeing mp -- not visible here, confirm.
                                         */
                     235:                gone = mp->mnt_flag & MNT_GONE;
                     236:
                     237:                if (mp->mnt_wcnt == 0)
                     238:                        wakeup(&mp->mnt_wcnt);
1.80      fvdl      239:                if (interlkp)
                     240:                        simple_lock(interlkp);
1.103     sommerfe  241:                if (gone)
                     242:                        return (ENOENT);
1.80      fvdl      243:        }
                                /* Not being unmounted: request a shared lock on the mount. */
                     244:        lkflags = LK_SHARED;
                     245:        if (interlkp)
                     246:                lkflags |= LK_INTERLOCK;
                     247:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                     248:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       249:        return (0);
                     250: }
                    251:
                    252: /*
1.80      fvdl      253:  * Free a busy filesystem.
1.29      cgd       254:  */
                    255: void
1.80      fvdl      256: vfs_unbusy(mp)
                    257:        struct mount *mp;
1.29      cgd       258: {
                    259:
1.80      fvdl      260:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       261: }
                    262:
                    263: /*
1.80      fvdl      264:  * Lookup a filesystem type, and if found allocate and initialize
                    265:  * a mount structure for it.
                    266:  *
                    267:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       268:  */
1.50      christos  269: int
1.80      fvdl      270: vfs_rootmountalloc(fstypename, devname, mpp)
                    271:        char *fstypename;
                    272:        char *devname;
                    273:        struct mount **mpp;
1.29      cgd       274: {
1.80      fvdl      275:        struct vfsops *vfsp = NULL;
                    276:        struct mount *mp;
1.29      cgd       277:
1.152     jdolecek  278:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80      fvdl      279:                if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
                    280:                        break;
                    281:
                    282:        if (vfsp == NULL)
                    283:                return (ENODEV);
                    284:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     285:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      286:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
                    287:        (void)vfs_busy(mp, LK_NOWAIT, 0);
                    288:        LIST_INIT(&mp->mnt_vnodelist);
                    289:        mp->mnt_op = vfsp;
                    290:        mp->mnt_flag = MNT_RDONLY;
                    291:        mp->mnt_vnodecovered = NULLVP;
                    292:        vfsp->vfs_refcount++;
                    293:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
                    294:        mp->mnt_stat.f_mntonname[0] = '/';
                    295:        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    296:        *mpp = mp;
1.29      cgd       297:        return (0);
                    298: }
                    299:
                    300: /*
                    301:  * Lookup a mount point by filesystem identifier.
                    302:  */
                    303: struct mount *
1.80      fvdl      304: vfs_getvfs(fsid)
1.29      cgd       305:        fsid_t *fsid;
                    306: {
1.123     augustss  307:        struct mount *mp;
1.29      cgd       308:
1.80      fvdl      309:        simple_lock(&mountlist_slock);
1.38      mycroft   310:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
1.80      fvdl      311:             mp = mp->mnt_list.cqe_next) {
1.29      cgd       312:                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
1.80      fvdl      313:                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
                    314:                        simple_unlock(&mountlist_slock);
1.29      cgd       315:                        return (mp);
1.80      fvdl      316:                }
                    317:        }
                    318:        simple_unlock(&mountlist_slock);
1.29      cgd       319:        return ((struct mount *)0);
                    320: }
                    321:
                    322: /*
                    323:  * Get a new unique fsid
                    324:  */
                    325: void
1.127     assar     326: vfs_getnewfsid(mp)
1.29      cgd       327:        struct mount *mp;
                    328: {
                    329:        static u_short xxxfs_mntid;
                    330:        fsid_t tfsid;
1.80      fvdl      331:        int mtype;
1.29      cgd       332:
1.80      fvdl      333:        simple_lock(&mntid_slock);
1.127     assar     334:        mtype = makefstype(mp->mnt_op->vfs_name);
1.80      fvdl      335:        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
1.29      cgd       336:        mp->mnt_stat.f_fsid.val[1] = mtype;
                    337:        if (xxxfs_mntid == 0)
                    338:                ++xxxfs_mntid;
1.33      deraadt   339:        tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
1.29      cgd       340:        tfsid.val[1] = mtype;
1.38      mycroft   341:        if (mountlist.cqh_first != (void *)&mountlist) {
1.80      fvdl      342:                while (vfs_getvfs(&tfsid)) {
1.29      cgd       343:                        tfsid.val[0]++;
                    344:                        xxxfs_mntid++;
                    345:                }
                    346:        }
                    347:        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
1.80      fvdl      348:        simple_unlock(&mntid_slock);
1.29      cgd       349: }
                    350:
                    351: /*
1.30      mycroft   352:  * Make a 'unique' number from a mount type name.
1.29      cgd       353:  */
                    354: long
                    355: makefstype(type)
1.127     assar     356:        const char *type;
1.29      cgd       357: {
                    358:        long rv;
                    359:
                    360:        for (rv = 0; *type; type++) {
                    361:                rv <<= 2;
                    362:                rv ^= *type;
                    363:        }
                    364:        return rv;
                    365: }
1.30      mycroft   366:
1.80      fvdl      367:
1.30      mycroft   368: /*
                    369:  * Set vnode attributes to VNOVAL
                    370:  */
                    371: void
                    372: vattr_null(vap)
1.123     augustss  373:        struct vattr *vap;
1.30      mycroft   374: {
                    375:
                    376:        vap->va_type = VNON;
1.75      enami     377:
                    378:        /*
                    379:         * Assign individually so that it is safe even if size and
                    380:         * sign of each member are varied.
                    381:         */
                    382:        vap->va_mode = VNOVAL;
                    383:        vap->va_nlink = VNOVAL;
                    384:        vap->va_uid = VNOVAL;
                    385:        vap->va_gid = VNOVAL;
                    386:        vap->va_fsid = VNOVAL;
                    387:        vap->va_fileid = VNOVAL;
1.30      mycroft   388:        vap->va_size = VNOVAL;
1.75      enami     389:        vap->va_blocksize = VNOVAL;
1.76      christos  390:        vap->va_atime.tv_sec =
                    391:            vap->va_mtime.tv_sec =
                    392:            vap->va_ctime.tv_sec = VNOVAL;
                    393:        vap->va_atime.tv_nsec =
                    394:            vap->va_mtime.tv_nsec =
                    395:            vap->va_ctime.tv_nsec = VNOVAL;
1.75      enami     396:        vap->va_gen = VNOVAL;
                    397:        vap->va_flags = VNOVAL;
                    398:        vap->va_rdev = VNOVAL;
1.30      mycroft   399:        vap->va_bytes = VNOVAL;
                    400:        vap->va_vaflags = 0;
                    401: }
                    402:
                    403: /*
                    404:  * Routines having to do with the management of the vnode table.
                    405:  */
1.50      christos  406: extern int (**dead_vnodeop_p) __P((void *));
1.30      mycroft   407: long numvnodes;
                    408:
1.29      cgd       409: /*
                     410:  * Return the next vnode from the free list.
                           *
                           * More precisely: hand back a vnode that is either freshly
                           * allocated from vnode_pool or recycled from vnode_free_list /
                           * vnode_hold_list, initialized and placed on mp's vnode list.
                           *
                           * tag:  value stored in v_tag.
                           * mp:   mount the vnode is queued on via insmntque(); may be NULL,
                           *       in which case no vfs_busy()/vfs_unbusy() is done.
                           * vops: operations vector stored in v_op.
                           * vpp:  receives the vnode (v_usecount 1, v_type VNON, v_data 0,
                           *       v_size VSIZENOTSET); set to 0 on ENFILE.
                           *
                           * Returns 0 on success, the error from vfs_busy() (other than
                           * EDEADLK, which is tolerated), or ENFILE when nothing could be
                           * recycled and allocation had not been attempted.
                     411:  */
1.50      christos  412: int
1.29      cgd       413: getnewvnode(tag, mp, vops, vpp)
                     414:        enum vtagtype tag;
                     415:        struct mount *mp;
1.50      christos  416:        int (**vops) __P((void *));
1.29      cgd       417:        struct vnode **vpp;
                     418: {
1.142     chs       419:        extern struct uvm_pagerops uvm_vnodeops;
                     420:        struct uvm_object *uobj;
1.80      fvdl      421:        struct proc *p = curproc;       /* XXX */
1.113     fvdl      422:        struct freelst *listhd;
                     423:        static int toggle;
1.80      fvdl      424:        struct vnode *vp;
1.153     thorpej   425:        int error = 0, tryalloc;
1.155.2.4  thorpej   426:
                     427:  try_again:
1.103     sommerfe  428:        if (mp) {
                     429:                /*
1.106     sommerfe  430:                 * Mark filesystem busy while we're creating a vnode.
                     431:                 * If unmount is in progress, this will wait; if the
                     432:                 * unmount succeeds (only if umount -f), this will
                     433:                 * return an error.  If the unmount fails, we'll keep
                     434:                 * going afterwards.
                     435:                 * (This puts the per-mount vnode list logically under
                     436:                 * the protection of the vfs_busy lock).
1.103     sommerfe  437:                 */
1.113     fvdl      438:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                     439:                if (error && error != EDEADLK)
1.103     sommerfe  440:                        return error;
                     441:        }
1.29      cgd       442:
1.113     fvdl      443:        /*
                     444:         * We must choose whether to allocate a new vnode or recycle an
                     445:         * existing one. The criterion for allocating a new one is that
                     446:         * the total number of vnodes is less than the number desired or
                     447:         * there are no vnodes on either free list. Generally we only
                     448:         * want to recycle vnodes that have no buffers associated with
                     449:         * them, so we look first on the vnode_free_list. If it is empty,
                     450:         * we next consider vnodes with referencing buffers on the
                     451:         * vnode_hold_list. The toggle ensures that half the time we
                     452:         * will use a buffer from the vnode_hold_list, and half the time
                     453:         * we will allocate a new one unless the list has grown to twice
                     454:         * the desired size. We are reticent to recycle vnodes from the
                     455:         * vnode_hold_list because we will lose the identity of all its
                     456:         * referencing buffers.
                     457:         */
1.142     chs       458:
1.153     thorpej   459:        vp = NULL;
                     460:
                     461:        simple_lock(&vnode_free_list_slock);
                     462:
1.113     fvdl      463:        toggle ^= 1;
                     464:        if (numvnodes > 2 * desiredvnodes)
                     465:                toggle = 0;
                     466:
                                /*
                                 * Try the pool when below the desired count, or when the
                                 * free list is empty and the hold list is either empty too
                                 * or it is the toggle's turn to prefer allocation.
                                 */
1.153     thorpej   467:        tryalloc = numvnodes < desiredvnodes ||
1.155.2.4  thorpej   468:            (TAILQ_FIRST(&vnode_free_list) == NULL &&
                     469:             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153     thorpej   470:
1.153     thorpej   471:        if (tryalloc &&
                     472:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
                                        /* Fresh vnode: zero it and set up the embedded uvm_object. */
1.80      fvdl      473:                simple_unlock(&vnode_free_list_slock);
1.142     chs       474:                memset(vp, 0, sizeof(*vp));
1.155.2.4  thorpej   475:                simple_lock_init(&vp->v_interlock);
                     476:                uobj = &vp->v_uobj;
                     477:                uobj->pgops = &uvm_vnodeops;
                     478:                uobj->uo_npages = 0;
                     479:                TAILQ_INIT(&uobj->memq);
1.29      cgd       480:                numvnodes++;
                     481:        } else {
                                        /*
                                         * Recycle.  Start from the free list, or from the
                                         * hold list if the free list is empty, and take the
                                         * first vnode whose interlock can be grabbed without
                                         * waiting; a VLAYER vnode is additionally skipped
                                         * while VOP_ISLOCKED() reports it locked.  The loop
                                         * exits with vp's interlock held, or with vp NULL.
                                         */
1.155.2.4  thorpej   482:                if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
                     483:                        vp = TAILQ_FIRST(listhd = &vnode_hold_list);
                     484:                for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
1.105     wrstuden  485:                        if (simple_lock_try(&vp->v_interlock)) {
1.155.2.4  thorpej   486:                                if ((vp->v_flag & VLAYER) == 0) {
1.105     wrstuden  487:                                        break;
1.155.2.4  thorpej   488:                                }
1.105     wrstuden  489:                                if (VOP_ISLOCKED(vp) == 0)
                     490:                                        break;
                     491:                                else
                     492:                                        simple_unlock(&vp->v_interlock);
                     493:                        }
1.80      fvdl      494:                }
                     495:                /*
                     496:                 * Unless this is a bad time of the month, at most
                     497:                 * the first NCPUS items on the free list are
                     498:                 * locked, so this is close enough to being empty.
                     499:                 */
                     500:                if (vp == NULLVP) {
                     501:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl      502:                        if (mp && error != EDEADLK)
                     503:                                vfs_unbusy(mp);
                                                /*
                                                 * If we got here because pool_get() failed,
                                                 * wait (sleep on lbolt, timeout hz) and start
                                                 * over, including re-busying the mount.
                                                 */
1.153     thorpej   504:                        if (tryalloc) {
                     505:                                printf("WARNING: unable to allocate new "
                     506:                                    "vnode, retrying...\n");
                     507:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                     508:                                goto try_again;
                     509:                        }
1.132     jdolecek  510:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       511:                        *vpp = 0;
                     512:                        return (ENFILE);
                     513:                }
1.80      fvdl      514:                if (vp->v_usecount)
1.125     chs       515:                        panic("free vnode isn't, vp %p", vp);
1.113     fvdl      516:                TAILQ_REMOVE(listhd, vp, v_freelist);
1.30      mycroft   517:                /* see comment on why 0xdeadb is set at end of vgone (below) */
1.29      cgd       518:                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
1.80      fvdl      519:                simple_unlock(&vnode_free_list_slock);
1.30      mycroft   520:                vp->v_lease = NULL;
1.155.2.4  thorpej   521:
                                        /*
                                         * v_interlock is still held from the scan above.
                                         * NOTE(review): vgonel() presumably releases it
                                         * (the VBAD branch unlocks explicitly) -- confirm.
                                         */
1.29      cgd       522:                if (vp->v_type != VBAD)
1.80      fvdl      523:                        vgonel(vp, p);
                     524:                else
                     525:                        simple_unlock(&vp->v_interlock);
1.30      mycroft   526: #ifdef DIAGNOSTIC
1.155.2.4  thorpej   527:                if (vp->v_data || vp->v_uobj.uo_npages ||
                     528:                    TAILQ_FIRST(&vp->v_uobj.memq))
1.125     chs       529:                        panic("cleaned vnode isn't, vp %p", vp);
1.30      mycroft   530:                if (vp->v_numoutput)
1.125     chs       531:                        panic("clean vnode has pending I/O's, vp %p", vp);
1.30      mycroft   532: #endif
1.155.2.4  thorpej   533:                KASSERT((vp->v_flag & VONWORKLST) == 0);
1.29      cgd       534:                vp->v_flag = 0;
1.155.2.4  thorpej   535:                vp->v_socket = NULL;
1.29      cgd       536:        }
                                /* Common initialization for both fresh and recycled vnodes. */
                     537:        vp->v_type = VNON;
1.104     wrstuden  538:        vp->v_vnlock = &vp->v_lock;
                     539:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd       540:        cache_purge(vp);
                     541:        vp->v_tag = tag;
                     542:        vp->v_op = vops;
                     543:        insmntque(vp, mp);
1.30      mycroft   544:        *vpp = vp;
1.29      cgd       545:        vp->v_usecount = 1;
1.30      mycroft   546:        vp->v_data = 0;
1.155.2.4  thorpej   547:        simple_lock_init(&vp->v_uobj.vmobjlock);
1.142     chs       548:
                     549:        /*
                     550:         * initialize uvm_object within vnode.
                     551:         */
                     552:
1.155.2.4  thorpej   553:        uobj = &vp->v_uobj;
                     554:        KASSERT(uobj->pgops == &uvm_vnodeops);
                     555:        KASSERT(uobj->uo_npages == 0);
                     556:        KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
                     557:        vp->v_size = VSIZENOTSET;
1.142     chs       558:
                                /*
                                 * EDEADLK from vfs_busy() means it returned before taking
                                 * the mount lock, so there is nothing to release then.
                                 */
1.113     fvdl      559:        if (mp && error != EDEADLK)
                     560:                vfs_unbusy(mp);
1.29      cgd       561:        return (0);
1.130     fvdl      562: }
                    563:
                    564: /*
                    565:  * This is really just the reverse of getnewvnode(). Needed for
                    566:  * VFS_VGET functions who may need to push back a vnode in case
                    567:  * of a locking race.
                    568:  */
                    569: void
1.131     fvdl      570: ungetnewvnode(vp)
1.130     fvdl      571:        struct vnode *vp;
                    572: {
                    573: #ifdef DIAGNOSTIC
                    574:        if (vp->v_usecount != 1)
1.131     fvdl      575:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      576: #endif
                    577:        vp->v_usecount--;
                    578:        insmntque(vp, NULL);
                    579:        vp->v_type = VBAD;
                    580:
                    581:        simple_lock(&vp->v_interlock);
                    582:        /*
                    583:         * Insert at head of LRU list
                    584:         */
                    585:        simple_lock(&vnode_free_list_slock);
                    586:        if (vp->v_holdcnt > 0)
                    587:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    588:        else
                    589:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                    590:        simple_unlock(&vnode_free_list_slock);
                    591:        simple_unlock(&vp->v_interlock);
1.29      cgd       592: }
                    593:
                    594: /*
                    595:  * Move a vnode from one mount queue to another.
                    596:  */
1.50      christos  597: void
1.29      cgd       598: insmntque(vp, mp)
1.123     augustss  599:        struct vnode *vp;
                    600:        struct mount *mp;
1.29      cgd       601: {
                    602:
1.103     sommerfe  603: #ifdef DIAGNOSTIC
                    604:        if ((mp != NULL) &&
1.113     fvdl      605:            (mp->mnt_flag & MNT_UNMOUNT) &&
                    606:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    607:            vp->v_tag != VT_VFS) {
1.103     sommerfe  608:                panic("insmntque into dying filesystem");
                    609:        }
                    610: #endif
                    611:
1.80      fvdl      612:        simple_lock(&mntvnode_slock);
1.29      cgd       613:        /*
                    614:         * Delete from old mount point vnode list, if on one.
                    615:         */
                    616:        if (vp->v_mount != NULL)
                    617:                LIST_REMOVE(vp, v_mntvnodes);
                    618:        /*
                    619:         * Insert into list of vnodes for the new mount point, if available.
                    620:         */
1.80      fvdl      621:        if ((vp->v_mount = mp) != NULL)
                    622:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    623:        simple_unlock(&mntvnode_slock);
1.29      cgd       624: }
                    625:
                    626: /*
                    627:  * Update outstanding I/O count and do wakeup if requested.
                    628:  */
1.50      christos  629: void
1.29      cgd       630: vwakeup(bp)
1.123     augustss  631:        struct buf *bp;
1.29      cgd       632: {
1.123     augustss  633:        struct vnode *vp;
1.29      cgd       634:
1.50      christos  635:        if ((vp = bp->b_vp) != NULL) {
1.30      mycroft   636:                if (--vp->v_numoutput < 0)
1.125     chs       637:                        panic("vwakeup: neg numoutput, vp %p", vp);
1.29      cgd       638:                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                    639:                        vp->v_flag &= ~VBWAIT;
                    640:                        wakeup((caddr_t)&vp->v_numoutput);
                    641:                }
                    642:        }
                    643: }
                    644:
                    645: /*
                    646:  * Flush out and invalidate all buffers associated with a vnode.
1.126     mycroft   647:  * Called with the underlying vnode locked, which should prevent new dirty
                    648:  * buffers from being queued.
1.29      cgd       649:  */
1.30      mycroft   650: int
                    651: vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1.123     augustss  652:        struct vnode *vp;
1.30      mycroft   653:        int flags;
                    654:        struct ucred *cred;
                    655:        struct proc *p;
                    656:        int slpflag, slptimeo;
1.29      cgd       657: {
1.126     mycroft   658:        struct buf *bp, *nbp;
1.155.2.4  thorpej   659:        int s, error;
                    660:        int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
1.142     chs       661:                (flags & V_SAVE ? PGO_CLEANIT : 0);
                    662:
                    663:        /* XXXUBC this doesn't look at flags or slp* */
1.155.2.4  thorpej   664:        simple_lock(&vp->v_interlock);
                    665:        error = VOP_PUTPAGES(vp, 0, 0, flushflags);
                    666:        if (error) {
                    667:                return error;
1.142     chs       668:        }
1.155.2.4  thorpej   669:
1.30      mycroft   670:        if (flags & V_SAVE) {
1.140     fvdl      671:                error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
1.126     mycroft   672:                if (error)
1.122     fvdl      673:                        return (error);
1.126     mycroft   674: #ifdef DIAGNOSTIC
1.122     fvdl      675:                s = splbio();
1.126     mycroft   676:                if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125     chs       677:                        panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113     fvdl      678:                splx(s);
1.126     mycroft   679: #endif
1.30      mycroft   680:        }
1.113     fvdl      681:
1.115     fvdl      682:        s = splbio();
                    683:
1.126     mycroft   684: restart:
                    685:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    686:                nbp = LIST_NEXT(bp, b_vnbufs);
                    687:                if (bp->b_flags & B_BUSY) {
                    688:                        bp->b_flags |= B_WANTED;
                    689:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    690:                            "vinvalbuf", slptimeo);
                    691:                        if (error) {
                    692:                                splx(s);
                    693:                                return (error);
                    694:                        }
                    695:                        goto restart;
1.113     fvdl      696:                }
1.126     mycroft   697:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    698:                brelse(bp);
                    699:        }
1.30      mycroft   700:
1.126     mycroft   701:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    702:                nbp = LIST_NEXT(bp, b_vnbufs);
                    703:                if (bp->b_flags & B_BUSY) {
                    704:                        bp->b_flags |= B_WANTED;
                    705:                        error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
                    706:                            "vinvalbuf", slptimeo);
                    707:                        if (error) {
                    708:                                splx(s);
                    709:                                return (error);
1.29      cgd       710:                        }
1.126     mycroft   711:                        goto restart;
                    712:                }
                    713:                /*
                    714:                 * XXX Since there are no node locks for NFS, I believe
                    715:                 * there is a slight chance that a delayed write will
                    716:                 * occur while sleeping just above, so check for it.
                    717:                 */
                    718:                if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                    719: #ifdef DEBUG
                    720:                        printf("buffer still DELWRI\n");
                    721: #endif
1.63      mycroft   722:                        bp->b_flags |= B_BUSY | B_VFLUSH;
1.126     mycroft   723:                        VOP_BWRITE(bp);
                    724:                        goto restart;
                    725:                }
                    726:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    727:                brelse(bp);
                    728:        }
                    729:
                    730: #ifdef DIAGNOSTIC
                    731:        if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
                    732:                panic("vinvalbuf: flush failed, vp %p", vp);
1.113     fvdl      733: #endif
1.126     mycroft   734:
                    735:        splx(s);
                    736:
                    737:        return (0);
                    738: }
                    739:
                    740: /*
                    741:  * Destroy any in core blocks past the truncation length.
                    742:  * Called with the underlying vnode locked, which should prevent new dirty
                    743:  * buffers from being queued.
                    744:  */
                    745: int
                    746: vtruncbuf(vp, lbn, slpflag, slptimeo)
                    747:        struct vnode *vp;
                    748:        daddr_t lbn;
                    749:        int slpflag, slptimeo;
                    750: {
                    751:        struct buf *bp, *nbp;
1.155.2.4  thorpej   752:        int s, error;
                    753:        voff_t off;
1.126     mycroft   754:
1.155.2.4  thorpej   755:        off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
                    756:        simple_lock(&vp->v_interlock);
                    757:        error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
                    758:        if (error) {
                    759:                return error;
1.142     chs       760:        }
1.126     mycroft   761:
1.155.2.4  thorpej   762:        s = splbio();
                    763:
1.126     mycroft   764: restart:
                    765:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    766:                nbp = LIST_NEXT(bp, b_vnbufs);
                    767:                if (bp->b_lblkno < lbn)
                    768:                        continue;
                    769:                if (bp->b_flags & B_BUSY) {
                    770:                        bp->b_flags |= B_WANTED;
1.142     chs       771:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   772:                            "vtruncbuf", slptimeo);
                    773:                        if (error) {
                    774:                                splx(s);
                    775:                                return (error);
1.29      cgd       776:                        }
1.126     mycroft   777:                        goto restart;
1.29      cgd       778:                }
1.126     mycroft   779:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    780:                brelse(bp);
1.29      cgd       781:        }
1.115     fvdl      782:
1.126     mycroft   783:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    784:                nbp = LIST_NEXT(bp, b_vnbufs);
                    785:                if (bp->b_lblkno < lbn)
                    786:                        continue;
                    787:                if (bp->b_flags & B_BUSY) {
                    788:                        bp->b_flags |= B_WANTED;
1.142     chs       789:                        error = tsleep(bp, slpflag | (PRIBIO + 1),
1.126     mycroft   790:                            "vtruncbuf", slptimeo);
                    791:                        if (error) {
                    792:                                splx(s);
                    793:                                return (error);
                    794:                        }
                    795:                        goto restart;
                    796:                }
                    797:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
                    798:                brelse(bp);
                    799:        }
1.115     fvdl      800:
                    801:        splx(s);
                    802:
1.30      mycroft   803:        return (0);
                    804: }
                    805:
                    806: void
                    807: vflushbuf(vp, sync)
1.123     augustss  808:        struct vnode *vp;
1.30      mycroft   809:        int sync;
                    810: {
1.123     augustss  811:        struct buf *bp, *nbp;
1.155.2.4  thorpej   812:        int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
1.30      mycroft   813:        int s;
                    814:
1.155.2.4  thorpej   815:        simple_lock(&vp->v_interlock);
                    816:        (void) VOP_PUTPAGES(vp, 0, 0, flags);
1.142     chs       817:
1.30      mycroft   818: loop:
                    819:        s = splbio();
1.126     mycroft   820:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    821:                nbp = LIST_NEXT(bp, b_vnbufs);
1.30      mycroft   822:                if ((bp->b_flags & B_BUSY))
                    823:                        continue;
                    824:                if ((bp->b_flags & B_DELWRI) == 0)
1.125     chs       825:                        panic("vflushbuf: not dirty, bp %p", bp);
1.63      mycroft   826:                bp->b_flags |= B_BUSY | B_VFLUSH;
1.30      mycroft   827:                splx(s);
                    828:                /*
                    829:                 * Wait for I/O associated with indirect blocks to complete,
                    830:                 * since there is no way to quickly wait for them below.
                    831:                 */
                    832:                if (bp->b_vp == vp || sync == 0)
                    833:                        (void) bawrite(bp);
                    834:                else
                    835:                        (void) bwrite(bp);
                    836:                goto loop;
                    837:        }
                    838:        if (sync == 0) {
                    839:                splx(s);
                    840:                return;
                    841:        }
                    842:        while (vp->v_numoutput) {
                    843:                vp->v_flag |= VBWAIT;
                    844:                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
                    845:        }
                    846:        splx(s);
1.126     mycroft   847:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30      mycroft   848:                vprint("vflushbuf: dirty", vp);
                    849:                goto loop;
                    850:        }
1.29      cgd       851: }
                    852:
                    853: /*
                    854:  * Associate a buffer with a vnode.
                    855:  */
1.50      christos  856: void
1.29      cgd       857: bgetvp(vp, bp)
1.123     augustss  858:        struct vnode *vp;
                    859:        struct buf *bp;
1.29      cgd       860: {
1.115     fvdl      861:        int s;
1.29      cgd       862:
                    863:        if (bp->b_vp)
1.125     chs       864:                panic("bgetvp: not free, bp %p", bp);
1.29      cgd       865:        VHOLD(vp);
1.115     fvdl      866:        s = splbio();
1.29      cgd       867:        bp->b_vp = vp;
                    868:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                    869:                bp->b_dev = vp->v_rdev;
                    870:        else
                    871:                bp->b_dev = NODEV;
                    872:        /*
                    873:         * Insert onto list for new vnode.
                    874:         */
                    875:        bufinsvn(bp, &vp->v_cleanblkhd);
1.115     fvdl      876:        splx(s);
1.29      cgd       877: }
                    878:
                    879: /*
                    880:  * Disassociate a buffer from a vnode.
                    881:  */
1.50      christos  882: void
1.29      cgd       883: brelvp(bp)
1.123     augustss  884:        struct buf *bp;
1.29      cgd       885: {
                    886:        struct vnode *vp;
1.115     fvdl      887:        int s;
1.29      cgd       888:
1.125     chs       889:        if (bp->b_vp == NULL)
                    890:                panic("brelvp: vp NULL, bp %p", bp);
1.115     fvdl      891:
                    892:        s = splbio();
1.113     fvdl      893:        vp = bp->b_vp;
1.29      cgd       894:        /*
                    895:         * Delete from old vnode list, if on one.
                    896:         */
                    897:        if (bp->b_vnbufs.le_next != NOLIST)
                    898:                bufremvn(bp);
1.142     chs       899:
1.155.2.4  thorpej   900:        if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
1.142     chs       901:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.113     fvdl      902:                vp->v_flag &= ~VONWORKLST;
                    903:                LIST_REMOVE(vp, v_synclist);
                    904:        }
1.142     chs       905:
                    906:        bp->b_vp = NULL;
1.29      cgd       907:        HOLDRELE(vp);
1.115     fvdl      908:        splx(s);
1.29      cgd       909: }
                    910:
                    911: /*
                    912:  * Reassign a buffer from one vnode to another.
                    913:  * Used to assign file specific control information
                    914:  * (indirect blocks) to the vnode to which they belong.
1.115     fvdl      915:  *
                    916:  * This function must be called at splbio().
1.29      cgd       917:  */
1.50      christos  918: void
1.29      cgd       919: reassignbuf(bp, newvp)
1.113     fvdl      920:        struct buf *bp;
                    921:        struct vnode *newvp;
1.29      cgd       922: {
1.113     fvdl      923:        struct buflists *listheadp;
                    924:        int delay;
1.29      cgd       925:
                    926:        /*
                    927:         * Delete from old vnode list, if on one.
                    928:         */
                    929:        if (bp->b_vnbufs.le_next != NOLIST)
                    930:                bufremvn(bp);
                    931:        /*
                    932:         * If dirty, put on list of dirty buffers;
                    933:         * otherwise insert onto list of clean buffers.
                    934:         */
1.113     fvdl      935:        if ((bp->b_flags & B_DELWRI) == 0) {
                    936:                listheadp = &newvp->v_cleanblkhd;
1.155.2.4  thorpej   937:                if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1.142     chs       938:                    (newvp->v_flag & VONWORKLST) &&
1.113     fvdl      939:                    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
                    940:                        newvp->v_flag &= ~VONWORKLST;
                    941:                        LIST_REMOVE(newvp, v_synclist);
                    942:                }
                    943:        } else {
1.29      cgd       944:                listheadp = &newvp->v_dirtyblkhd;
1.113     fvdl      945:                if ((newvp->v_flag & VONWORKLST) == 0) {
                    946:                        switch (newvp->v_type) {
                    947:                        case VDIR:
                    948:                                delay = dirdelay;
                    949:                                break;
                    950:                        case VBLK:
                    951:                                if (newvp->v_specmountpoint != NULL) {
                    952:                                        delay = metadelay;
                    953:                                        break;
                    954:                                }
                    955:                                /* fall through */
                    956:                        default:
1.118     mycroft   957:                                delay = filedelay;
                    958:                                break;
1.113     fvdl      959:                        }
1.118     mycroft   960:                        if (!newvp->v_mount ||
                    961:                            (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
                    962:                                vn_syncer_add_to_worklist(newvp, delay);
1.113     fvdl      963:                }
                    964:        }
1.29      cgd       965:        bufinsvn(bp, listheadp);
                    966: }
                    967:
                    968: /*
                    969:  * Create a vnode for a block device.
1.59      thorpej   970:  * Used for root filesystem and swap areas.
1.29      cgd       971:  * Also used for memory file system special devices.
                    972:  */
1.50      christos  973: int
1.29      cgd       974: bdevvp(dev, vpp)
                    975:        dev_t dev;
                    976:        struct vnode **vpp;
                    977: {
1.30      mycroft   978:
                    979:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd       980: }
                    981:
                    982: /*
                    983:  * Create a vnode for a character device.
                    984:  * Used for kernfs and some console handling.
                    985:  */
1.50      christos  986: int
1.29      cgd       987: cdevvp(dev, vpp)
                    988:        dev_t dev;
                    989:        struct vnode **vpp;
                    990: {
1.30      mycroft   991:
                    992:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd       993: }
                    994:
                    995: /*
                    996:  * Create a vnode for a device.
                    997:  * Used by bdevvp (block device) for root file system etc.,
                    998:  * and by cdevvp (character device) for console and kernfs.
                    999:  */
1.50      christos 1000: int
1.29      cgd      1001: getdevvp(dev, vpp, type)
                   1002:        dev_t dev;
                   1003:        struct vnode **vpp;
                   1004:        enum vtype type;
                   1005: {
1.123     augustss 1006:        struct vnode *vp;
1.29      cgd      1007:        struct vnode *nvp;
                   1008:        int error;
                   1009:
1.80      fvdl     1010:        if (dev == NODEV) {
                   1011:                *vpp = NULLVP;
1.29      cgd      1012:                return (0);
1.80      fvdl     1013:        }
1.50      christos 1014:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1015:        if (error) {
                   1016:                *vpp = NULLVP;
                   1017:                return (error);
                   1018:        }
                   1019:        vp = nvp;
                   1020:        vp->v_type = type;
1.50      christos 1021:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd      1022:                vput(vp);
                   1023:                vp = nvp;
                   1024:        }
                   1025:        *vpp = vp;
                   1026:        return (0);
                   1027: }
                   1028:
                   1029: /*
                   1030:  * Check to see if the new vnode represents a special device
                   1031:  * for which we already have a vnode (either because of
                   1032:  * bdevvp() or because of a different vnode representing
                   1033:  * the same block device). If such an alias exists, deallocate
                   1034:  * the existing contents and return the aliased vnode. The
                   1035:  * caller is responsible for filling it with its new contents.
                   1036:  */
                   1037: struct vnode *
                   1038: checkalias(nvp, nvp_rdev, mp)
1.123     augustss 1039:        struct vnode *nvp;
1.29      cgd      1040:        dev_t nvp_rdev;
                   1041:        struct mount *mp;
                   1042: {
1.80      fvdl     1043:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1044:        struct vnode *vp;
1.29      cgd      1045:        struct vnode **vpp;
                   1046:
                   1047:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                   1048:                return (NULLVP);
                   1049:
                   1050:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                   1051: loop:
1.80      fvdl     1052:        simple_lock(&spechash_slock);
1.29      cgd      1053:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                   1054:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                   1055:                        continue;
                   1056:                /*
                   1057:                 * Alias, but not in use, so flush it out.
                   1058:                 */
1.80      fvdl     1059:                simple_lock(&vp->v_interlock);
1.29      cgd      1060:                if (vp->v_usecount == 0) {
1.80      fvdl     1061:                        simple_unlock(&spechash_slock);
                   1062:                        vgonel(vp, p);
1.29      cgd      1063:                        goto loop;
                   1064:                }
1.80      fvdl     1065:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
                   1066:                        simple_unlock(&spechash_slock);
1.29      cgd      1067:                        goto loop;
1.80      fvdl     1068:                }
1.29      cgd      1069:                break;
                   1070:        }
1.34      cgd      1071:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd      1072:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej  1073:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                   1074:                /* XXX Erg. */
                   1075:                if (nvp->v_specinfo == NULL) {
                   1076:                        simple_unlock(&spechash_slock);
                   1077:                        uvm_wait("checkalias");
                   1078:                        goto loop;
                   1079:                }
                   1080:
1.29      cgd      1081:                nvp->v_rdev = nvp_rdev;
                   1082:                nvp->v_hashchain = vpp;
                   1083:                nvp->v_specnext = *vpp;
1.113     fvdl     1084:                nvp->v_specmountpoint = NULL;
1.80      fvdl     1085:                simple_unlock(&spechash_slock);
1.62      kleink   1086:                nvp->v_speclockf = NULL;
1.29      cgd      1087:                *vpp = nvp;
1.80      fvdl     1088:                if (vp != NULLVP) {
1.29      cgd      1089:                        nvp->v_flag |= VALIASED;
                   1090:                        vp->v_flag |= VALIASED;
                   1091:                        vput(vp);
                   1092:                }
                   1093:                return (NULLVP);
                   1094:        }
1.80      fvdl     1095:        simple_unlock(&spechash_slock);
                   1096:        VOP_UNLOCK(vp, 0);
                   1097:        simple_lock(&vp->v_interlock);
                   1098:        vclean(vp, 0, p);
1.29      cgd      1099:        vp->v_op = nvp->v_op;
                   1100:        vp->v_tag = nvp->v_tag;
1.104     wrstuden 1101:        vp->v_vnlock = &vp->v_lock;
                   1102:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd      1103:        nvp->v_type = VNON;
                   1104:        insmntque(vp, mp);
                   1105:        return (vp);
                   1106: }
                   1107:
                   1108: /*
                   1109:  * Grab a particular vnode from the free list, increment its
1.83      fvdl     1110:  * reference count and lock it. If the vnode lock bit is set the
                   1111:  * vnode is being eliminated in vgone. In that case, we can not
                   1112:  * grab the vnode, so the process is awakened when the transition is
                   1113:  * completed, and an error returned to indicate that the vnode is no
                   1114:  * longer usable (possibly having been changed to a new file system type).
                          *
                          * vp:    vnode to reference.
                          * flags: lock flags.  LK_INTERLOCK means the caller already holds
                          *        vp->v_interlock; LK_NOWAIT makes a vnode marked VXLOCK
                          *        fail with EBUSY instead of sleeping; if any LK_TYPE_MASK
                          *        bit is set the vnode is also locked through vn_lock().
                          *
                          * Returns 0 on success, EBUSY (VXLOCK set and LK_NOWAIT given),
                          * ENOENT (VXLOCK set, returned after sleeping), or the error from
                          * vn_lock().  On a vn_lock() failure the reference taken here is
                          * dropped again without calling VOP_INACTIVE.
1.29      cgd      1115:  */
1.30      mycroft  1116: int
1.80      fvdl     1117: vget(vp, flags)
                   1118:        struct vnode *vp;
                   1119:        int flags;
1.29      cgd      1120: {
1.80      fvdl     1121:        int error;
1.29      cgd      1122:
1.30      mycroft  1123:        /*
                   1124:         * If the vnode is in the process of being cleaned out for
                   1125:         * another use, we wait for the cleaning to finish and then
1.80      fvdl     1126:         * return failure. Cleaning is determined by checking that
                   1127:         * the VXLOCK flag is set.
                                 * With LK_NOWAIT we do not wait and return EBUSY at once.
                   1128:         */
1.142     chs      1129:
1.80      fvdl     1130:        if ((flags & LK_INTERLOCK) == 0)
                   1131:                simple_lock(&vp->v_interlock);
                   1132:        if (vp->v_flag & VXLOCK) {
1.142     chs      1133:                if (flags & LK_NOWAIT) {
1.143     sommerfe 1134:                        simple_unlock(&vp->v_interlock);
1.142     chs      1135:                        return EBUSY;
                   1136:                }
                                        /*
                                         * Ask to be woken (VXWANT) and sleep on the vnode.
                                         * NOTE(review): PNORELOCK presumably means the
                                         * interlock is not re-taken on wakeup - confirm.
                                         */
1.29      cgd      1137:                vp->v_flag |= VXWANT;
1.155.2.4  thorpej  1138:                ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80      fvdl     1139:                return (ENOENT);
1.29      cgd      1140:        }
                                /*
                                 * An unreferenced vnode is taken off the hold list or the
                                 * free list (chosen by v_holdcnt) before it gains a reference.
                                 */
1.80      fvdl     1141:        if (vp->v_usecount == 0) {
                   1142:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1143:                if (vp->v_holdcnt > 0)
                   1144:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1145:                else
                   1146:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1147:                simple_unlock(&vnode_free_list_slock);
                   1148:        }
1.29      cgd      1149:        vp->v_usecount++;
1.112     mycroft  1150: #ifdef DIAGNOSTIC
                                /* A count of zero right after the increment means it wrapped. */
                   1151:        if (vp->v_usecount == 0) {
                   1152:                vprint("vget", vp);
1.125     chs      1153:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft  1154:        }
                   1155: #endif
                                /* A lock type was requested: lock the vnode as well. */
1.80      fvdl     1156:        if (flags & LK_TYPE_MASK) {
1.113     fvdl     1157:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
                   1158:                        /*
                   1159:                         * must expand vrele here because we do not want
                   1160:                         * to call VOP_INACTIVE if the reference count
                   1161:                         * drops back to zero since it was never really
                   1162:                         * active. We must remove it from the free list
                   1163:                         * before sleeping so that multiple processes do
                   1164:                         * not try to recycle it.
                   1165:                         */
                   1166:                        simple_lock(&vp->v_interlock);
                   1167:                        vp->v_usecount--;
                   1168:                        if (vp->v_usecount > 0) {
                   1169:                                simple_unlock(&vp->v_interlock);
                   1170:                                return (error);
                   1171:                        }
                   1172:                        /*
                   1173:                         * insert at tail of LRU list
                   1174:                         */
                   1175:                        simple_lock(&vnode_free_list_slock);
                   1176:                        if (vp->v_holdcnt > 0)
                   1177:                                TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
                   1178:                                    v_freelist);
                   1179:                        else
                   1180:                                TAILQ_INSERT_TAIL(&vnode_free_list, vp,
                   1181:                                    v_freelist);
                   1182:                        simple_unlock(&vnode_free_list_slock);
                   1183:                        simple_unlock(&vp->v_interlock);
                   1184:                }
                                        /* error is 0 here when vn_lock() succeeded. */
1.80      fvdl     1185:                return (error);
                   1186:        }
                   1187:        simple_unlock(&vp->v_interlock);
1.29      cgd      1188:        return (0);
                   1189: }
                   1190:
                   1191: /*
                   1192:  * vput(), just unlock and vrele()
                          *
                          * Drops one use reference on vp.  If other references remain the
                          * vnode is simply unlocked with VOP_UNLOCK().  If this was the last
                          * reference, the vnode is appended to the hold list (v_holdcnt > 0)
                          * or to the free list, VTEXT and VEXECMAP are cleared, and
                          * VOP_INACTIVE() is called.
                          *
                          * NOTE(review): no explicit VOP_UNLOCK() is issued on the
                          * last-reference path; presumably VOP_INACTIVE() releases the
                          * vnode lock - confirm.
                   1193:  */
                   1194: void
                   1195: vput(vp)
1.80      fvdl     1196:        struct vnode *vp;
1.29      cgd      1197: {
1.80      fvdl     1198:        struct proc *p = curproc;       /* XXX */
1.30      mycroft  1199:
1.111     mycroft  1200: #ifdef DIAGNOSTIC
1.80      fvdl     1201:        if (vp == NULL)
                   1202:                panic("vput: null vp");
                   1203: #endif
                   1204:        simple_lock(&vp->v_interlock);
                   1205:        vp->v_usecount--;
                                /* Still referenced elsewhere: just unlock and return. */
                   1206:        if (vp->v_usecount > 0) {
                   1207:                simple_unlock(&vp->v_interlock);
                   1208:                VOP_UNLOCK(vp, 0);
                   1209:                return;
                   1210:        }
                   1211: #ifdef DIAGNOSTIC
                                /* The last reference must not leave a negative count or writers. */
                   1212:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1213:                vprint("vput: bad ref count", vp);
                   1214:                panic("vput: ref cnt");
                   1215:        }
                   1216: #endif
                   1217:        /*
1.87      pk       1218:         * Insert at tail of LRU list.
1.80      fvdl     1219:         */
                   1220:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1221:        if (vp->v_holdcnt > 0)
                   1222:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1223:        else
                   1224:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1225:        simple_unlock(&vnode_free_list_slock);
                                /*
                                 * The vnode stops counting as an executable mapping: move its
                                 * page count from uvmexp.execpages to uvmexp.filepages.
                                 */
1.155.2.4  thorpej  1226:        if (vp->v_flag & VEXECMAP) {
                   1227:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1228:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1229:        }
1.155.2.4  thorpej  1230:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.80      fvdl     1231:        simple_unlock(&vp->v_interlock);
                   1232:        VOP_INACTIVE(vp, p);
1.29      cgd      1233: }
                   1234:
                   1235: /*
                   1236:  * Vnode release.
                   1237:  * If count drops to zero, call inactive routine and return to freelist.
                          *
                          * Unlike vput(), no VOP_UNLOCK() is issued here.  On the last
                          * reference the vnode is appended to the hold list
                          * (v_holdcnt > 0) or to the free list, VTEXT and VEXECMAP are
                          * cleared, and the vnode is locked exclusively with vn_lock()
                          * before VOP_INACTIVE() is called.  If that vn_lock() fails,
                          * VOP_INACTIVE() is skipped.
                   1238:  */
                   1239: void
                   1240: vrele(vp)
1.80      fvdl     1241:        struct vnode *vp;
1.29      cgd      1242: {
1.80      fvdl     1243:        struct proc *p = curproc;       /* XXX */
1.29      cgd      1244:
                   1245: #ifdef DIAGNOSTIC
                   1246:        if (vp == NULL)
                   1247:                panic("vrele: null vp");
                   1248: #endif
1.80      fvdl     1249:        simple_lock(&vp->v_interlock);
1.29      cgd      1250:        vp->v_usecount--;
                                /* Still referenced elsewhere: nothing more to do. */
1.80      fvdl     1251:        if (vp->v_usecount > 0) {
                   1252:                simple_unlock(&vp->v_interlock);
1.29      cgd      1253:                return;
1.80      fvdl     1254:        }
1.29      cgd      1255: #ifdef DIAGNOSTIC
                                /* The last reference must not leave a negative count or writers. */
1.80      fvdl     1256:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd      1257:                vprint("vrele: bad ref count", vp);
1.142     chs      1258:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd      1259:        }
                   1260: #endif
1.30      mycroft  1261:        /*
1.87      pk       1262:         * Insert at tail of LRU list.
1.30      mycroft  1263:         */
1.80      fvdl     1264:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1265:        if (vp->v_holdcnt > 0)
                   1266:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1267:        else
                   1268:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1269:        simple_unlock(&vnode_free_list_slock);
                                /*
                                 * The vnode stops counting as an executable mapping: move its
                                 * page count from uvmexp.execpages to uvmexp.filepages.
                                 */
1.155.2.4  thorpej  1270:        if (vp->v_flag & VEXECMAP) {
                   1271:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1272:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1273:        }
1.155.2.4  thorpej  1274:        vp->v_flag &= ~(VTEXT|VEXECMAP);
                                /*
                                 * The interlock is still held and is handed to vn_lock()
                                 * through LK_INTERLOCK.
                                 */
1.80      fvdl     1275:        if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
                   1276:                VOP_INACTIVE(vp, p);
1.29      cgd      1277: }
                   1278:
1.80      fvdl     1279: #ifdef DIAGNOSTIC
1.29      cgd      1280: /*
                   1281:  * Page or buffer structure gets a reference.
                          *
                          * Increments vp->v_holdcnt under the vnode interlock.  This
                          * function is only compiled when DIAGNOSTIC is defined.
                   1282:  */
1.30      mycroft  1283: void
1.29      cgd      1284: vhold(vp)
1.123     augustss 1285:        struct vnode *vp;
1.29      cgd      1286: {
                   1287:
1.113     fvdl     1288:        /*
                   1289:         * If it is on the freelist and the hold count is currently
                   1290:         * zero, move it to the hold list. The test of the back
                   1291:         * pointer and the use reference count of zero is because
                   1292:         * it will be removed from a free list by getnewvnode,
                   1293:         * but will not have its reference count incremented until
                   1294:         * after calling vgone. If the reference count were
                   1295:         * incremented first, vgone would (incorrectly) try to
                   1296:         * close the previous instance of the underlying object.
                   1297:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1298:         * getnewvnode after removing it from a freelist to ensure
                   1299:         * that we do not try to move it here.
                   1300:         */
                   1301:        simple_lock(&vp->v_interlock);
                   1302:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1303:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1304:                simple_lock(&vnode_free_list_slock);
                   1305:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1306:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1307:                simple_unlock(&vnode_free_list_slock);
                   1308:        }
1.29      cgd      1309:        vp->v_holdcnt++;
1.80      fvdl     1310:        simple_unlock(&vp->v_interlock);
1.29      cgd      1311: }
                   1312:
                   1313: /*
                   1314:  * Page or buffer structure frees a reference.
                          *
                          * Decrements vp->v_holdcnt under the vnode interlock and panics
                          * if the count was not positive on entry.  This function is only
                          * compiled when DIAGNOSTIC is defined.
                   1315:  */
1.30      mycroft  1316: void
1.29      cgd      1317: holdrele(vp)
1.123     augustss 1318:        struct vnode *vp;
1.29      cgd      1319: {
                   1320:
1.80      fvdl     1321:        simple_lock(&vp->v_interlock);
1.29      cgd      1322:        if (vp->v_holdcnt <= 0)
1.125     chs      1323:                panic("holdrele: holdcnt vp %p", vp);
1.29      cgd      1324:        vp->v_holdcnt--;
1.142     chs      1325:
1.113     fvdl     1326:        /*
                   1327:         * If it is on the holdlist and the hold count drops to
                   1328:         * zero, move it to the free list. The test of the back
                   1329:         * pointer and the use reference count of zero is because
                   1330:         * it will be removed from a free list by getnewvnode,
                   1331:         * but will not have its reference count incremented until
                   1332:         * after calling vgone. If the reference count were
                   1333:         * incremented first, vgone would (incorrectly) try to
                   1334:         * close the previous instance of the underlying object.
                   1335:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1336:         * getnewvnode after removing it from a freelist to ensure
                   1337:         * that we do not try to move it here.
                   1338:         */
1.142     chs      1339:
1.113     fvdl     1340:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1341:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1342:                simple_lock(&vnode_free_list_slock);
                   1343:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1344:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1345:                simple_unlock(&vnode_free_list_slock);
                   1346:        }
1.81      ross     1347:        simple_unlock(&vp->v_interlock);
                   1348: }
                   1349:
                   1350: /*
                   1351:  * Vnode reference.
                          *
                          * Adds one use reference to a vnode that is already referenced.
                          * Panics if v_usecount is not positive on entry: such a vnode must
                          * be obtained with vget() instead.  This function is only compiled
                          * when DIAGNOSTIC is defined.
                   1352:  */
                   1353: void
                   1354: vref(vp)
                   1355:        struct vnode *vp;
                   1356: {
                   1357:
                   1358:        simple_lock(&vp->v_interlock);
                   1359:        if (vp->v_usecount <= 0)
1.125     chs      1360:                panic("vref used where vget required, vp %p", vp);
1.81      ross     1361:        vp->v_usecount++;
1.112     mycroft  1362: #ifdef DIAGNOSTIC
                                /* A count of zero right after the increment means it wrapped. */
                   1363:        if (vp->v_usecount == 0) {
                   1364:                vprint("vref", vp);
1.125     chs      1365:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft  1366:        }
                   1367: #endif
1.80      fvdl     1368:        simple_unlock(&vp->v_interlock);
1.29      cgd      1369: }
1.80      fvdl     1370: #endif /* DIAGNOSTIC */
1.29      cgd      1371:
                   1372: /*
                   1373:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1374:  *
                   1375:  * If MNT_NOFORCE is specified, there should not be any active ones,
                   1376:  * return error if any are found (nb: this is a user error, not a
                   1377:  * system error). If MNT_FORCE is specified, detach any active vnodes
                   1378:  * that are found.
                          *
                          * NOTE(review): the body below tests the FORCECLOSE, SKIPSYSTEM and
                          * WRITECLOSE bits of flags, not MNT_FORCE/MNT_NOFORCE; the names
                          * above look historical - confirm.
                          *
                          * mp:     mount point whose vnode list is scanned.
                          * skipvp: vnode to leave untouched (compared by pointer).
                          * flags:  SKIPSYSTEM skips vnodes marked VSYSTEM; WRITECLOSE
                          *         restricts the flush to VREG vnodes with a non-zero
                          *         v_writecount; FORCECLOSE also tears down vnodes that
                          *         are still in use.
                          *
                          * Returns EBUSY if at least one in-use vnode was left alone,
                          * otherwise 0.
                   1379:  */
1.30      mycroft  1380: #ifdef DEBUG
                   1381: int busyprt = 0;       /* print out busy vnodes */
                   1382: struct ctldebug debug1 = { "busyprt", &busyprt };
                   1383: #endif
1.29      cgd      1384:
1.50      christos 1385: int
1.29      cgd      1386: vflush(mp, skipvp, flags)
                   1387:        struct mount *mp;
                   1388:        struct vnode *skipvp;
                   1389:        int flags;
                   1390: {
1.80      fvdl     1391:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1392:        struct vnode *vp, *nvp;
1.29      cgd      1393:        int busy = 0;
                   1394:
1.80      fvdl     1395:        simple_lock(&mntvnode_slock);
1.29      cgd      1396: loop:
                   1397:        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                                        /*
                                         * The vnode no longer belongs to this mount, so the
                                         * saved next pointer cannot be trusted: rescan from
                                         * the head of the list.  (Presumably it moved while
                                         * mntvnode_slock was dropped below - confirm.)
                                         */
                   1398:                if (vp->v_mount != mp)
                   1399:                        goto loop;
                   1400:                nvp = vp->v_mntvnodes.le_next;
                   1401:                /*
                   1402:                 * Skip over a selected vnode.
                   1403:                 */
                   1404:                if (vp == skipvp)
                   1405:                        continue;
1.80      fvdl     1406:                simple_lock(&vp->v_interlock);
1.29      cgd      1407:                /*
                   1408:                 * Skip over vnodes marked VSYSTEM.
                   1409:                 */
1.80      fvdl     1410:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                   1411:                        simple_unlock(&vp->v_interlock);
1.29      cgd      1412:                        continue;
1.80      fvdl     1413:                }
1.29      cgd      1414:                /*
1.30      mycroft  1415:                 * If WRITECLOSE is set, only flush out regular file
                   1416:                 * vnodes open for writing.
                   1417:                 */
                   1418:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1419:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                   1420:                        simple_unlock(&vp->v_interlock);
1.30      mycroft  1421:                        continue;
1.92      thorpej  1422:                }
1.30      mycroft  1423:                /*
1.29      cgd      1424:                 * With v_usecount == 0, all we need to do is clear
                   1425:                 * out the vnode data structures and we are done.
                                         * vgonel() is entered with vp's interlock still held;
                                         * mntvnode_slock is dropped around the call.
                   1426:                 */
                   1427:                if (vp->v_usecount == 0) {
1.80      fvdl     1428:                        simple_unlock(&mntvnode_slock);
                   1429:                        vgonel(vp, p);
                   1430:                        simple_lock(&mntvnode_slock);
1.29      cgd      1431:                        continue;
                   1432:                }
                   1433:                /*
1.30      mycroft  1434:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1435:                 * For block or character devices, revert to an
                   1436:                 * anonymous device. For all other files, just kill them.
                                         * As above, vp's interlock is still held on entry to
                                         * vgonel()/vclean() and mntvnode_slock is dropped
                                         * around the call.
                   1437:                 */
                   1438:                if (flags & FORCECLOSE) {
1.80      fvdl     1439:                        simple_unlock(&mntvnode_slock);
1.29      cgd      1440:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.80      fvdl     1441:                                vgonel(vp, p);
1.29      cgd      1442:                        } else {
1.80      fvdl     1443:                                vclean(vp, 0, p);
1.30      mycroft  1444:                                vp->v_op = spec_vnodeop_p;
1.29      cgd      1445:                                insmntque(vp, (struct mount *)0);
                   1446:                        }
1.80      fvdl     1447:                        simple_lock(&mntvnode_slock);
1.29      cgd      1448:                        continue;
                   1449:                }
                                        /* In use and not forced: leave it and count it as busy. */
1.30      mycroft  1450: #ifdef DEBUG
1.29      cgd      1451:                if (busyprt)
                   1452:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1453: #endif
1.80      fvdl     1454:                simple_unlock(&vp->v_interlock);
1.29      cgd      1455:                busy++;
                   1456:        }
1.80      fvdl     1457:        simple_unlock(&mntvnode_slock);
1.29      cgd      1458:        if (busy)
                   1459:                return (EBUSY);
                   1460:        return (0);
                   1461: }
1.155.2.1  lukem    1462:
1.29      cgd      1463: /*
                   1464:  * Disassociate the underlying file system from a vnode.
                          *
                          * vp:    vnode to clean; its interlock must be held on entry (see
                          *        the LOCK_ASSERT below).
                          * flags: if DOCLOSE is set, buffers are flushed with vinvalbuf()
                          *        and an in-use vnode is closed with VOP_CLOSE().
                          * p:     process passed on to the VOP_* calls.
                          *
                          * Panics if VXLOCK is already set on the vnode or if VOP_RECLAIM()
                          * fails.  On return the vnode's v_op is dead_vnodeop_p, its v_tag
                          * is VT_NON, VXLOCK is clear, and any sleeper that set VXWANT has
                          * been woken.
                   1465:  */
                   1466: void
1.80      fvdl     1467: vclean(vp, flags, p)
1.123     augustss 1468:        struct vnode *vp;
1.29      cgd      1469:        int flags;
1.80      fvdl     1470:        struct proc *p;
1.29      cgd      1471: {
                   1472:        int active;
                   1473:
1.155.2.4  thorpej  1474:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1475:
1.29      cgd      1476:        /*
                   1477:         * Check to see if the vnode is in use.
                   1478:         * If so we have to reference it before we clean it out
                   1479:         * so that its count cannot fall to zero and generate a
                   1480:         * race against ourselves to recycle it.
                   1481:         */
1.155.2.4  thorpej  1482:
1.112     mycroft  1483:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1484:                vp->v_usecount++;
1.112     mycroft  1485: #ifdef DIAGNOSTIC
                   1486:                if (vp->v_usecount == 0) {
                   1487:                        vprint("vclean", vp);
                   1488:                        panic("vclean: usecount overflow");
                   1489:                }
                   1490: #endif
                   1491:        }
1.87      pk       1492:
1.29      cgd      1493:        /*
                   1494:         * Prevent the vnode from being recycled or
                   1495:         * brought into use while we clean it out.
                   1496:         */
                   1497:        if (vp->v_flag & VXLOCK)
1.125     chs      1498:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1499:        vp->v_flag |= VXLOCK;
                                /*
                                 * The vnode stops counting as an executable mapping: move its
                                 * page count from uvmexp.execpages to uvmexp.filepages.
                                 */
1.155.2.4  thorpej  1500:        if (vp->v_flag & VEXECMAP) {
                   1501:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1502:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1503:        }
1.155.2.4  thorpej  1504:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142     chs      1505:
1.29      cgd      1506:        /*
1.80      fvdl     1507:         * Even if the count is zero, the VOP_INACTIVE routine may still
                   1508:         * have the object locked while it cleans it out. The VOP_LOCK
                   1509:         * ensures that the VOP_INACTIVE routine is done with its work.
                   1510:         * For active vnodes, it ensures that no other activity can
                   1511:         * occur while the underlying object is being cleaned out.
                   1512:         */
                   1513:        VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
                   1514:
1.98      wrstuden 1515:        /*
1.142     chs      1516:         * Clean out any cached data associated with the vnode.
1.29      cgd      1517:         */
1.155.2.4  thorpej  1518:        if (flags & DOCLOSE) {
1.80      fvdl     1519:                vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1.155.2.4  thorpej  1520:                KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1521:        }
                                /* The interlock passed to VOP_LOCK above must be released by now. */
                   1522:        LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80      fvdl     1523:
1.29      cgd      1524:        /*
1.30      mycroft  1525:         * If purging an active vnode, it must be closed and
1.80      fvdl     1526:         * deactivated before being reclaimed. Note that the
                   1527:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1528:         */
                   1529:        if (active) {
                   1530:                if (flags & DOCLOSE)
1.86      pk       1531:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1.80      fvdl     1532:                VOP_INACTIVE(vp, p);
                   1533:        } else {
                   1534:                /*
                   1535:                 * Any other processes trying to obtain this lock must first
                   1536:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1537:                 */
                   1538:                VOP_UNLOCK(vp, 0);
1.29      cgd      1539:        }
                   1540:        /*
                   1541:         * Reclaim the vnode.
                   1542:         */
1.80      fvdl     1543:        if (VOP_RECLAIM(vp, p))
1.125     chs      1544:                panic("vclean: cannot reclaim, vp %p", vp);
                                /* Drop the extra reference taken at the top for an in-use vnode. */
1.87      pk       1545:        if (active) {
                   1546:                /*
                   1547:                 * Inline copy of vrele() since VOP_INACTIVE
                   1548:                 * has already been called.
                   1549:                 */
                   1550:                simple_lock(&vp->v_interlock);
                   1551:                if (--vp->v_usecount <= 0) {
                   1552: #ifdef DIAGNOSTIC
                   1553:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1554:                                vprint("vclean: bad ref count", vp);
                   1555:                                panic("vclean: ref cnt");
                   1556:                        }
                   1557: #endif
                   1558:                        /*
                   1559:                         * Insert at tail of LRU list.
                                                 * Always the free list here: a cleaned vnode is
                                                 * expected to have no holds (checked under
                                                 * DIAGNOSTIC).
                   1560:                         */
1.142     chs      1561:
1.113     fvdl     1562:                        simple_unlock(&vp->v_interlock);
1.87      pk       1563:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1564: #ifdef DIAGNOSTIC
1.113     fvdl     1565:                        if (vp->v_holdcnt > 0)
1.125     chs      1566:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1567: #endif
1.87      pk       1568:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1569:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1570:                } else
                   1571:                        simple_unlock(&vp->v_interlock);
1.87      pk       1572:        }
1.30      mycroft  1573:
                                /* No pages may remain attached to the vnode's uvm object. */
1.155.2.5  jdolecek 1574:        KASSERT(vp->v_uobj.uo_npages == 0);
1.80      fvdl     1575:        cache_purge(vp);
                   1576:
1.29      cgd      1577:        /*
1.30      mycroft  1578:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1579:         */
1.30      mycroft  1580:        vp->v_op = dead_vnodeop_p;
                   1581:        vp->v_tag = VT_NON;
1.139     enami    1582:        simple_lock(&vp->v_interlock);
1.155.2.6  jdolecek 1583:        VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
1.29      cgd      1584:        vp->v_flag &= ~VXLOCK;
                                /* Wake anyone who set VXWANT while waiting for VXLOCK to clear. */
                   1585:        if (vp->v_flag & VXWANT) {
                   1586:                vp->v_flag &= ~VXWANT;
1.139     enami    1587:                simple_unlock(&vp->v_interlock);
1.29      cgd      1588:                wakeup((caddr_t)vp);
1.139     enami    1589:        } else
                   1590:                simple_unlock(&vp->v_interlock);
1.29      cgd      1591: }
                   1592:
                   1593: /*
1.80      fvdl     1594:  * Recycle an unused vnode to the front of the free list.
                   1595:  * Release the passed interlock if the vnode will be recycled.
                          *
                          * vp:        candidate vnode; its interlock is taken here.
                          * inter_lkp: optional lock (may be NULL) that is released only
                          *            when the vnode is actually recycled.
                          * p:         process passed on to vgonel().
                          *
                          * Returns 1 if v_usecount was zero and the vnode was handed to
                          * vgonel(), 0 if the vnode is in use and was left alone.
1.29      cgd      1596:  */
1.80      fvdl     1597: int
                   1598: vrecycle(vp, inter_lkp, p)
                   1599:        struct vnode *vp;
                   1600:        struct simplelock *inter_lkp;
                   1601:        struct proc *p;
                   1602: {
                   1603:
                   1604:        simple_lock(&vp->v_interlock);
                   1605:        if (vp->v_usecount == 0) {
                   1606:                if (inter_lkp)
                   1607:                        simple_unlock(inter_lkp);
                                        /* vgonel() is entered with vp's interlock still held. */
                   1608:                vgonel(vp, p);
                   1609:                return (1);
1.29      cgd      1610:        }
1.80      fvdl     1611:        simple_unlock(&vp->v_interlock);
                   1612:        return (0);
1.29      cgd      1613: }
                   1614:
                   1615: /*
                   1616:  * Eliminate all activity associated with a vnode
                   1617:  * in preparation for reuse.
                          *
                          * Convenience wrapper: takes vp's interlock and calls vgonel()
                          * with curproc.  The interlock is not released here; that is
                          * left to vgonel().
                   1618:  */
                   1619: void
                   1620: vgone(vp)
1.80      fvdl     1621:        struct vnode *vp;
                   1622: {
                   1623:        struct proc *p = curproc;       /* XXX */
                   1624:
                   1625:        simple_lock(&vp->v_interlock);
                   1626:        vgonel(vp, p);
                   1627: }
                   1628:
                   1629: /*
                   1630:  * vgone, with the vp interlock held.
                   1631:  */
                   1632: void
                   1633: vgonel(vp, p)
1.123     augustss 1634:        struct vnode *vp;
1.80      fvdl     1635:        struct proc *p;
1.29      cgd      1636: {
1.80      fvdl     1637:        struct vnode *vq;
1.29      cgd      1638:        struct vnode *vx;
                   1639:
1.155.2.4  thorpej  1640:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1641:
1.29      cgd      1642:        /*
                   1643:         * If a vgone (or vclean) is already in progress,
                   1644:         * wait until it is done and return.
                   1645:         */
1.155.2.4  thorpej  1646:
1.29      cgd      1647:        if (vp->v_flag & VXLOCK) {
                   1648:                vp->v_flag |= VXWANT;
1.155.2.4  thorpej  1649:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29      cgd      1650:                return;
                   1651:        }
1.155.2.4  thorpej  1652:
1.29      cgd      1653:        /*
                   1654:         * Clean out the filesystem specific data.
                   1655:         */
1.155.2.4  thorpej  1656:
1.80      fvdl     1657:        vclean(vp, DOCLOSE, p);
1.155.2.4  thorpej  1658:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1659:
1.29      cgd      1660:        /*
                   1661:         * Delete from old mount point vnode list, if on one.
                   1662:         */
1.155.2.4  thorpej  1663:
1.80      fvdl     1664:        if (vp->v_mount != NULL)
                   1665:                insmntque(vp, (struct mount *)0);
1.155.2.4  thorpej  1666:
1.29      cgd      1667:        /*
                   1668:         * If special device, remove it from special device alias list.
1.80      fvdl     1669:         * if it is on one.
1.29      cgd      1670:         */
1.155.2.4  thorpej  1671:
1.80      fvdl     1672:        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
                   1673:                simple_lock(&spechash_slock);
1.110     wrstuden 1674:                if (vp->v_hashchain != NULL) {
                   1675:                        if (*vp->v_hashchain == vp) {
                   1676:                                *vp->v_hashchain = vp->v_specnext;
                   1677:                        } else {
                   1678:                                for (vq = *vp->v_hashchain; vq;
                   1679:                                                        vq = vq->v_specnext) {
                   1680:                                        if (vq->v_specnext != vp)
                   1681:                                                continue;
                   1682:                                        vq->v_specnext = vp->v_specnext;
                   1683:                                        break;
                   1684:                                }
                   1685:                                if (vq == NULL)
                   1686:                                        panic("missing bdev");
1.29      cgd      1687:                        }
1.110     wrstuden 1688:                        if (vp->v_flag & VALIASED) {
                   1689:                                vx = NULL;
                   1690:                                for (vq = *vp->v_hashchain; vq;
                   1691:                                                        vq = vq->v_specnext) {
                   1692:                                        if (vq->v_rdev != vp->v_rdev ||
                   1693:                                            vq->v_type != vp->v_type)
                   1694:                                                continue;
                   1695:                                        if (vx)
                   1696:                                                break;
                   1697:                                        vx = vq;
                   1698:                                }
                   1699:                                if (vx == NULL)
                   1700:                                        panic("missing alias");
                   1701:                                if (vq == NULL)
                   1702:                                        vx->v_flag &= ~VALIASED;
                   1703:                                vp->v_flag &= ~VALIASED;
1.29      cgd      1704:                        }
                   1705:                }
1.80      fvdl     1706:                simple_unlock(&spechash_slock);
1.29      cgd      1707:                FREE(vp->v_specinfo, M_VNODE);
                   1708:                vp->v_specinfo = NULL;
                   1709:        }
1.155.2.4  thorpej  1710:
1.29      cgd      1711:        /*
1.30      mycroft  1712:         * If it is on the freelist and not already at the head,
                   1713:         * move it to the head of the list. The test of the back
                   1714:         * pointer and the reference count of zero is because
                   1715:         * it will be removed from the free list by getnewvnode,
                   1716:         * but will not have its reference count incremented until
                   1717:         * after calling vgone. If the reference count were
                   1718:         * incremented first, vgone would (incorrectly) try to
                   1719:         * close the previous instance of the underlying object.
                   1720:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1721:         * getnewvnode after removing it from the freelist to ensure
                   1722:         * that we do not try to move it here.
1.29      cgd      1723:         */
1.155.2.4  thorpej  1724:
1.80      fvdl     1725:        if (vp->v_usecount == 0) {
                   1726:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1727:                if (vp->v_holdcnt > 0)
1.125     chs      1728:                        panic("vgonel: not clean, vp %p", vp);
1.80      fvdl     1729:                if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1.113     fvdl     1730:                    TAILQ_FIRST(&vnode_free_list) != vp) {
1.80      fvdl     1731:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1732:                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                   1733:                }
                   1734:                simple_unlock(&vnode_free_list_slock);
1.29      cgd      1735:        }
                   1736:        vp->v_type = VBAD;
                   1737: }
                   1738:
                   1739: /*
                   1740:  * Lookup a vnode by device number.
                   1741:  */
1.50      christos 1742: int
1.29      cgd      1743: vfinddev(dev, type, vpp)
                   1744:        dev_t dev;
                   1745:        enum vtype type;
                   1746:        struct vnode **vpp;
                   1747: {
1.80      fvdl     1748:        struct vnode *vp;
                   1749:        int rc = 0;
1.29      cgd      1750:
1.80      fvdl     1751:        simple_lock(&spechash_slock);
1.29      cgd      1752:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1753:                if (dev != vp->v_rdev || type != vp->v_type)
                   1754:                        continue;
                   1755:                *vpp = vp;
1.80      fvdl     1756:                rc = 1;
                   1757:                break;
1.29      cgd      1758:        }
1.80      fvdl     1759:        simple_unlock(&spechash_slock);
                   1760:        return (rc);
1.96      thorpej  1761: }
                   1762:
                   1763: /*
                   1764:  * Revoke all the vnodes corresponding to the specified minor number
                   1765:  * range (endpoints inclusive) of the specified major.
                   1766:  */
                   1767: void
                   1768: vdevgone(maj, minl, minh, type)
                   1769:        int maj, minl, minh;
                   1770:        enum vtype type;
                   1771: {
                   1772:        struct vnode *vp;
                   1773:        int mn;
                   1774:
                   1775:        for (mn = minl; mn <= minh; mn++)
                   1776:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1777:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1778: }
                   1779:
                   1780: /*
                   1781:  * Calculate the total number of references to a special device.
                   1782:  */
1.30      mycroft  1783: int
1.29      cgd      1784: vcount(vp)
1.123     augustss 1785:        struct vnode *vp;
1.29      cgd      1786: {
1.123     augustss 1787:        struct vnode *vq, *vnext;
1.29      cgd      1788:        int count;
                   1789:
                   1790: loop:
                   1791:        if ((vp->v_flag & VALIASED) == 0)
                   1792:                return (vp->v_usecount);
1.80      fvdl     1793:        simple_lock(&spechash_slock);
1.30      mycroft  1794:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1795:                vnext = vq->v_specnext;
1.29      cgd      1796:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1797:                        continue;
                   1798:                /*
                   1799:                 * Alias, but not in use, so flush it out.
                   1800:                 */
1.151     wrstuden 1801:                if (vq->v_usecount == 0 && vq != vp &&
                   1802:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1803:                        simple_unlock(&spechash_slock);
1.29      cgd      1804:                        vgone(vq);
                   1805:                        goto loop;
                   1806:                }
                   1807:                count += vq->v_usecount;
                   1808:        }
1.80      fvdl     1809:        simple_unlock(&spechash_slock);
1.29      cgd      1810:        return (count);
                   1811: }
                   1812:
                   1813: /*
                   1814:  * Print out a description of a vnode.
                   1815:  */
1.146     jdolecek 1816: static const char * const typename[] =
1.29      cgd      1817:    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
                   1818:
                   1819: void
                   1820: vprint(label, vp)
                   1821:        char *label;
1.123     augustss 1822:        struct vnode *vp;
1.29      cgd      1823: {
1.155.2.4  thorpej  1824:        char buf[96];
1.29      cgd      1825:
                   1826:        if (label != NULL)
1.57      christos 1827:                printf("%s: ", label);
1.142     chs      1828:        printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
1.113     fvdl     1829:            vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1.56      christos 1830:            vp->v_holdcnt);
1.29      cgd      1831:        buf[0] = '\0';
                   1832:        if (vp->v_flag & VROOT)
                   1833:                strcat(buf, "|VROOT");
                   1834:        if (vp->v_flag & VTEXT)
                   1835:                strcat(buf, "|VTEXT");
1.155.2.4  thorpej  1836:        if (vp->v_flag & VEXECMAP)
                   1837:                strcat(buf, "|VEXECMAP");
1.29      cgd      1838:        if (vp->v_flag & VSYSTEM)
                   1839:                strcat(buf, "|VSYSTEM");
                   1840:        if (vp->v_flag & VXLOCK)
                   1841:                strcat(buf, "|VXLOCK");
                   1842:        if (vp->v_flag & VXWANT)
                   1843:                strcat(buf, "|VXWANT");
                   1844:        if (vp->v_flag & VBWAIT)
                   1845:                strcat(buf, "|VBWAIT");
                   1846:        if (vp->v_flag & VALIASED)
                   1847:                strcat(buf, "|VALIASED");
                   1848:        if (buf[0] != '\0')
1.57      christos 1849:                printf(" flags (%s)", &buf[1]);
1.30      mycroft  1850:        if (vp->v_data == NULL) {
1.57      christos 1851:                printf("\n");
1.30      mycroft  1852:        } else {
1.57      christos 1853:                printf("\n\t");
1.30      mycroft  1854:                VOP_PRINT(vp);
                   1855:        }
1.29      cgd      1856: }
                   1857:
                   1858: #ifdef DEBUG
                   1859: /*
                   1860:  * List all of the locked vnodes in the system.
                   1861:  * Called when debugging the kernel.
                   1862:  */
1.51      christos 1863: void
1.29      cgd      1864: printlockedvnodes()
                   1865: {
1.80      fvdl     1866:        struct mount *mp, *nmp;
                   1867:        struct vnode *vp;
1.29      cgd      1868:
1.57      christos 1869:        printf("Locked vnodes\n");
1.80      fvdl     1870:        simple_lock(&mountlist_slock);
                   1871:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
                   1872:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
                   1873:                        nmp = mp->mnt_list.cqe_next;
                   1874:                        continue;
                   1875:                }
1.155.2.4  thorpej  1876:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29      cgd      1877:                        if (VOP_ISLOCKED(vp))
1.155.2.4  thorpej  1878:                                vprint(NULL, vp);
1.80      fvdl     1879:                }
                   1880:                simple_lock(&mountlist_slock);
                   1881:                nmp = mp->mnt_list.cqe_next;
                   1882:                vfs_unbusy(mp);
1.29      cgd      1883:        }
1.80      fvdl     1884:        simple_unlock(&mountlist_slock);
1.29      cgd      1885: }
                   1886: #endif
                   1887:
1.101     mrg      1888: /*
1.80      fvdl     1889:  * Top level filesystem related information gathering.
                   1890:  */
                   1891: int
                   1892: vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
                   1893:        int *name;
                   1894:        u_int namelen;
                   1895:        void *oldp;
                   1896:        size_t *oldlenp;
                   1897:        void *newp;
                   1898:        size_t newlen;
                   1899:        struct proc *p;
                   1900: {
1.95      thorpej  1901: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1902:        struct vfsconf vfc;
1.154     jdolecek 1903:        extern const char * const mountcompatnames[];
                   1904:        extern int nmountcompatnames;
1.95      thorpej  1905: #endif
1.80      fvdl     1906:        struct vfsops *vfsp;
                   1907:
                   1908:        /* all sysctl names at this level are at least name and field */
                   1909:        if (namelen < 2)
                   1910:                return (ENOTDIR);               /* overloaded */
1.94      thorpej  1911:
                   1912:        /* Not generic: goes to file system. */
1.80      fvdl     1913:        if (name[0] != VFS_GENERIC) {
1.155     jdolecek 1914:                static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
1.154     jdolecek 1915:                const char *vfsname;
                   1916:
                   1917:                if (name[0] < 0 || name[0] > VFS_MAXID
                   1918:                    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
1.80      fvdl     1919:                        return (EOPNOTSUPP);
1.154     jdolecek 1920:
                   1921:                vfsp = vfs_getopsbyname(vfsname);
1.94      thorpej  1922:                if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
                   1923:                        return (EOPNOTSUPP);
1.80      fvdl     1924:                return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
                   1925:                    oldp, oldlenp, newp, newlen, p));
                   1926:        }
1.94      thorpej  1927:
                   1928:        /* The rest are generic vfs sysctls. */
1.80      fvdl     1929:        switch (name[1]) {
1.117     fvdl     1930:        case VFS_USERMOUNT:
                   1931:                return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
1.95      thorpej  1932: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.80      fvdl     1933:        case VFS_MAXTYPENUM:
1.94      thorpej  1934:                /*
                   1935:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1936:                 */
1.80      fvdl     1937:                return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
                   1938:        case VFS_CONF:
1.94      thorpej  1939:                /*
                   1940:                 * Special: a node, next is a file system name.
                   1941:                 * Provided for 4.4BSD-Lite2 compatibility.
                   1942:                 */
1.80      fvdl     1943:                if (namelen < 3)
                   1944:                        return (ENOTDIR);       /* overloaded */
                   1945:                if (name[2] >= nmountcompatnames || name[2] < 0 ||
                   1946:                    mountcompatnames[name[2]] == NULL)
                   1947:                        return (EOPNOTSUPP);
                   1948:                vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
                   1949:                if (vfsp == NULL)
1.94      thorpej  1950:                        return (EOPNOTSUPP);
                   1951:                vfc.vfc_vfsops = vfsp;
1.80      fvdl     1952:                strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
                   1953:                vfc.vfc_typenum = name[2];
1.94      thorpej  1954:                vfc.vfc_refcount = vfsp->vfs_refcount;
1.80      fvdl     1955:                vfc.vfc_flags = 0;
                   1956:                vfc.vfc_mountroot = vfsp->vfs_mountroot;
                   1957:                vfc.vfc_next = NULL;
                   1958:                return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
                   1959:                    sizeof(struct vfsconf)));
1.95      thorpej  1960: #endif
                   1961:        default:
                   1962:                break;
1.80      fvdl     1963:        }
                   1964:        return (EOPNOTSUPP);
                   1965: }
                   1966:
1.29      cgd      1967: int kinfo_vdebug = 1;
                   1968: int kinfo_vgetfailed;
                   1969: #define KINFO_VNODESLOP        10
                   1970: /*
                   1971:  * Dump vnode list (via sysctl).
                   1972:  * Copyout address of vnode followed by vnode.
                   1973:  */
                   1974: /* ARGSUSED */
1.50      christos 1975: int
1.80      fvdl     1976: sysctl_vnode(where, sizep, p)
1.29      cgd      1977:        char *where;
                   1978:        size_t *sizep;
1.80      fvdl     1979:        struct proc *p;
1.29      cgd      1980: {
1.80      fvdl     1981:        struct mount *mp, *nmp;
                   1982:        struct vnode *nvp, *vp;
                   1983:        char *bp = where, *savebp;
1.29      cgd      1984:        char *ewhere;
                   1985:        int error;
                   1986:
1.90      perry    1987: #define VPTRSZ sizeof(struct vnode *)
                   1988: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      1989:        if (where == NULL) {
                   1990:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   1991:                return (0);
                   1992:        }
                   1993:        ewhere = where + *sizep;
1.80      fvdl     1994:
                   1995:        simple_lock(&mountlist_slock);
1.38      mycroft  1996:        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1.80      fvdl     1997:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
                   1998:                        nmp = mp->mnt_list.cqe_next;
1.29      cgd      1999:                        continue;
1.80      fvdl     2000:                }
1.29      cgd      2001:                savebp = bp;
                   2002: again:
1.80      fvdl     2003:                simple_lock(&mntvnode_slock);
1.29      cgd      2004:                for (vp = mp->mnt_vnodelist.lh_first;
                   2005:                     vp != NULL;
1.80      fvdl     2006:                     vp = nvp) {
1.29      cgd      2007:                        /*
                   2008:                         * Check that the vp is still associated with
                   2009:                         * this filesystem.  RACE: could have been
                   2010:                         * recycled onto the same filesystem.
                   2011:                         */
                   2012:                        if (vp->v_mount != mp) {
1.80      fvdl     2013:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2014:                                if (kinfo_vdebug)
1.57      christos 2015:                                        printf("kinfo: vp changed\n");
1.29      cgd      2016:                                bp = savebp;
                   2017:                                goto again;
                   2018:                        }
1.80      fvdl     2019:                        nvp = vp->v_mntvnodes.le_next;
1.29      cgd      2020:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     2021:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2022:                                *sizep = bp - where;
                   2023:                                return (ENOMEM);
                   2024:                        }
1.80      fvdl     2025:                        simple_unlock(&mntvnode_slock);
1.29      cgd      2026:                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                   2027:                           (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                   2028:                                return (error);
                   2029:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     2030:                        simple_lock(&mntvnode_slock);
1.29      cgd      2031:                }
1.80      fvdl     2032:                simple_unlock(&mntvnode_slock);
                   2033:                simple_lock(&mountlist_slock);
                   2034:                nmp = mp->mnt_list.cqe_next;
1.29      cgd      2035:                vfs_unbusy(mp);
                   2036:        }
1.80      fvdl     2037:        simple_unlock(&mountlist_slock);
1.29      cgd      2038:
                   2039:        *sizep = bp - where;
                   2040:        return (0);
1.30      mycroft  2041: }
                   2042:
                   2043: /*
                   2044:  * Check to see if a filesystem is mounted on a block device.
                   2045:  */
                   2046: int
                   2047: vfs_mountedon(vp)
1.80      fvdl     2048:        struct vnode *vp;
1.30      mycroft  2049: {
1.80      fvdl     2050:        struct vnode *vq;
                   2051:        int error = 0;
1.30      mycroft  2052:
1.113     fvdl     2053:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  2054:                return (EBUSY);
                   2055:        if (vp->v_flag & VALIASED) {
1.80      fvdl     2056:                simple_lock(&spechash_slock);
1.30      mycroft  2057:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   2058:                        if (vq->v_rdev != vp->v_rdev ||
                   2059:                            vq->v_type != vp->v_type)
                   2060:                                continue;
1.113     fvdl     2061:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     2062:                                error = EBUSY;
                   2063:                                break;
                   2064:                        }
1.30      mycroft  2065:                }
1.80      fvdl     2066:                simple_unlock(&spechash_slock);
1.30      mycroft  2067:        }
1.80      fvdl     2068:        return (error);
1.30      mycroft  2069: }
                   2070:
                   2071: /*
                   2072:  * Build hash lists of net addresses and hang them off the mount point.
                   2073:  * Called by ufs_mount() to set up the lists of export addresses.
                   2074:  */
                   2075: static int
                   2076: vfs_hang_addrlist(mp, nep, argp)
                   2077:        struct mount *mp;
                   2078:        struct netexport *nep;
                   2079:        struct export_args *argp;
                   2080: {
1.123     augustss 2081:        struct netcred *np, *enp;
                   2082:        struct radix_node_head *rnh;
                   2083:        int i;
1.30      mycroft  2084:        struct radix_node *rn;
                   2085:        struct sockaddr *saddr, *smask = 0;
                   2086:        struct domain *dom;
                   2087:        int error;
                   2088:
                   2089:        if (argp->ex_addrlen == 0) {
                   2090:                if (mp->mnt_flag & MNT_DEFEXPORTED)
                   2091:                        return (EPERM);
                   2092:                np = &nep->ne_defexported;
                   2093:                np->netc_exflags = argp->ex_flags;
1.155.2.4  thorpej  2094:                crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2095:                np->netc_anon.cr_ref = 1;
                   2096:                mp->mnt_flag |= MNT_DEFEXPORTED;
                   2097:                return (0);
                   2098:        }
1.155.2.2  thorpej  2099:
                   2100:        if (argp->ex_addrlen > MLEN)
                   2101:                return (EINVAL);
                   2102:
1.30      mycroft  2103:        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
                   2104:        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91      perry    2105:        memset((caddr_t)np, 0, i);
1.30      mycroft  2106:        saddr = (struct sockaddr *)(np + 1);
1.50      christos 2107:        error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
                   2108:        if (error)
1.30      mycroft  2109:                goto out;
                   2110:        if (saddr->sa_len > argp->ex_addrlen)
                   2111:                saddr->sa_len = argp->ex_addrlen;
                   2112:        if (argp->ex_masklen) {
                   2113:                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66      mycroft  2114:                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30      mycroft  2115:                if (error)
                   2116:                        goto out;
                   2117:                if (smask->sa_len > argp->ex_masklen)
                   2118:                        smask->sa_len = argp->ex_masklen;
                   2119:        }
                   2120:        i = saddr->sa_family;
                   2121:        if ((rnh = nep->ne_rtable[i]) == 0) {
                   2122:                /*
                   2123:                 * Seems silly to initialize every AF when most are not
                   2124:                 * used, do so on demand here
                   2125:                 */
                   2126:                for (dom = domains; dom; dom = dom->dom_next)
                   2127:                        if (dom->dom_family == i && dom->dom_rtattach) {
                   2128:                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                   2129:                                        dom->dom_rtoffset);
                   2130:                                break;
                   2131:                        }
                   2132:                if ((rnh = nep->ne_rtable[i]) == 0) {
                   2133:                        error = ENOBUFS;
                   2134:                        goto out;
                   2135:                }
                   2136:        }
                   2137:        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
                   2138:                np->netc_rnodes);
                   2139:        if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1.72      fvdl     2140:                if (rn == 0) {
                   2141:                        enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
                   2142:                                smask, rnh);
                   2143:                        if (enp == 0) {
                   2144:                                error = EPERM;
                   2145:                                goto out;
                   2146:                        }
                   2147:                } else
                   2148:                        enp = (struct netcred *)rn;
                   2149:
                   2150:                if (enp->netc_exflags != argp->ex_flags ||
                   2151:                    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
                   2152:                    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
                   2153:                    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
1.91      perry    2154:                    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1.72      fvdl     2155:                        enp->netc_anon.cr_ngroups))
                   2156:                                error = EPERM;
                   2157:                else
                   2158:                        error = 0;
1.30      mycroft  2159:                goto out;
                   2160:        }
                   2161:        np->netc_exflags = argp->ex_flags;
1.155.2.4  thorpej  2162:        crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2163:        np->netc_anon.cr_ref = 1;
                   2164:        return (0);
                   2165: out:
                   2166:        free(np, M_NETADDR);
                   2167:        return (error);
                   2168: }
                   2169:
                   2170: /* ARGSUSED */
                   2171: static int
                   2172: vfs_free_netcred(rn, w)
                   2173:        struct radix_node *rn;
1.50      christos 2174:        void *w;
1.30      mycroft  2175: {
1.123     augustss 2176:        struct radix_node_head *rnh = (struct radix_node_head *)w;
1.30      mycroft  2177:
                   2178:        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
                   2179:        free((caddr_t)rn, M_NETADDR);
                   2180:        return (0);
                   2181: }
                   2182:
                   2183: /*
                   2184:  * Free the net address hash lists that are hanging off the mount points.
                   2185:  */
                   2186: static void
                   2187: vfs_free_addrlist(nep)
                   2188:        struct netexport *nep;
                   2189: {
1.123     augustss 2190:        int i;
                   2191:        struct radix_node_head *rnh;
1.30      mycroft  2192:
                   2193:        for (i = 0; i <= AF_MAX; i++)
1.50      christos 2194:                if ((rnh = nep->ne_rtable[i]) != NULL) {
                   2195:                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30      mycroft  2196:                        free((caddr_t)rnh, M_RTABLE);
                   2197:                        nep->ne_rtable[i] = 0;
                   2198:                }
                   2199: }
                   2200:
                   2201: int
                   2202: vfs_export(mp, nep, argp)
                   2203:        struct mount *mp;
                   2204:        struct netexport *nep;
                   2205:        struct export_args *argp;
                   2206: {
                   2207:        int error;
                   2208:
                   2209:        if (argp->ex_flags & MNT_DELEXPORT) {
1.71      fvdl     2210:                if (mp->mnt_flag & MNT_EXPUBLIC) {
                   2211:                        vfs_setpublicfs(NULL, NULL, NULL);
                   2212:                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                   2213:                }
1.30      mycroft  2214:                vfs_free_addrlist(nep);
                   2215:                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
                   2216:        }
                   2217:        if (argp->ex_flags & MNT_EXPORTED) {
1.71      fvdl     2218:                if (argp->ex_flags & MNT_EXPUBLIC) {
                   2219:                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                   2220:                                return (error);
                   2221:                        mp->mnt_flag |= MNT_EXPUBLIC;
                   2222:                }
1.50      christos 2223:                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30      mycroft  2224:                        return (error);
                   2225:                mp->mnt_flag |= MNT_EXPORTED;
                   2226:        }
1.71      fvdl     2227:        return (0);
                   2228: }
                   2229:
                   2230: /*
                   2231:  * Set the publicly exported filesystem (WebNFS). Currently, only
                   2232:  * one public filesystem is possible in the spec (RFC 2054 and 2055)
                   2233:  */
                   2234: int
                   2235: vfs_setpublicfs(mp, nep, argp)
                   2236:        struct mount *mp;
                   2237:        struct netexport *nep;
                   2238:        struct export_args *argp;
                   2239: {
                   2240:        int error;
                   2241:        struct vnode *rvp;
                   2242:        char *cp;
                   2243:
                   2244:        /*
                   2245:         * mp == NULL -> invalidate the current info, the FS is
                   2246:         * no longer exported. May be called from either vfs_export
                   2247:         * or unmount, so check if it hasn't already been done.
                   2248:         */
                   2249:        if (mp == NULL) {
                   2250:                if (nfs_pub.np_valid) {
                   2251:                        nfs_pub.np_valid = 0;
                   2252:                        if (nfs_pub.np_index != NULL) {
                   2253:                                FREE(nfs_pub.np_index, M_TEMP);
                   2254:                                nfs_pub.np_index = NULL;
                   2255:                        }
                   2256:                }
                   2257:                return (0);
                   2258:        }
                   2259:
                   2260:        /*
                   2261:         * Only one allowed at a time.
                   2262:         */
                   2263:        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                   2264:                return (EBUSY);
                   2265:
                   2266:        /*
                   2267:         * Get real filehandle for root of exported FS.
                   2268:         */
1.91      perry    2269:        memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.71      fvdl     2270:        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
                   2271:
                   2272:        if ((error = VFS_ROOT(mp, &rvp)))
                   2273:                return (error);
                   2274:
                   2275:        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                   2276:                return (error);
                   2277:
                   2278:        vput(rvp);
                   2279:
                   2280:        /*
                   2281:         * If an indexfile was specified, pull it in.
                   2282:         */
                   2283:        if (argp->ex_indexfile != NULL) {
                   2284:                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                   2285:                    M_WAITOK);
                   2286:                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                   2287:                    MAXNAMLEN, (size_t *)0);
                   2288:                if (!error) {
                   2289:                        /*
                   2290:                         * Check for illegal filenames.
                   2291:                         */
                   2292:                        for (cp = nfs_pub.np_index; *cp; cp++) {
                   2293:                                if (*cp == '/') {
                   2294:                                        error = EINVAL;
                   2295:                                        break;
                   2296:                                }
                   2297:                        }
                   2298:                }
                   2299:                if (error) {
                   2300:                        FREE(nfs_pub.np_index, M_TEMP);
                   2301:                        return (error);
                   2302:                }
                   2303:        }
                   2304:
                   2305:        nfs_pub.np_mount = mp;
                   2306:        nfs_pub.np_valid = 1;
1.30      mycroft  2307:        return (0);
                   2308: }
                   2309:
                   2310: struct netcred *
                   2311: vfs_export_lookup(mp, nep, nam)
1.123     augustss 2312:        struct mount *mp;
1.30      mycroft  2313:        struct netexport *nep;
                   2314:        struct mbuf *nam;
                   2315: {
1.123     augustss 2316:        struct netcred *np;
                   2317:        struct radix_node_head *rnh;
1.30      mycroft  2318:        struct sockaddr *saddr;
                   2319:
                   2320:        np = NULL;
                   2321:        if (mp->mnt_flag & MNT_EXPORTED) {
                   2322:                /*
                   2323:                 * Lookup in the export list first.
                   2324:                 */
                   2325:                if (nam != NULL) {
                   2326:                        saddr = mtod(nam, struct sockaddr *);
                   2327:                        rnh = nep->ne_rtable[saddr->sa_family];
                   2328:                        if (rnh != NULL) {
                   2329:                                np = (struct netcred *)
                   2330:                                        (*rnh->rnh_matchaddr)((caddr_t)saddr,
                   2331:                                                              rnh);
                   2332:                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                   2333:                                        np = NULL;
                   2334:                        }
                   2335:                }
                   2336:                /*
                   2337:                 * If no address match, use the default if it exists.
                   2338:                 */
                   2339:                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                   2340:                        np = &nep->ne_defexported;
                   2341:        }
                   2342:        return (np);
1.35      ws       2343: }
                   2344:
                   2345: /*
                   2346:  * Do the usual access checking.
                   2347:  * file_mode, uid and gid are from the vnode in question,
                   2348:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   2349:  */
1.41      mycroft  2350: int
1.68      mycroft  2351: vaccess(type, file_mode, uid, gid, acc_mode, cred)
                   2352:        enum vtype type;
1.35      ws       2353:        mode_t file_mode;
                   2354:        uid_t uid;
                   2355:        gid_t gid;
                   2356:        mode_t acc_mode;
                   2357:        struct ucred *cred;
                   2358: {
                   2359:        mode_t mask;
                   2360:
1.64      mycroft  2361:        /*
                   2362:         * Super-user always gets read/write access, but execute access depends
                   2363:         * on at least one execute bit being set.
                   2364:         */
                   2365:        if (cred->cr_uid == 0) {
1.69      mycroft  2366:                if ((acc_mode & VEXEC) && type != VDIR &&
1.68      mycroft  2367:                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64      mycroft  2368:                        return (EACCES);
1.68      mycroft  2369:                return (0);
1.64      mycroft  2370:        }
1.35      ws       2371:
                   2372:        mask = 0;
                   2373:
                   2374:        /* Otherwise, check the owner. */
                   2375:        if (cred->cr_uid == uid) {
1.68      mycroft  2376:                if (acc_mode & VEXEC)
1.35      ws       2377:                        mask |= S_IXUSR;
                   2378:                if (acc_mode & VREAD)
                   2379:                        mask |= S_IRUSR;
                   2380:                if (acc_mode & VWRITE)
                   2381:                        mask |= S_IWUSR;
1.64      mycroft  2382:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2383:        }
                   2384:
                   2385:        /* Otherwise, check the groups. */
1.44      jtc      2386:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68      mycroft  2387:                if (acc_mode & VEXEC)
1.35      ws       2388:                        mask |= S_IXGRP;
                   2389:                if (acc_mode & VREAD)
                   2390:                        mask |= S_IRGRP;
                   2391:                if (acc_mode & VWRITE)
                   2392:                        mask |= S_IWGRP;
1.64      mycroft  2393:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2394:        }
                   2395:
                   2396:        /* Otherwise, check everyone else. */
1.68      mycroft  2397:        if (acc_mode & VEXEC)
1.35      ws       2398:                mask |= S_IXOTH;
                   2399:        if (acc_mode & VREAD)
                   2400:                mask |= S_IROTH;
                   2401:        if (acc_mode & VWRITE)
                   2402:                mask |= S_IWOTH;
1.64      mycroft  2403:        return ((file_mode & mask) == mask ? 0 : EACCES);
1.39      mycroft  2404: }
                   2405:
                   2406: /*
                   2407:  * Unmount all file systems.
                   2408:  * We traverse the list in reverse order under the assumption that doing so
                   2409:  * will avoid needing to worry about dependencies.
                   2410:  */
                   2411: void
1.128     sommerfe 2412: vfs_unmountall(p)
                   2413:        struct proc *p;
1.39      mycroft  2414: {
1.123     augustss 2415:        struct mount *mp, *nmp;
1.40      mycroft  2416:        int allerror, error;
1.39      mycroft  2417:
                   2418:        for (allerror = 0,
                   2419:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   2420:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      2421: #ifdef DEBUG
1.57      christos 2422:                printf("unmounting %s (%s)...\n",
1.56      christos 2423:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      2424: #endif
1.149     thorpej  2425:                /*
                   2426:                 * XXX Freeze syncer.  Must do this before locking the
                   2427:                 * mount point.  See dounmount() for details.
                   2428:                 */
                   2429:                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                   2430:                if (vfs_busy(mp, 0, 0)) {
                   2431:                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60      fvdl     2432:                        continue;
1.149     thorpej  2433:                }
1.80      fvdl     2434:                if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1.57      christos 2435:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  2436:                            mp->mnt_stat.f_mntonname, error);
                   2437:                        allerror = 1;
                   2438:                }
1.39      mycroft  2439:        }
                   2440:        if (allerror)
1.57      christos 2441:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  2442: }
                   2443:
/*
 * Sync and unmount file systems before shutting down.
 *
 * Issues a sync, then repeatedly scans the buffer array until no busy
 * write buffers remain.  If buffers are still busy when the retry
 * budget runs out, "giving up" is printed and the function returns
 * without unmounting anything.  File systems are also left mounted if
 * panicstr is set.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/* Wait for sync to finish. */
	/*
	 * dcount bounds the total number of softdep buffers pushed out
	 * below, across all passes.  iter is only advanced by passes that
	 * made no progress, so the loop ends after 20 such passes.
	 */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		/* Scan every buffer header, from the last one down to buf[0]. */
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			/* Busy, but neither invalid nor a read. */
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				/*
				 * Take the buffer off its free list and mark
				 * it busy with block I/O interrupts masked,
				 * then start an asynchronous write of it.
				 */
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		/* First pass that found busy buffers sets the baseline. */
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		/* Back off; the timeout grows with the no-progress count. */
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
		/*
		 * Also entered by the goto above when the softdep write
		 * budget is exhausted.
		 */
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
                   2541:
                   2542: /*
                   2543:  * Mount the root file system.  If the operator didn't specify a
                   2544:  * file system to use, try all possible file systems until one
                   2545:  * succeeds.
                   2546:  */
                   2547: int
                   2548: vfs_mountroot()
                   2549: {
1.79      thorpej  2550:        struct vfsops *v;
1.58      thorpej  2551:
                   2552:        if (root_device == NULL)
                   2553:                panic("vfs_mountroot: root device unknown");
                   2554:
                   2555:        switch (root_device->dv_class) {
                   2556:        case DV_IFNET:
                   2557:                if (rootdev != NODEV)
                   2558:                        panic("vfs_mountroot: rootdev set for DV_IFNET");
                   2559:                break;
                   2560:
                   2561:        case DV_DISK:
                   2562:                if (rootdev == NODEV)
                   2563:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                   2564:                break;
                   2565:
                   2566:        default:
                   2567:                printf("%s: inappropriate for root file system\n",
                   2568:                    root_device->dv_xname);
                   2569:                return (ENODEV);
                   2570:        }
                   2571:
                   2572:        /*
                   2573:         * If user specified a file system, use it.
                   2574:         */
                   2575:        if (mountroot != NULL)
                   2576:                return ((*mountroot)());
                   2577:
                   2578:        /*
                   2579:         * Try each file system currently configured into the kernel.
                   2580:         */
1.79      thorpej  2581:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2582:                if (v->vfs_mountroot == NULL)
1.58      thorpej  2583:                        continue;
                   2584: #ifdef DEBUG
1.79      thorpej  2585:                printf("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  2586: #endif
1.79      thorpej  2587:                if ((*v->vfs_mountroot)() == 0) {
                   2588:                        printf("root file system type: %s\n", v->vfs_name);
                   2589:                        break;
1.58      thorpej  2590:                }
                   2591:        }
                   2592:
1.79      thorpej  2593:        if (v == NULL) {
                   2594:                printf("no file system for %s", root_device->dv_xname);
                   2595:                if (root_device->dv_class == DV_DISK)
                   2596:                        printf(" (dev 0x%x)", rootdev);
                   2597:                printf("\n");
                   2598:                return (EFTYPE);
                   2599:        }
                   2600:        return (0);
1.58      thorpej  2601: }
                   2602:
                   2603: /*
                   2604:  * Given a file system name, look up the vfsops for that
                   2605:  * file system, or return NULL if file system isn't present
                   2606:  * in the kernel.
                   2607:  */
                   2608: struct vfsops *
                   2609: vfs_getopsbyname(name)
                   2610:        const char *name;
                   2611: {
1.79      thorpej  2612:        struct vfsops *v;
                   2613:
                   2614:        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                   2615:                if (strcmp(v->vfs_name, name) == 0)
                   2616:                        break;
                   2617:        }
                   2618:
                   2619:        return (v);
                   2620: }
                   2621:
                   2622: /*
                   2623:  * Establish a file system and initialize it.
                   2624:  */
                   2625: int
                   2626: vfs_attach(vfs)
                   2627:        struct vfsops *vfs;
                   2628: {
                   2629:        struct vfsops *v;
                   2630:        int error = 0;
                   2631:
1.58      thorpej  2632:
1.79      thorpej  2633:        /*
                   2634:         * Make sure this file system doesn't already exist.
                   2635:         */
1.155.2.4  thorpej  2636:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2637:                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                   2638:                        error = EEXIST;
                   2639:                        goto out;
                   2640:                }
                   2641:        }
                   2642:
                   2643:        /*
                   2644:         * Initialize the vnode operations for this file system.
                   2645:         */
                   2646:        vfs_opv_init(vfs->vfs_opv_descs);
                   2647:
                   2648:        /*
                   2649:         * Now initialize the file system itself.
                   2650:         */
                   2651:        (*vfs->vfs_init)();
                   2652:
                   2653:        /*
                   2654:         * ...and link it into the kernel's list.
                   2655:         */
                   2656:        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
                   2657:
                   2658:        /*
                   2659:         * Sanity: make sure the reference count is 0.
                   2660:         */
                   2661:        vfs->vfs_refcount = 0;
                   2662:
                   2663:  out:
                   2664:        return (error);
                   2665: }
                   2666:
                   2667: /*
                   2668:  * Remove a file system from the kernel.
                   2669:  */
                   2670: int
                   2671: vfs_detach(vfs)
                   2672:        struct vfsops *vfs;
                   2673: {
                   2674:        struct vfsops *v;
                   2675:
                   2676:        /*
                   2677:         * Make sure no one is using the filesystem.
                   2678:         */
                   2679:        if (vfs->vfs_refcount != 0)
                   2680:                return (EBUSY);
                   2681:
                   2682:        /*
                   2683:         * ...and remove it from the kernel's list.
                   2684:         */
1.155.2.4  thorpej  2685:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2686:                if (v == vfs) {
                   2687:                        LIST_REMOVE(v, vfs_list);
                   2688:                        break;
                   2689:                }
                   2690:        }
                   2691:
                   2692:        if (v == NULL)
                   2693:                return (ESRCH);
1.121     jdolecek 2694:
                   2695:        /*
                   2696:         * Now run the file system-specific cleanups.
                   2697:         */
                   2698:        (*vfs->vfs_done)();
1.79      thorpej  2699:
                   2700:        /*
                   2701:         * Free the vnode operations vector.
                   2702:         */
                   2703:        vfs_opv_free(vfs->vfs_opv_descs);
                   2704:        return (0);
1.29      cgd      2705: }
1.125     chs      2706:
1.155.2.4  thorpej  2707: void
                   2708: vfs_reinit(void)
                   2709: {
                   2710:        struct vfsops *vfs;
                   2711:
                   2712:        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                   2713:                if (vfs->vfs_reinit) {
                   2714:                        (*vfs->vfs_reinit)();
                   2715:                }
                   2716:        }
                   2717: }
                   2718:
1.125     chs      2719: #ifdef DDB
/*
 * Names for the bits of a buffer's b_flags, in the format string form
 * that vfs_buf_print() hands to bitmask_snprintf().
 * NOTE(review): per the bitmask_snprintf convention the leading \20
 * presumably selects the output base and each following octal escape
 * is a 1-based bit number -- confirm against bitmask_snprintf(9).
 * There are no entries for \27 and \31.
 */
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";
1.125     chs      2725:
                   2726: void
                   2727: vfs_buf_print(bp, full, pr)
                   2728:        struct buf *bp;
                   2729:        int full;
                   2730:        void (*pr) __P((const char *, ...));
                   2731: {
                   2732:        char buf[1024];
                   2733:
                   2734:        (*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
                   2735:                  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
                   2736:
                   2737:        bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
                   2738:        (*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
                   2739:
1.155.2.4  thorpej  2740:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1.125     chs      2741:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142     chs      2742:        (*pr)("  data %p saveaddr %p dep %p\n",
                   2743:                  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125     chs      2744:        (*pr)("  iodone %p\n", bp->b_iodone);
                   2745: }
                   2746:
                   2747:
/*
 * Names for the bits of a vnode's v_flag, in the format string form
 * that vfs_vnode_print() hands to bitmask_snprintf().
 * NOTE(review): the leading \20 presumably selects the output base and
 * each following octal escape is a 1-based bit number -- confirm
 * against bitmask_snprintf(9).  There are no entries for \6 to \10.
 */
const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
1.125     chs      2752:
/*
 * Printable names for vnode types; vfs_vnode_print() indexes this
 * table with vp->v_type.
 * NOTE(review): the order presumably has to match the v_type
 * enumeration -- confirm against its declaration.
 */
const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};
                   2764:
/*
 * Printable names for vnode tags; vfs_vnode_print() indexes this
 * table with vp->v_tag.
 * NOTE(review): the order presumably has to match the v_tag
 * enumeration -- confirm against its declaration.
 */
const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};
                   2790:
                   2791: void
                   2792: vfs_vnode_print(vp, full, pr)
                   2793:        struct vnode *vp;
                   2794:        int full;
                   2795:        void (*pr) __P((const char *, ...));
                   2796: {
1.142     chs      2797:        char buf[256];
1.125     chs      2798:        const char *vtype, *vtag;
1.155.2.7! jdolecek 2799:        int tmp;
1.125     chs      2800:
1.155.2.4  thorpej  2801:        uvm_object_printit(&vp->v_uobj, full, pr);
1.125     chs      2802:        bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
                   2803:        (*pr)("\nVNODE flags %s\n", buf);
1.155.2.4  thorpej  2804:        (*pr)("mp %p numoutput %d size 0x%llx\n",
                   2805:              vp->v_mount, vp->v_numoutput, vp->v_size);
1.125     chs      2806:
1.155.2.4  thorpej  2807:        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
1.125     chs      2808:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   2809:              vp->v_holdcnt, vp->v_numoutput);
                   2810:
1.155.2.7! jdolecek 2811:        vtype = ((tmp = vp->v_type) >= 0 &&
1.125     chs      2812:                 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
                   2813:                vnode_types[vp->v_type] : "UNKNOWN";
1.155.2.7! jdolecek 2814:        vtag = ((tmp = vp->v_tag) >= 0 &&
1.125     chs      2815:                vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
                   2816:                vnode_tags[vp->v_tag] : "UNKNOWN";
                   2817:
1.155.2.4  thorpej  2818:        (*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n",
1.125     chs      2819:              vtype, vp->v_type, vtag, vp->v_tag,
                   2820:              vp->v_id, vp->v_mount, vp->v_mountedhere);
                   2821:
                   2822:        if (full) {
                   2823:                struct buf *bp;
                   2824:
                   2825:                (*pr)("clean bufs:\n");
1.142     chs      2826:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   2827:                        (*pr)(" bp %p\n", bp);
1.125     chs      2828:                        vfs_buf_print(bp, full, pr);
                   2829:                }
                   2830:
                   2831:                (*pr)("dirty bufs:\n");
1.142     chs      2832:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   2833:                        (*pr)(" bp %p\n", bp);
1.125     chs      2834:                        vfs_buf_print(bp, full, pr);
                   2835:                }
                   2836:        }
                   2837: }
                   2838: #endif

CVSweb <webmaster@jp.NetBSD.org>