[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.261

1.261   ! reinoud     1: /*     $NetBSD: vfs_subr.c,v 1.260 2006/01/16 21:44:46 yamt Exp $      */
1.74      thorpej     2:
                      3: /*-
1.243     mycroft     4:  * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
1.243     mycroft    10:  * This code is derived from software contributed to The NetBSD Foundation
                     11:  * by Charles M. Hannum.
1.74      thorpej    12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  * 3. All advertising materials mentioning features or use of this software
                     22:  *    must display the following acknowledgement:
                     23:  *     This product includes software developed by the NetBSD
                     24:  *     Foundation, Inc. and its contributors.
                     25:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     26:  *    contributors may be used to endorse or promote products derived
                     27:  *    from this software without specific prior written permission.
                     28:  *
                     29:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     30:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     31:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     32:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     33:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     34:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     35:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     36:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     37:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     38:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     39:  * POSSIBILITY OF SUCH DAMAGE.
                     40:  */
1.32      cgd        41:
1.29      cgd        42: /*
1.30      mycroft    43:  * Copyright (c) 1989, 1993
                     44:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        45:  * (c) UNIX System Laboratories, Inc.
                     46:  * All or some portions of this file are derived from material licensed
                     47:  * to the University of California by American Telephone and Telegraph
                     48:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     49:  * the permission of UNIX System Laboratories, Inc.
                     50:  *
                     51:  * Redistribution and use in source and binary forms, with or without
                     52:  * modification, are permitted provided that the following conditions
                     53:  * are met:
                     54:  * 1. Redistributions of source code must retain the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer.
                     56:  * 2. Redistributions in binary form must reproduce the above copyright
                     57:  *    notice, this list of conditions and the following disclaimer in the
                     58:  *    documentation and/or other materials provided with the distribution.
1.204     agc        59:  * 3. Neither the name of the University nor the names of its contributors
1.29      cgd        60:  *    may be used to endorse or promote products derived from this software
                     61:  *    without specific prior written permission.
                     62:  *
                     63:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     64:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     65:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     66:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     67:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     68:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     69:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     70:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     71:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     72:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     73:  * SUCH DAMAGE.
                     74:  *
1.32      cgd        75:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        76:  */
                     77:
                     78: /*
                     79:  * External virtual filesystem routines
                     80:  */
1.162     lukem      81:
                     82: #include <sys/cdefs.h>
1.261   ! reinoud    83: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.260 2006/01/16 21:44:46 yamt Exp $");
1.78      mrg        84:
1.195     christos   85: #include "opt_inet.h"
1.125     chs        86: #include "opt_ddb.h"
1.95      thorpej    87: #include "opt_compat_netbsd.h"
1.97      christos   88: #include "opt_compat_43.h"
1.29      cgd        89:
                     90: #include <sys/param.h>
1.30      mycroft    91: #include <sys/systm.h>
1.29      cgd        92: #include <sys/proc.h>
1.138     bouyer     93: #include <sys/kernel.h>
1.29      cgd        94: #include <sys/mount.h>
1.46      mycroft    95: #include <sys/fcntl.h>
1.29      cgd        96: #include <sys/vnode.h>
1.30      mycroft    97: #include <sys/stat.h>
1.29      cgd        98: #include <sys/namei.h>
                     99: #include <sys/ucred.h>
                    100: #include <sys/buf.h>
                    101: #include <sys/errno.h>
                    102: #include <sys/malloc.h>
1.30      mycroft   103: #include <sys/domain.h>
                    104: #include <sys/mbuf.h>
1.184     thorpej   105: #include <sys/sa.h>
1.51      christos  106: #include <sys/syscallargs.h>
1.58      thorpej   107: #include <sys/device.h>
1.192     christos  108: #include <sys/filedesc.h>
1.50      christos  109:
1.30      mycroft   110: #include <miscfs/specfs/specdev.h>
1.113     fvdl      111: #include <miscfs/genfs/genfs.h>
                    112: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   113:
1.125     chs       114: #include <uvm/uvm.h>
1.255     yamt      115: #include <uvm/uvm_readahead.h>
1.125     chs       116: #include <uvm/uvm_ddb.h>
1.129     mrg       117:
                    118: #include <sys/sysctl.h>
1.77      mrg       119:
1.189     jdolecek  120: const enum vtype iftovt_tab[16] = {
1.30      mycroft   121:        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
                    122:        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
                    123: };
1.146     jdolecek  124: const int      vttoif_tab[9] = {
1.30      mycroft   125:        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
                    126:        S_IFSOCK, S_IFIFO, S_IFMT,
                    127: };
                    128:
1.31      mycroft   129: int doforce = 1;               /* 1 => permit forcible unmounting */
                    130: int prtactive = 0;             /* 1 => print out reclaim of active vnodes */
1.29      cgd       131:
1.117     fvdl      132: extern int dovfsusermount;     /* 1 => permit any user to mount filesystems */
                    133:
1.29      cgd       134: /*
                    135:  * Insq/Remq for the vnode usage lists.
                    136:  */
                    137: #define        bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
                    138: #define        bufremvn(bp) {                                                  \
                    139:        LIST_REMOVE(bp, b_vnbufs);                                      \
                    140:        (bp)->b_vnbufs.le_next = NOLIST;                                \
1.217     junyoung  141: }
1.113     fvdl      142: /* TAILQ_HEAD(freelst, vnode) vnode_free_list =        vnode free list (in vnode.h) */
                    143: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114     enami     144: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113     fvdl      145:
1.55      cgd       146: struct mntlist mountlist =                     /* mounted filesystem list */
                    147:     CIRCLEQ_HEAD_INITIALIZER(mountlist);
1.79      thorpej   148: struct vfs_list_head vfs_list =                        /* vfs list */
1.118     mycroft   149:     LIST_HEAD_INITIALIZER(vfs_list);
1.79      thorpej   150:
1.135     sommerfe  151: struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
                    152: static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
                    153: struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
                    154: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
                    155: struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
1.80      fvdl      156:
1.187     pk        157: /* XXX - gross; single global lock to protect v_numoutput */
                    158: struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;
                    159:
1.79      thorpej   160: /*
                    161:  * These define the root filesystem and device.
                    162:  */
                    163: struct mount *rootfs;
                    164: struct vnode *rootvnode;
1.80      fvdl      165: struct device *root_device;                    /* root device */
1.79      thorpej   166:
1.223     simonb    167: POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
                    168:     &pool_allocator_nointr);
1.186     thorpej   169:
                    170: MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93      thorpej   171:
1.89      kleink    172: /*
                    173:  * Local declarations.
                    174:  */
1.260     yamt      175: static void insmntque(struct vnode *, struct mount *);
                    176: static int getdevvp(dev_t, struct vnode **, enum vtype);
                    177: static void vclean(struct vnode *, int, struct lwp *);
1.256     christos  178: static struct vnode *getcleanvnode(struct lwp *);
1.51      christos  179:
                    180: #ifdef DEBUG
1.217     junyoung  181: void printlockedvnodes(void);
1.51      christos  182: #endif
                    183:
/*
 * vntblinit: initialize the vnode management data structures.
 *
 * The only step performed here is initializing the filesystem syncer
 * through vn_initialize_syncerd().
 */
void
vntblinit(void)
{

	vn_initialize_syncerd();
}
                    196:
1.202     yamt      197: int
1.256     christos  198: vfs_drainvnodes(long target, struct lwp *l)
1.202     yamt      199: {
                    200:
                    201:        simple_lock(&vnode_free_list_slock);
                    202:        while (numvnodes > target) {
                    203:                struct vnode *vp;
                    204:
1.256     christos  205:                vp = getcleanvnode(l);
1.202     yamt      206:                if (vp == NULL)
                    207:                        return EBUSY; /* give up */
                    208:                pool_put(&vnode_pool, vp);
                    209:                simple_lock(&vnode_free_list_slock);
                    210:                numvnodes--;
                    211:        }
                    212:        simple_unlock(&vnode_free_list_slock);
                    213:
                    214:        return 0;
                    215: }
                    216:
                    217: /*
                    218:  * grab a vnode from freelist and clean it.
                    219:  */
                    220: struct vnode *
1.256     christos  221: getcleanvnode(struct lwp *l)
1.202     yamt      222: {
                    223:        struct vnode *vp;
1.208     hannken   224:        struct mount *mp;
1.202     yamt      225:        struct freelst *listhd;
                    226:
                    227:        LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
1.229     yamt      228:
                    229:        listhd = &vnode_free_list;
                    230: try_nextlist:
                    231:        TAILQ_FOREACH(vp, listhd, v_freelist) {
1.208     hannken   232:                if (!simple_lock_try(&vp->v_interlock))
                    233:                        continue;
1.227     yamt      234:                /*
                    235:                 * as our lwp might hold the underlying vnode locked,
                    236:                 * don't try to reclaim the VLAYER vnode if it's locked.
                    237:                 */
1.228     yamt      238:                if ((vp->v_flag & VXLOCK) == 0 &&
                    239:                    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
1.208     hannken   240:                        if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
1.202     yamt      241:                                break;
                    242:                }
1.208     hannken   243:                mp = NULL;
                    244:                simple_unlock(&vp->v_interlock);
1.202     yamt      245:        }
                    246:
                    247:        if (vp == NULLVP) {
1.229     yamt      248:                if (listhd == &vnode_free_list) {
                    249:                        listhd = &vnode_hold_list;
                    250:                        goto try_nextlist;
                    251:                }
1.202     yamt      252:                simple_unlock(&vnode_free_list_slock);
                    253:                return NULLVP;
                    254:        }
                    255:
                    256:        if (vp->v_usecount)
                    257:                panic("free vnode isn't, vp %p", vp);
                    258:        TAILQ_REMOVE(listhd, vp, v_freelist);
                    259:        /* see comment on why 0xdeadb is set at end of vgone (below) */
                    260:        vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                    261:        simple_unlock(&vnode_free_list_slock);
                    262:        vp->v_lease = NULL;
                    263:
                    264:        if (vp->v_type != VBAD)
1.256     christos  265:                vgonel(vp, l);
1.202     yamt      266:        else
                    267:                simple_unlock(&vp->v_interlock);
1.208     hannken   268:        vn_finished_write(mp, 0);
1.202     yamt      269: #ifdef DIAGNOSTIC
                    270:        if (vp->v_data || vp->v_uobj.uo_npages ||
                    271:            TAILQ_FIRST(&vp->v_uobj.memq))
                    272:                panic("cleaned vnode isn't, vp %p", vp);
                    273:        if (vp->v_numoutput)
                    274:                panic("clean vnode has pending I/O's, vp %p", vp);
                    275: #endif
                    276:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                    277:
                    278:        return vp;
                    279: }
                    280:
1.29      cgd       281: /*
1.80      fvdl      282:  * Mark a mount point as busy. Used to synchronize access and to delay
                    283:  * unmounting. Interlock is not released on failure.
1.29      cgd       284:  */
1.50      christos  285: int
1.247     thorpej   286: vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
1.29      cgd       287: {
1.80      fvdl      288:        int lkflags;
1.29      cgd       289:
1.207     dbj       290:        while (mp->mnt_iflag & IMNT_UNMOUNT) {
1.224     pk        291:                int gone, n;
1.217     junyoung  292:
1.80      fvdl      293:                if (flags & LK_NOWAIT)
                    294:                        return (ENOENT);
1.113     fvdl      295:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
1.256     christos  296:                    && mp->mnt_unmounter == curlwp)
1.113     fvdl      297:                        return (EDEADLK);
1.80      fvdl      298:                if (interlkp)
                    299:                        simple_unlock(interlkp);
                    300:                /*
                    301:                 * Since all busy locks are shared except the exclusive
                    302:                 * lock granted when unmounting, the only place that a
                    303:                 * wakeup needs to be done is at the release of the
                    304:                 * exclusive lock at the end of dounmount.
                    305:                 */
1.224     pk        306:                simple_lock(&mp->mnt_slock);
1.103     sommerfe  307:                mp->mnt_wcnt++;
1.224     pk        308:                ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
                    309:                n = --mp->mnt_wcnt;
                    310:                simple_unlock(&mp->mnt_slock);
1.207     dbj       311:                gone = mp->mnt_iflag & IMNT_GONE;
1.217     junyoung  312:
1.224     pk        313:                if (n == 0)
1.103     sommerfe  314:                        wakeup(&mp->mnt_wcnt);
1.80      fvdl      315:                if (interlkp)
                    316:                        simple_lock(interlkp);
1.103     sommerfe  317:                if (gone)
                    318:                        return (ENOENT);
1.80      fvdl      319:        }
                    320:        lkflags = LK_SHARED;
                    321:        if (interlkp)
                    322:                lkflags |= LK_INTERLOCK;
                    323:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                    324:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       325:        return (0);
                    326: }
                    327:
                    328: /*
1.80      fvdl      329:  * Free a busy filesystem.
1.29      cgd       330:  */
                    331: void
1.247     thorpej   332: vfs_unbusy(struct mount *mp)
1.29      cgd       333: {
                    334:
1.80      fvdl      335:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       336: }
                    337:
                    338: /*
1.80      fvdl      339:  * Lookup a filesystem type, and if found allocate and initialize
                    340:  * a mount structure for it.
                    341:  *
                    342:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       343:  */
1.50      christos  344: int
1.247     thorpej   345: vfs_rootmountalloc(const char *fstypename, const char *devname,
                    346:     struct mount **mpp)
1.29      cgd       347: {
1.80      fvdl      348:        struct vfsops *vfsp = NULL;
                    349:        struct mount *mp;
1.29      cgd       350:
1.152     jdolecek  351:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80      fvdl      352:                if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
                    353:                        break;
                    354:
                    355:        if (vfsp == NULL)
                    356:                return (ENODEV);
                    357:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     358:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      359:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
1.224     pk        360:        simple_lock_init(&mp->mnt_slock);
1.80      fvdl      361:        (void)vfs_busy(mp, LK_NOWAIT, 0);
                    362:        LIST_INIT(&mp->mnt_vnodelist);
                    363:        mp->mnt_op = vfsp;
                    364:        mp->mnt_flag = MNT_RDONLY;
                    365:        mp->mnt_vnodecovered = NULLVP;
1.230     hannken   366:        mp->mnt_leaf = mp;
1.80      fvdl      367:        vfsp->vfs_refcount++;
                    368:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
                    369:        mp->mnt_stat.f_mntonname[0] = '/';
                    370:        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    371:        *mpp = mp;
1.29      cgd       372:        return (0);
                    373: }
                    374:
                    375: /*
                    376:  * Lookup a mount point by filesystem identifier.
                    377:  */
                    378: struct mount *
1.247     thorpej   379: vfs_getvfs(fsid_t *fsid)
1.29      cgd       380: {
1.123     augustss  381:        struct mount *mp;
1.29      cgd       382:
1.80      fvdl      383:        simple_lock(&mountlist_slock);
1.177     matt      384:        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
1.221     christos  385:                if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                    386:                    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
1.80      fvdl      387:                        simple_unlock(&mountlist_slock);
1.29      cgd       388:                        return (mp);
1.80      fvdl      389:                }
                    390:        }
                    391:        simple_unlock(&mountlist_slock);
1.29      cgd       392:        return ((struct mount *)0);
                    393: }
                    394:
                    395: /*
                    396:  * Get a new unique fsid
                    397:  */
                    398: void
1.247     thorpej   399: vfs_getnewfsid(struct mount *mp)
1.29      cgd       400: {
                    401:        static u_short xxxfs_mntid;
                    402:        fsid_t tfsid;
1.80      fvdl      403:        int mtype;
1.29      cgd       404:
1.80      fvdl      405:        simple_lock(&mntid_slock);
1.127     assar     406:        mtype = makefstype(mp->mnt_op->vfs_name);
1.221     christos  407:        mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
                    408:        mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
                    409:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1.29      cgd       410:        if (xxxfs_mntid == 0)
                    411:                ++xxxfs_mntid;
1.221     christos  412:        tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
                    413:        tfsid.__fsid_val[1] = mtype;
1.177     matt      414:        if (!CIRCLEQ_EMPTY(&mountlist)) {
1.80      fvdl      415:                while (vfs_getvfs(&tfsid)) {
1.221     christos  416:                        tfsid.__fsid_val[0]++;
1.29      cgd       417:                        xxxfs_mntid++;
                    418:                }
                    419:        }
1.221     christos  420:        mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
                    421:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1.80      fvdl      422:        simple_unlock(&mntid_slock);
1.29      cgd       423: }
                    424:
                    425: /*
1.30      mycroft   426:  * Make a 'unique' number from a mount type name.
1.29      cgd       427:  */
                    428: long
1.247     thorpej   429: makefstype(const char *type)
1.29      cgd       430: {
                    431:        long rv;
                    432:
                    433:        for (rv = 0; *type; type++) {
                    434:                rv <<= 2;
                    435:                rv ^= *type;
                    436:        }
                    437:        return rv;
                    438: }
1.30      mycroft   439:
1.80      fvdl      440:
1.30      mycroft   441: /*
                    442:  * Set vnode attributes to VNOVAL
                    443:  */
                    444: void
1.247     thorpej   445: vattr_null(struct vattr *vap)
1.30      mycroft   446: {
                    447:
                    448:        vap->va_type = VNON;
1.75      enami     449:
                    450:        /*
                    451:         * Assign individually so that it is safe even if size and
                    452:         * sign of each member are varied.
                    453:         */
                    454:        vap->va_mode = VNOVAL;
                    455:        vap->va_nlink = VNOVAL;
                    456:        vap->va_uid = VNOVAL;
                    457:        vap->va_gid = VNOVAL;
                    458:        vap->va_fsid = VNOVAL;
                    459:        vap->va_fileid = VNOVAL;
1.30      mycroft   460:        vap->va_size = VNOVAL;
1.75      enami     461:        vap->va_blocksize = VNOVAL;
1.76      christos  462:        vap->va_atime.tv_sec =
                    463:            vap->va_mtime.tv_sec =
1.191     enami     464:            vap->va_ctime.tv_sec =
                    465:            vap->va_birthtime.tv_sec = VNOVAL;
1.76      christos  466:        vap->va_atime.tv_nsec =
                    467:            vap->va_mtime.tv_nsec =
1.191     enami     468:            vap->va_ctime.tv_nsec =
                    469:            vap->va_birthtime.tv_nsec = VNOVAL;
1.75      enami     470:        vap->va_gen = VNOVAL;
                    471:        vap->va_flags = VNOVAL;
                    472:        vap->va_rdev = VNOVAL;
1.30      mycroft   473:        vap->va_bytes = VNOVAL;
                    474:        vap->va_vaflags = 0;
                    475: }
                    476:
                    477: /*
                    478:  * Routines having to do with the management of the vnode table.
                    479:  */
1.217     junyoung  480: extern int (**dead_vnodeop_p)(void *);
1.30      mycroft   481: long numvnodes;
                    482:
1.29      cgd       483: /*
                    484:  * Return the next vnode from the free list.
                    485:  */
1.50      christos  486: int
1.247     thorpej   487: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
                    488:     struct vnode **vpp)
1.29      cgd       489: {
1.142     chs       490:        extern struct uvm_pagerops uvm_vnodeops;
                    491:        struct uvm_object *uobj;
1.256     christos  492:        struct lwp *l = curlwp;         /* XXX */
1.113     fvdl      493:        static int toggle;
1.80      fvdl      494:        struct vnode *vp;
1.153     thorpej   495:        int error = 0, tryalloc;
1.158     chs       496:
1.159     enami     497:  try_again:
1.103     sommerfe  498:        if (mp) {
                    499:                /*
1.106     sommerfe  500:                 * Mark filesystem busy while we're creating a vnode.
                    501:                 * If unmount is in progress, this will wait; if the
                    502:                 * unmount succeeds (only if umount -f), this will
                    503:                 * return an error.  If the unmount fails, we'll keep
                    504:                 * going afterwards.
                    505:                 * (This puts the per-mount vnode list logically under
                    506:                 * the protection of the vfs_busy lock).
1.103     sommerfe  507:                 */
1.113     fvdl      508:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                    509:                if (error && error != EDEADLK)
1.103     sommerfe  510:                        return error;
                    511:        }
1.29      cgd       512:
1.113     fvdl      513:        /*
                    514:         * We must choose whether to allocate a new vnode or recycle an
                    515:         * existing one. The criterion for allocating a new one is that
                    516:         * the total number of vnodes is less than the number desired or
                    517:         * there are no vnodes on either free list. Generally we only
                    518:         * want to recycle vnodes that have no buffers associated with
                    519:         * them, so we look first on the vnode_free_list. If it is empty,
                    520:         * we next consider vnodes with referencing buffers on the
                    521:         * vnode_hold_list. The toggle ensures that half the time we
                    522:         * will use a buffer from the vnode_hold_list, and half the time
                    523:         * we will allocate a new one unless the list has grown to twice
                    524:         * the desired size. We are reticent to recycle vnodes from the
                    525:         * vnode_hold_list because we will lose the identity of all its
                    526:         * referencing buffers.
                    527:         */
1.142     chs       528:
1.153     thorpej   529:        vp = NULL;
                    530:
                    531:        simple_lock(&vnode_free_list_slock);
                    532:
1.113     fvdl      533:        toggle ^= 1;
                    534:        if (numvnodes > 2 * desiredvnodes)
                    535:                toggle = 0;
                    536:
1.153     thorpej   537:        tryalloc = numvnodes < desiredvnodes ||
1.159     enami     538:            (TAILQ_FIRST(&vnode_free_list) == NULL &&
                    539:             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153     thorpej   540:
                    541:        if (tryalloc &&
                    542:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.206     yamt      543:                numvnodes++;
1.80      fvdl      544:                simple_unlock(&vnode_free_list_slock);
1.142     chs       545:                memset(vp, 0, sizeof(*vp));
1.248     yamt      546:                UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
1.225     yamt      547:                /*
                    548:                 * done by memset() above.
                    549:                 *      LIST_INIT(&vp->v_nclist);
                    550:                 *      LIST_INIT(&vp->v_dnclist);
                    551:                 */
1.29      cgd       552:        } else {
1.256     christos  553:                vp = getcleanvnode(l);
1.80      fvdl      554:                /*
                    555:                 * Unless this is a bad time of the month, at most
                    556:                 * the first NCPUS items on the free list are
                    557:                 * locked, so this is close enough to being empty.
                    558:                 */
                    559:                if (vp == NULLVP) {
1.113     fvdl      560:                        if (mp && error != EDEADLK)
                    561:                                vfs_unbusy(mp);
1.153     thorpej   562:                        if (tryalloc) {
                    563:                                printf("WARNING: unable to allocate new "
                    564:                                    "vnode, retrying...\n");
                    565:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                    566:                                goto try_again;
                    567:                        }
1.132     jdolecek  568:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       569:                        *vpp = 0;
                    570:                        return (ENFILE);
                    571:                }
1.248     yamt      572:                vp->v_usecount = 1;
1.29      cgd       573:                vp->v_flag = 0;
1.158     chs       574:                vp->v_socket = NULL;
1.29      cgd       575:        }
                    576:        vp->v_type = VNON;
1.104     wrstuden  577:        vp->v_vnlock = &vp->v_lock;
                    578:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.225     yamt      579:        KASSERT(LIST_EMPTY(&vp->v_nclist));
                    580:        KASSERT(LIST_EMPTY(&vp->v_dnclist));
1.29      cgd       581:        vp->v_tag = tag;
                    582:        vp->v_op = vops;
                    583:        insmntque(vp, mp);
1.30      mycroft   584:        *vpp = vp;
                    585:        vp->v_data = 0;
1.240     christos  586:        simple_lock_init(&vp->v_interlock);
1.142     chs       587:
                    588:        /*
                    589:         * initialize uvm_object within vnode.
                    590:         */
                    591:
1.158     chs       592:        uobj = &vp->v_uobj;
                    593:        KASSERT(uobj->pgops == &uvm_vnodeops);
                    594:        KASSERT(uobj->uo_npages == 0);
                    595:        KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
                    596:        vp->v_size = VSIZENOTSET;
1.142     chs       597:
1.113     fvdl      598:        if (mp && error != EDEADLK)
                    599:                vfs_unbusy(mp);
1.29      cgd       600:        return (0);
1.130     fvdl      601: }
                    602:
                    603: /*
                    604:  * This is really just the reverse of getnewvnode(). Needed for
                    605:  * VFS_VGET functions that may need to push back a vnode in case
                    606:  * of a locking race.
                    607:  */
                    608: void
1.247     thorpej   609: ungetnewvnode(struct vnode *vp)
1.130     fvdl      610: {
                    611: #ifdef DIAGNOSTIC
                    612:        if (vp->v_usecount != 1)
1.131     fvdl      613:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      614: #endif
                    615:        vp->v_usecount--;
                    616:        insmntque(vp, NULL);
                    617:        vp->v_type = VBAD;
                    618:
                    619:        simple_lock(&vp->v_interlock);
1.217     junyoung  620:        /*
1.130     fvdl      621:         * Insert at head of LRU list
                    622:         */
                    623:        simple_lock(&vnode_free_list_slock);
                    624:        if (vp->v_holdcnt > 0)
                    625:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    626:        else
                    627:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1.217     junyoung  628:        simple_unlock(&vnode_free_list_slock);
1.130     fvdl      629:        simple_unlock(&vp->v_interlock);
1.29      cgd       630: }
                    631:
                    632: /*
                    633:  * Move a vnode from one mount queue to another.
                    634:  */
1.260     yamt      635: static void
1.247     thorpej   636: insmntque(struct vnode *vp, struct mount *mp)
1.29      cgd       637: {
                    638:
1.103     sommerfe  639: #ifdef DIAGNOSTIC
                    640:        if ((mp != NULL) &&
1.207     dbj       641:            (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113     fvdl      642:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    643:            vp->v_tag != VT_VFS) {
1.103     sommerfe  644:                panic("insmntque into dying filesystem");
                    645:        }
                    646: #endif
1.217     junyoung  647:
1.80      fvdl      648:        simple_lock(&mntvnode_slock);
1.29      cgd       649:        /*
                    650:         * Delete from old mount point vnode list, if on one.
                    651:         */
                    652:        if (vp->v_mount != NULL)
                    653:                LIST_REMOVE(vp, v_mntvnodes);
                    654:        /*
                    655:         * Insert into list of vnodes for the new mount point, if available.
                    656:         */
1.80      fvdl      657:        if ((vp->v_mount = mp) != NULL)
                    658:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    659:        simple_unlock(&mntvnode_slock);
1.29      cgd       660: }
                    661:
                    662: /*
                    663:  * Update outstanding I/O count and do wakeup if requested.
                    664:  */
1.50      christos  665: void
1.247     thorpej   666: vwakeup(struct buf *bp)
1.29      cgd       667: {
1.123     augustss  668:        struct vnode *vp;
1.29      cgd       669:
1.50      christos  670:        if ((vp = bp->b_vp) != NULL) {
1.187     pk        671:                /* XXX global lock hack
                    672:                 * can't use v_interlock here since this is called
                    673:                 * in interrupt context from biodone().
                    674:                 */
                    675:                simple_lock(&global_v_numoutput_slock);
1.30      mycroft   676:                if (--vp->v_numoutput < 0)
1.125     chs       677:                        panic("vwakeup: neg numoutput, vp %p", vp);
1.29      cgd       678:                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                    679:                        vp->v_flag &= ~VBWAIT;
                    680:                        wakeup((caddr_t)&vp->v_numoutput);
                    681:                }
1.187     pk        682:                simple_unlock(&global_v_numoutput_slock);
1.29      cgd       683:        }
                    684: }
                    685:
                    686: /*
                    687:  * Flush out and invalidate all buffers associated with a vnode.
1.126     mycroft   688:  * Called with the underlying vnode locked, which should prevent new dirty
                    689:  * buffers from being queued.
1.29      cgd       690:  */
1.30      mycroft   691: int
1.256     christos  692: vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct lwp *l,
1.247     thorpej   693:     int slpflag, int slptimeo)
1.29      cgd       694: {
                                /*
                                 * Outline: push the vnode pages out through VOP_PUTPAGES(),
                                 * then, when V_SAVE is set, call VOP_FSYNC(), then sweep the
                                 * clean and the dirty buffer lists, flagging each buffer
                                 * B_INVAL and handing it to brelse().  cred and l are only
                                 * forwarded to VOP_FSYNC(); slpflag and slptimeo are only
                                 * forwarded to ltsleep().  Returns 0 on success, otherwise
                                 * the non-zero value produced by VOP_PUTPAGES(), VOP_FSYNC()
                                 * or ltsleep().
                                 */
1.126     mycroft   695:        struct buf *bp, *nbp;
1.158     chs       696:        int s, error;
                                /* PGO_CLEANIT is added only when the caller passed V_SAVE. */
1.166     chs       697:        int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
1.142     chs       698:                (flags & V_SAVE ? PGO_CLEANIT : 0);
                    699:
                    700:        /* XXXUBC this doesn't look at flags or slp* */
                                /*
                                 * v_interlock is taken here and is not released anywhere in
                                 * this function.  NOTE(review): presumably VOP_PUTPAGES()
                                 * drops it -- confirm against the VOP_PUTPAGES contract.
                                 */
1.166     chs       701:        simple_lock(&vp->v_interlock);
                    702:        error = VOP_PUTPAGES(vp, 0, 0, flushflags);
                    703:        if (error) {
                    704:                return error;
1.142     chs       705:        }
1.166     chs       706:
1.30      mycroft   707:        if (flags & V_SAVE) {
1.256     christos  708:                error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l);
1.126     mycroft   709:                if (error)
1.122     fvdl      710:                        return (error);
1.126     mycroft   711: #ifdef DIAGNOSTIC
                                        /*
                                         * After a successful fsync there must be no pending
                                         * output and nothing left on the dirty list.
                                         */
1.122     fvdl      712:                s = splbio();
1.126     mycroft   713:                if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125     chs       714:                        panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113     fvdl      715:                splx(s);
1.126     mycroft   716: #endif
1.30      mycroft   717:        }
1.113     fvdl      718:
                                /*
                                 * The list walks below run at splbio; every return from
                                 * here on restores the saved level with splx(s).
                                 */
1.115     fvdl      719:        s = splbio();
                    720:
1.126     mycroft   721: restart:
                                /* Pass 1: buffers on the clean list. */
                    722:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    723:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        724:                simple_lock(&bp->b_interlock);
1.126     mycroft   725:                if (bp->b_flags & B_BUSY) {
                                                /*
                                                 * Busy buffer: set B_WANTED and sleep on it.
                                                 * NOTE(review): PNORELOCK presumably leaves
                                                 * b_interlock released after the sleep --
                                                 * confirm.  After waking without error the
                                                 * scan starts over at the restart label.
                                                 */
                    726:                        bp->b_flags |= B_WANTED;
1.187     pk        727:                        error = ltsleep((caddr_t)bp,
                    728:                                    slpflag | (PRIBIO + 1) | PNORELOCK,
                    729:                                    "vinvalbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   730:                        if (error) {
                    731:                                splx(s);
                    732:                                return (error);
                    733:                        }
                    734:                        goto restart;
1.113     fvdl      735:                }
                                        /* Claim the buffer, mark it invalid and release it. */
1.126     mycroft   736:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        737:                simple_unlock(&bp->b_interlock);
1.126     mycroft   738:                brelse(bp);
                    739:        }
1.30      mycroft   740:
                                /*
                                 * Pass 2: buffers on the dirty list.  A "goto restart" taken
                                 * in this loop repeats pass 1 as well.
                                 */
1.126     mycroft   741:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    742:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        743:                simple_lock(&bp->b_interlock);
1.126     mycroft   744:                if (bp->b_flags & B_BUSY) {
                    745:                        bp->b_flags |= B_WANTED;
1.187     pk        746:                        error = ltsleep((caddr_t)bp,
                    747:                                    slpflag | (PRIBIO + 1) | PNORELOCK,
                    748:                                    "vinvalbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   749:                        if (error) {
                    750:                                splx(s);
                    751:                                return (error);
1.29      cgd       752:                        }
1.126     mycroft   753:                        goto restart;
                    754:                }
                    755:                /*
                    756:                 * XXX Since there are no node locks for NFS, I believe
                    757:                 * there is a slight chance that a delayed write will
                    758:                 * occur while sleeping just above, so check for it.
                    759:                 */
                                        /*
                                         * With V_SAVE, a buffer that is still B_DELWRI is
                                         * written through VOP_BWRITE() instead of being
                                         * invalidated, and the scan starts over.
                                         */
                    760:                if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                    761: #ifdef DEBUG
                    762:                        printf("buffer still DELWRI\n");
                    763: #endif
1.63      mycroft   764:                        bp->b_flags |= B_BUSY | B_VFLUSH;
1.187     pk        765:                        simple_unlock(&bp->b_interlock);
1.126     mycroft   766:                        VOP_BWRITE(bp);
                    767:                        goto restart;
                    768:                }
                    769:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        770:                simple_unlock(&bp->b_interlock);
1.126     mycroft   771:                brelse(bp);
                    772:        }
                    773:
                    774: #ifdef DIAGNOSTIC
                                /* Both buffer lists must be empty once the two passes finish. */
                    775:        if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
                    776:                panic("vinvalbuf: flush failed, vp %p", vp);
1.113     fvdl      777: #endif
1.126     mycroft   778:
                    779:        splx(s);
                    780:
                    781:        return (0);
                    782: }
                    783:
                    784: /*
                    785:  * Destroy any in core blocks past the truncation length.
                    786:  * Called with the underlying vnode locked, which should prevent new dirty
                    787:  * buffers from being queued.
                    788:  */
                    789: int
1.247     thorpej   790: vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
1.126     mycroft   791: {
                                /*
                                 * Outline: free the pages from the page-rounded byte offset
                                 * of block lbn onward through VOP_PUTPAGES(), then sweep the
                                 * clean and the dirty buffer lists and invalidate every
                                 * buffer whose b_lblkno is not below lbn.  slpflag and
                                 * slptimeo are only forwarded to ltsleep().  Returns 0 on
                                 * success, otherwise the non-zero value produced by
                                 * VOP_PUTPAGES() or ltsleep().
                                 */
                    792:        struct buf *bp, *nbp;
1.158     chs       793:        int s, error;
1.166     chs       794:        voff_t off;
                    795:
                                /*
                                 * Convert the block number to a byte offset with the mount
                                 * block shift and round it with round_page().  vp->v_mount
                                 * is dereferenced without a NULL check.
                                 */
                    796:        off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
                                /*
                                 * v_interlock is taken here and is not released anywhere in
                                 * this function.  NOTE(review): presumably VOP_PUTPAGES()
                                 * drops it -- confirm against the VOP_PUTPAGES contract.
                                 */
                    797:        simple_lock(&vp->v_interlock);
                    798:        error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
                    799:        if (error) {
                    800:                return error;
                    801:        }
1.126     mycroft   802:
                                /*
                                 * The list walks below run at splbio; every return from
                                 * here on restores the saved level with splx(s).
                                 */
                    803:        s = splbio();
                    804:
                    805: restart:
                                /* Pass 1: buffers on the clean list. */
                    806:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    807:                nbp = LIST_NEXT(bp, b_vnbufs);
                                        /* Buffers below the truncation block are kept. */
                    808:                if (bp->b_lblkno < lbn)
                    809:                        continue;
1.187     pk        810:                simple_lock(&bp->b_interlock);
1.126     mycroft   811:                if (bp->b_flags & B_BUSY) {
                                                /*
                                                 * Busy buffer: set B_WANTED and sleep on it.
                                                 * NOTE(review): PNORELOCK presumably leaves
                                                 * b_interlock released after the sleep --
                                                 * confirm.  After waking without error the
                                                 * scan starts over at the restart label.
                                                 */
                    812:                        bp->b_flags |= B_WANTED;
1.187     pk        813:                        error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
                    814:                            "vtruncbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   815:                        if (error) {
                    816:                                splx(s);
                    817:                                return (error);
1.29      cgd       818:                        }
1.126     mycroft   819:                        goto restart;
1.29      cgd       820:                }
                                        /* Claim the buffer, mark it invalid and release it. */
1.126     mycroft   821:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        822:                simple_unlock(&bp->b_interlock);
1.126     mycroft   823:                brelse(bp);
1.29      cgd       824:        }
1.115     fvdl      825:
                                /*
                                 * Pass 2: buffers on the dirty list, handled exactly like
                                 * the clean ones.  A "goto restart" taken in this loop
                                 * repeats pass 1 as well.
                                 */
1.126     mycroft   826:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    827:                nbp = LIST_NEXT(bp, b_vnbufs);
                    828:                if (bp->b_lblkno < lbn)
                    829:                        continue;
1.187     pk        830:                simple_lock(&bp->b_interlock);
1.126     mycroft   831:                if (bp->b_flags & B_BUSY) {
                    832:                        bp->b_flags |= B_WANTED;
1.187     pk        833:                        error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
                    834:                            "vtruncbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   835:                        if (error) {
                    836:                                splx(s);
                    837:                                return (error);
                    838:                        }
                    839:                        goto restart;
                    840:                }
                    841:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        842:                simple_unlock(&bp->b_interlock);
1.126     mycroft   843:                brelse(bp);
                    844:        }
1.115     fvdl      845:
                    846:        splx(s);
                    847:
1.30      mycroft   848:        return (0);
                    849: }
                    850:
                    851: void
1.247     thorpej   852: vflushbuf(struct vnode *vp, int sync)
1.30      mycroft   853: {
                                /*
                                 * Outline: push the vnode pages through VOP_PUTPAGES(), then
                                 * write every non-busy buffer found on the dirty list.  With
                                 * sync == 0 the function returns once the list walk finds
                                 * nothing more to write.  Otherwise it also sleeps until
                                 * v_numoutput reaches zero and starts over while the dirty
                                 * list is still not empty.
                                 */
1.123     augustss  854:        struct buf *bp, *nbp;
                                /* PGO_SYNCIO is requested only when sync is non-zero. */
1.166     chs       855:        int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
1.30      mycroft   856:        int s;
                    857:
                                /*
                                 * v_interlock is taken here and is not released anywhere in
                                 * this function.  NOTE(review): presumably VOP_PUTPAGES()
                                 * drops it -- confirm.  The return value of VOP_PUTPAGES()
                                 * is explicitly discarded.
                                 */
1.166     chs       858:        simple_lock(&vp->v_interlock);
                    859:        (void) VOP_PUTPAGES(vp, 0, 0, flags);
1.142     chs       860:
1.30      mycroft   861: loop:
                                /* The spl level is raised again on every pass through loop. */
                    862:        s = splbio();
1.126     mycroft   863:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    864:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        865:                simple_lock(&bp->b_interlock);
                                        /* Busy buffers are skipped, not waited for. */
                    866:                if ((bp->b_flags & B_BUSY)) {
                    867:                        simple_unlock(&bp->b_interlock);
1.30      mycroft   868:                        continue;
1.187     pk        869:                }
1.30      mycroft   870:                if ((bp->b_flags & B_DELWRI) == 0)
1.125     chs       871:                        panic("vflushbuf: not dirty, bp %p", bp);
                                        /*
                                         * Claim the buffer, then drop the interlock and the
                                         * spl level before issuing the write.
                                         */
1.63      mycroft   872:                bp->b_flags |= B_BUSY | B_VFLUSH;
1.187     pk        873:                simple_unlock(&bp->b_interlock);
1.30      mycroft   874:                splx(s);
                    875:                /*
                    876:                 * Wait for I/O associated with indirect blocks to complete,
                    877:                 * since there is no way to quickly wait for them below.
                    878:                 */
                                        /*
                                         * bwrite() is used only when sync is set and the
                                         * buffer belongs to a different vnode; every other
                                         * case uses bawrite().  The dirty list is then
                                         * rescanned from its head.
                                         */
                    879:                if (bp->b_vp == vp || sync == 0)
                    880:                        (void) bawrite(bp);
                    881:                else
                    882:                        (void) bwrite(bp);
                    883:                goto loop;
                    884:        }
                    885:        if (sync == 0) {
                    886:                splx(s);
                    887:                return;
                    888:        }
                                /*
                                 * Sleep on &vp->v_numoutput with VBWAIT set until the
                                 * outstanding output count is zero; vwakeup() clears VBWAIT
                                 * and issues the matching wakeup().
                                 */
1.187     pk        889:        simple_lock(&global_v_numoutput_slock);
1.30      mycroft   890:        while (vp->v_numoutput) {
                    891:                vp->v_flag |= VBWAIT;
1.187     pk        892:                ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
                    893:                        &global_v_numoutput_slock);
1.30      mycroft   894:        }
1.187     pk        895:        simple_unlock(&global_v_numoutput_slock);
1.30      mycroft   896:        splx(s);
                                /* Buffers still on the dirty list: report it and start over. */
1.126     mycroft   897:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30      mycroft   898:                vprint("vflushbuf: dirty", vp);
                    899:                goto loop;
                    900:        }
1.29      cgd       901: }
                    902:
                    903: /*
                    904:  * Associate a buffer with a vnode.
                    905:  */
1.50      christos  906: void
1.247     thorpej   907: bgetvp(struct vnode *vp, struct buf *bp)
1.29      cgd       908: {
1.115     fvdl      909:        int s;
1.29      cgd       910:
                    911:        if (bp->b_vp)
1.125     chs       912:                panic("bgetvp: not free, bp %p", bp);
1.29      cgd       913:        VHOLD(vp);
1.115     fvdl      914:        s = splbio();
1.29      cgd       915:        bp->b_vp = vp;
                    916:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                    917:                bp->b_dev = vp->v_rdev;
                    918:        else
                    919:                bp->b_dev = NODEV;
                    920:        /*
                    921:         * Insert onto list for new vnode.
                    922:         */
                    923:        bufinsvn(bp, &vp->v_cleanblkhd);
1.115     fvdl      924:        splx(s);
1.29      cgd       925: }
                    926:
                    927: /*
                    928:  * Disassociate a buffer from a vnode.
                    929:  */
1.50      christos  930: void
1.247     thorpej   931: brelvp(struct buf *bp)
1.29      cgd       932: {
                    933:        struct vnode *vp;
1.115     fvdl      934:        int s;
1.29      cgd       935:
1.125     chs       936:        if (bp->b_vp == NULL)
                    937:                panic("brelvp: vp NULL, bp %p", bp);
1.115     fvdl      938:
                    939:        s = splbio();
1.113     fvdl      940:        vp = bp->b_vp;
1.29      cgd       941:        /*
                    942:         * Delete from old vnode list, if on one.
                    943:         */
1.177     matt      944:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       945:                bufremvn(bp);
1.142     chs       946:
1.158     chs       947:        if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
1.142     chs       948:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.252     yamt      949:                vp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
1.113     fvdl      950:                LIST_REMOVE(vp, v_synclist);
                    951:        }
1.142     chs       952:
                    953:        bp->b_vp = NULL;
1.29      cgd       954:        HOLDRELE(vp);
1.115     fvdl      955:        splx(s);
1.29      cgd       956: }
                    957:
                    958: /*
                    959:  * Reassign a buffer from one vnode to another.
                    960:  * Used to assign file specific control information
                    961:  * (indirect blocks) to the vnode to which they belong.
1.115     fvdl      962:  *
                    963:  * This function must be called at splbio().
1.29      cgd       964:  */
1.50      christos  965: void
1.247     thorpej   966: reassignbuf(struct buf *bp, struct vnode *newvp)
1.29      cgd       967: {
1.113     fvdl      968:        struct buflists *listheadp;
1.246     christos  969:        int delayx;
1.29      cgd       970:
                    971:        /*
                    972:         * Delete from old vnode list, if on one.
                    973:         */
1.177     matt      974:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       975:                bufremvn(bp);
                    976:        /*
                    977:         * If dirty, put on list of dirty buffers;
                    978:         * otherwise insert onto list of clean buffers.
                    979:         */
1.113     fvdl      980:        if ((bp->b_flags & B_DELWRI) == 0) {
                    981:                listheadp = &newvp->v_cleanblkhd;
1.158     chs       982:                if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1.142     chs       983:                    (newvp->v_flag & VONWORKLST) &&
1.113     fvdl      984:                    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
1.252     yamt      985:                        newvp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
1.113     fvdl      986:                        LIST_REMOVE(newvp, v_synclist);
                    987:                }
                    988:        } else {
1.29      cgd       989:                listheadp = &newvp->v_dirtyblkhd;
1.113     fvdl      990:                if ((newvp->v_flag & VONWORKLST) == 0) {
                    991:                        switch (newvp->v_type) {
                    992:                        case VDIR:
1.246     christos  993:                                delayx = dirdelay;
1.113     fvdl      994:                                break;
                    995:                        case VBLK:
                    996:                                if (newvp->v_specmountpoint != NULL) {
1.246     christos  997:                                        delayx = metadelay;
1.113     fvdl      998:                                        break;
                    999:                                }
                   1000:                                /* fall through */
                   1001:                        default:
1.246     christos 1002:                                delayx = filedelay;
1.118     mycroft  1003:                                break;
1.113     fvdl     1004:                        }
1.118     mycroft  1005:                        if (!newvp->v_mount ||
                   1006:                            (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1.246     christos 1007:                                vn_syncer_add_to_worklist(newvp, delayx);
1.113     fvdl     1008:                }
                   1009:        }
1.29      cgd      1010:        bufinsvn(bp, listheadp);
                   1011: }
                   1012:
                   1013: /*
                   1014:  * Create a vnode for a block device.
1.59      thorpej  1015:  * Used for root filesystem and swap areas.
1.29      cgd      1016:  * Also used for memory file system special devices.
                   1017:  */
1.50      christos 1018: int
1.247     thorpej  1019: bdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd      1020: {
1.30      mycroft  1021:
                   1022:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd      1023: }
                   1024:
                   1025: /*
                   1026:  * Create a vnode for a character device.
                   1027:  * Used for kernfs and some console handling.
                   1028:  */
1.50      christos 1029: int
1.247     thorpej  1030: cdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd      1031: {
1.30      mycroft  1032:
                   1033:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd      1034: }
                   1035:
                   1036: /*
                   1037:  * Create a vnode for a device.
                   1038:  * Used by bdevvp (block device) for root file system etc.,
                   1039:  * and by cdevvp (character device) for console and kernfs.
                   1040:  */
1.260     yamt     1041: static int
1.247     thorpej  1042: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
1.29      cgd      1043: {
1.123     augustss 1044:        struct vnode *vp;
1.29      cgd      1045:        struct vnode *nvp;
                   1046:        int error;
                   1047:
1.80      fvdl     1048:        if (dev == NODEV) {
                   1049:                *vpp = NULLVP;
1.29      cgd      1050:                return (0);
1.80      fvdl     1051:        }
1.50      christos 1052:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1053:        if (error) {
                   1054:                *vpp = NULLVP;
                   1055:                return (error);
                   1056:        }
                   1057:        vp = nvp;
                   1058:        vp->v_type = type;
1.50      christos 1059:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd      1060:                vput(vp);
                   1061:                vp = nvp;
                   1062:        }
                   1063:        *vpp = vp;
                   1064:        return (0);
                   1065: }
                   1066:
                   1067: /*
                   1068:  * Check to see if the new vnode represents a special device
                   1069:  * for which we already have a vnode (either because of
                   1070:  * bdevvp() or because of a different vnode representing
                   1071:  * the same block device). If such an alias exists, deallocate
                   1072:  * the existing contents and return the aliased vnode. The
                   1073:  * caller is responsible for filling it with its new contents.
                   1074:  */
                   1075: struct vnode *
1.247     thorpej  1076: checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
1.29      cgd      1077: {
1.256     christos 1078:        struct lwp *l = curlwp;         /* XXX */
1.123     augustss 1079:        struct vnode *vp;
1.29      cgd      1080:        struct vnode **vpp;
                   1081:
                   1082:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                   1083:                return (NULLVP);
                   1084:
                   1085:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                   1086: loop:
1.80      fvdl     1087:        simple_lock(&spechash_slock);
1.29      cgd      1088:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                   1089:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                   1090:                        continue;
                   1091:                /*
                   1092:                 * Alias, but not in use, so flush it out.
                   1093:                 */
1.80      fvdl     1094:                simple_lock(&vp->v_interlock);
1.231     mycroft  1095:                simple_unlock(&spechash_slock);
1.29      cgd      1096:                if (vp->v_usecount == 0) {
1.256     christos 1097:                        vgonel(vp, l);
1.29      cgd      1098:                        goto loop;
                   1099:                }
1.231     mycroft  1100:                /*
                   1101:                 * What we're interested to know here is if someone else has
                   1102:                 * removed this vnode from the device hash list while we were
                   1103:                 * waiting.  This can only happen if vclean() did it, and
1.259     yamt     1104:                 * this requires the vnode to be locked.
1.231     mycroft  1105:                 */
1.259     yamt     1106:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
1.29      cgd      1107:                        goto loop;
1.259     yamt     1108:                if (vp->v_specinfo == NULL) {
                   1109:                        vput(vp);
                   1110:                        goto loop;
                   1111:                }
1.231     mycroft  1112:                simple_lock(&spechash_slock);
1.29      cgd      1113:                break;
                   1114:        }
1.34      cgd      1115:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd      1116:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej  1117:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                   1118:                /* XXX Erg. */
                   1119:                if (nvp->v_specinfo == NULL) {
                   1120:                        simple_unlock(&spechash_slock);
                   1121:                        uvm_wait("checkalias");
                   1122:                        goto loop;
                   1123:                }
                   1124:
1.29      cgd      1125:                nvp->v_rdev = nvp_rdev;
                   1126:                nvp->v_hashchain = vpp;
                   1127:                nvp->v_specnext = *vpp;
1.113     fvdl     1128:                nvp->v_specmountpoint = NULL;
1.80      fvdl     1129:                simple_unlock(&spechash_slock);
1.62      kleink   1130:                nvp->v_speclockf = NULL;
1.216     hannken  1131:                simple_lock_init(&nvp->v_spec_cow_slock);
                   1132:                SLIST_INIT(&nvp->v_spec_cow_head);
                   1133:                nvp->v_spec_cow_req = 0;
                   1134:                nvp->v_spec_cow_count = 0;
                   1135:
1.29      cgd      1136:                *vpp = nvp;
1.80      fvdl     1137:                if (vp != NULLVP) {
1.29      cgd      1138:                        nvp->v_flag |= VALIASED;
                   1139:                        vp->v_flag |= VALIASED;
                   1140:                        vput(vp);
                   1141:                }
                   1142:                return (NULLVP);
                   1143:        }
1.80      fvdl     1144:        simple_unlock(&spechash_slock);
                   1145:        VOP_UNLOCK(vp, 0);
                   1146:        simple_lock(&vp->v_interlock);
1.256     christos 1147:        vclean(vp, 0, l);
1.29      cgd      1148:        vp->v_op = nvp->v_op;
                   1149:        vp->v_tag = nvp->v_tag;
1.104     wrstuden 1150:        vp->v_vnlock = &vp->v_lock;
                   1151:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd      1152:        nvp->v_type = VNON;
                   1153:        insmntque(vp, mp);
                   1154:        return (vp);
                   1155: }
                   1156:
                   1157: /*
                   1158:  * Grab a particular vnode from the free list, increment its
1.83      fvdl     1159:  * reference count and lock it. If the vnode lock bit is set the
                   1160:  * vnode is being eliminated in vgone. In that case, we can not
                   1161:  * grab the vnode, so the process is awakened when the transition is
                   1162:  * completed, and an error returned to indicate that the vnode is no
                   1163:  * longer usable (possibly having been changed to a new file system type).
1.29      cgd      1164:  */
1.30      mycroft  1165: int
1.247     thorpej  1166: vget(struct vnode *vp, int flags)
1.29      cgd      1167: {
1.175     perseant 1168:        int error;
1.29      cgd      1169:
1.30      mycroft  1170:        /*
                   1171:         * If the vnode is in the process of being cleaned out for
                   1172:         * another use, we wait for the cleaning to finish and then
1.80      fvdl     1173:         * return failure. Cleaning is determined by checking that
                   1174:         * the VXLOCK flag is set.
                   1175:         */
1.142     chs      1176:
1.80      fvdl     1177:        if ((flags & LK_INTERLOCK) == 0)
                   1178:                simple_lock(&vp->v_interlock);
1.257     yamt     1179:        if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
1.142     chs      1180:                if (flags & LK_NOWAIT) {
1.143     sommerfe 1181:                        simple_unlock(&vp->v_interlock);
1.142     chs      1182:                        return EBUSY;
                   1183:                }
1.29      cgd      1184:                vp->v_flag |= VXWANT;
1.158     chs      1185:                ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80      fvdl     1186:                return (ENOENT);
1.29      cgd      1187:        }
1.80      fvdl     1188:        if (vp->v_usecount == 0) {
                   1189:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1190:                if (vp->v_holdcnt > 0)
                   1191:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1192:                else
                   1193:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1194:                simple_unlock(&vnode_free_list_slock);
                   1195:        }
1.29      cgd      1196:        vp->v_usecount++;
1.112     mycroft  1197: #ifdef DIAGNOSTIC
                   1198:        if (vp->v_usecount == 0) {
                   1199:                vprint("vget", vp);
1.125     chs      1200:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft  1201:        }
                   1202: #endif
1.80      fvdl     1203:        if (flags & LK_TYPE_MASK) {
1.113     fvdl     1204:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1.257     yamt     1205:                        vrele(vp);
1.113     fvdl     1206:                }
1.80      fvdl     1207:                return (error);
                   1208:        }
                   1209:        simple_unlock(&vp->v_interlock);
1.29      cgd      1210:        return (0);
                   1211: }
                   1212:
                   1213: /*
                   1214:  * vput(), just unlock and vrele()
                   1215:  */
                   1216: void
1.247     thorpej  1217: vput(struct vnode *vp)
1.29      cgd      1218: {
1.256     christos 1219:        struct lwp *l = curlwp;         /* XXX */
1.30      mycroft  1220:
1.111     mycroft  1221: #ifdef DIAGNOSTIC
1.80      fvdl     1222:        if (vp == NULL)
                   1223:                panic("vput: null vp");
                   1224: #endif
                   1225:        simple_lock(&vp->v_interlock);
                   1226:        vp->v_usecount--;
                   1227:        if (vp->v_usecount > 0) {
                   1228:                simple_unlock(&vp->v_interlock);
                   1229:                VOP_UNLOCK(vp, 0);
                   1230:                return;
                   1231:        }
                   1232: #ifdef DIAGNOSTIC
                   1233:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1234:                vprint("vput: bad ref count", vp);
                   1235:                panic("vput: ref cnt");
                   1236:        }
                   1237: #endif
                   1238:        /*
1.87      pk       1239:         * Insert at tail of LRU list.
1.80      fvdl     1240:         */
                   1241:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1242:        if (vp->v_holdcnt > 0)
                   1243:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1244:        else
                   1245:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1246:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej  1247:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1248:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1249:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1250:        }
1.252     yamt     1251:        vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
1.80      fvdl     1252:        simple_unlock(&vp->v_interlock);
1.256     christos 1253:        VOP_INACTIVE(vp, l);
1.29      cgd      1254: }
                   1255:
                   1256: /*
                   1257:  * Vnode release.
                   1258:  * If count drops to zero, call inactive routine and return to freelist.
                   1259:  */
                   1260: void
1.247     thorpej  1261: vrele(struct vnode *vp)
1.29      cgd      1262: {
1.256     christos 1263:        struct lwp *l = curlwp;         /* XXX */
1.29      cgd      1264:
                   1265: #ifdef DIAGNOSTIC
                   1266:        if (vp == NULL)
                   1267:                panic("vrele: null vp");
                   1268: #endif
1.80      fvdl     1269:        simple_lock(&vp->v_interlock);
1.29      cgd      1270:        vp->v_usecount--;
1.80      fvdl     1271:        if (vp->v_usecount > 0) {
                   1272:                simple_unlock(&vp->v_interlock);
1.29      cgd      1273:                return;
1.80      fvdl     1274:        }
1.29      cgd      1275: #ifdef DIAGNOSTIC
1.80      fvdl     1276:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd      1277:                vprint("vrele: bad ref count", vp);
1.142     chs      1278:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd      1279:        }
                   1280: #endif
1.30      mycroft  1281:        /*
1.87      pk       1282:         * Insert at tail of LRU list.
1.30      mycroft  1283:         */
1.80      fvdl     1284:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1285:        if (vp->v_holdcnt > 0)
                   1286:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1287:        else
                   1288:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1289:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej  1290:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1291:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1292:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1293:        }
1.252     yamt     1294:        vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
1.80      fvdl     1295:        if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1.256     christos 1296:                VOP_INACTIVE(vp, l);
1.29      cgd      1297: }
                   1298:
                   1299: /*
                   1300:  * Page or buffer structure gets a reference.
1.258     chs      1301:  * Called with v_interlock held.
1.29      cgd      1302:  */
1.30      mycroft  1303: void
1.247     thorpej  1304: vholdl(struct vnode *vp)
1.29      cgd      1305: {
                   1306:
1.113     fvdl     1307:        /*
                   1308:         * If it is on the freelist and the hold count is currently
                   1309:         * zero, move it to the hold list. The test of the back
                   1310:         * pointer and the use reference count of zero is because
                   1311:         * it will be removed from a free list by getnewvnode,
                   1312:         * but will not have its reference count incremented until
                   1313:         * after calling vgone. If the reference count were
                   1314:         * incremented first, vgone would (incorrectly) try to
                   1315:         * close the previous instance of the underlying object.
                   1316:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1317:         * getnewvnode after removing it from a freelist to ensure
                   1318:         * that we do not try to move it here.
                   1319:         */
                   1320:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1321:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1322:                simple_lock(&vnode_free_list_slock);
                   1323:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1324:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1325:                simple_unlock(&vnode_free_list_slock);
                   1326:        }
1.29      cgd      1327:        vp->v_holdcnt++;
                   1328: }
                   1329:
                   1330: /*
                   1331:  * Page or buffer structure frees a reference.
1.258     chs      1332:  * Called with v_interlock held.
1.29      cgd      1333:  */
1.30      mycroft  1334: void
1.247     thorpej  1335: holdrelel(struct vnode *vp)
1.29      cgd      1336: {
                   1337:
                   1338:        if (vp->v_holdcnt <= 0)
1.215     yamt     1339:                panic("holdrelel: holdcnt vp %p", vp);
1.29      cgd      1340:        vp->v_holdcnt--;
1.142     chs      1341:
1.113     fvdl     1342:        /*
                   1343:         * If it is on the holdlist and the hold count drops to
                   1344:         * zero, move it to the free list. The test of the back
                   1345:         * pointer and the use reference count of zero is because
                   1346:         * it will be removed from a free list by getnewvnode,
                   1347:         * but will not have its reference count incremented until
                   1348:         * after calling vgone. If the reference count were
                   1349:         * incremented first, vgone would (incorrectly) try to
                   1350:         * close the previous instance of the underlying object.
                   1351:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1352:         * getnewvnode after removing it from a freelist to ensure
                   1353:         * that we do not try to move it here.
                   1354:         */
1.142     chs      1355:
1.113     fvdl     1356:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1357:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1358:                simple_lock(&vnode_free_list_slock);
                   1359:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1360:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1361:                simple_unlock(&vnode_free_list_slock);
                   1362:        }
1.81      ross     1363: }
                   1364:
                   1365: /*
                   1366:  * Vnode reference.
                   1367:  */
                   1368: void
1.247     thorpej  1369: vref(struct vnode *vp)
1.81      ross     1370: {
                   1371:
                   1372:        simple_lock(&vp->v_interlock);
                   1373:        if (vp->v_usecount <= 0)
1.125     chs      1374:                panic("vref used where vget required, vp %p", vp);
1.81      ross     1375:        vp->v_usecount++;
1.112     mycroft  1376: #ifdef DIAGNOSTIC
                   1377:        if (vp->v_usecount == 0) {
                   1378:                vprint("vref", vp);
1.125     chs      1379:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft  1380:        }
                   1381: #endif
1.80      fvdl     1382:        simple_unlock(&vp->v_interlock);
1.29      cgd      1383: }
                   1384:
                   1385: /*
                   1386:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1387:  *
1.183     yamt     1388:  * If FORCECLOSE is not specified, there should not be any active ones,
1.29      cgd      1389:  * return error if any are found (nb: this is a user error, not a
1.183     yamt     1390:  * system error). If FORCECLOSE is specified, detach any active vnodes
1.29      cgd      1391:  * that are found.
1.183     yamt     1392:  *
                   1393:  * If WRITECLOSE is set, only flush out regular file vnodes open for
                   1394:  * writing.
                   1395:  *
                   1396:  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29      cgd      1397:  */
1.30      mycroft  1398: #ifdef DEBUG
                   1399: int busyprt = 0;       /* print out busy vnodes */
                   1400: struct ctldebug debug1 = { "busyprt", &busyprt };
                   1401: #endif
1.29      cgd      1402:
1.50      christos 1403: int
1.247     thorpej  1404: vflush(struct mount *mp, struct vnode *skipvp, int flags)
1.29      cgd      1405: {
1.256     christos 1406:        struct lwp *l = curlwp;         /* XXX */
1.123     augustss 1407:        struct vnode *vp, *nvp;
1.29      cgd      1408:        int busy = 0;
                   1409:
1.80      fvdl     1410:        simple_lock(&mntvnode_slock);
1.29      cgd      1411: loop:
1.177     matt     1412:        for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1.29      cgd      1413:                if (vp->v_mount != mp)
                   1414:                        goto loop;
1.177     matt     1415:                nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      1416:                /*
                   1417:                 * Skip over a selected vnode.
                   1418:                 */
                   1419:                if (vp == skipvp)
                   1420:                        continue;
1.80      fvdl     1421:                simple_lock(&vp->v_interlock);
1.29      cgd      1422:                /*
                   1423:                 * Skip over a vnodes marked VSYSTEM.
                   1424:                 */
1.80      fvdl     1425:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                   1426:                        simple_unlock(&vp->v_interlock);
1.29      cgd      1427:                        continue;
1.80      fvdl     1428:                }
1.29      cgd      1429:                /*
1.30      mycroft  1430:                 * If WRITECLOSE is set, only flush out regular file
                   1431:                 * vnodes open for writing.
                   1432:                 */
                   1433:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1434:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                   1435:                        simple_unlock(&vp->v_interlock);
1.30      mycroft  1436:                        continue;
1.92      thorpej  1437:                }
1.30      mycroft  1438:                /*
1.29      cgd      1439:                 * With v_usecount == 0, all we need to do is clear
                   1440:                 * out the vnode data structures and we are done.
                   1441:                 */
                   1442:                if (vp->v_usecount == 0) {
1.80      fvdl     1443:                        simple_unlock(&mntvnode_slock);
1.256     christos 1444:                        vgonel(vp, l);
1.80      fvdl     1445:                        simple_lock(&mntvnode_slock);
1.29      cgd      1446:                        continue;
                   1447:                }
                   1448:                /*
1.30      mycroft  1449:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1450:                 * For block or character devices, revert to an
                   1451:                 * anonymous device. For all other files, just kill them.
                   1452:                 */
                   1453:                if (flags & FORCECLOSE) {
1.80      fvdl     1454:                        simple_unlock(&mntvnode_slock);
1.29      cgd      1455:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.256     christos 1456:                                vgonel(vp, l);
1.29      cgd      1457:                        } else {
1.256     christos 1458:                                vclean(vp, 0, l);
1.30      mycroft  1459:                                vp->v_op = spec_vnodeop_p;
1.29      cgd      1460:                                insmntque(vp, (struct mount *)0);
                   1461:                        }
1.80      fvdl     1462:                        simple_lock(&mntvnode_slock);
1.29      cgd      1463:                        continue;
                   1464:                }
1.30      mycroft  1465: #ifdef DEBUG
1.29      cgd      1466:                if (busyprt)
                   1467:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1468: #endif
1.80      fvdl     1469:                simple_unlock(&vp->v_interlock);
1.29      cgd      1470:                busy++;
                   1471:        }
1.80      fvdl     1472:        simple_unlock(&mntvnode_slock);
1.29      cgd      1473:        if (busy)
                   1474:                return (EBUSY);
                   1475:        return (0);
                   1476: }
                   1477:
                   1478: /*
                   1479:  * Disassociate the underlying file system from a vnode.
                   1480:  */
1.260     yamt     1481: static void
1.256     christos 1482: vclean(struct vnode *vp, int flags, struct lwp *l)
1.29      cgd      1483: {
1.208     hannken  1484:        struct mount *mp;
1.175     perseant 1485:        int active;
1.29      cgd      1486:
1.166     chs      1487:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1488:
1.29      cgd      1489:        /*
                   1490:         * Check to see if the vnode is in use.
                   1491:         * If so we have to reference it before we clean it out
                   1492:         * so that its count cannot fall to zero and generate a
                   1493:         * race against ourselves to recycle it.
                   1494:         */
1.166     chs      1495:
1.112     mycroft  1496:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1497:                vp->v_usecount++;
1.112     mycroft  1498: #ifdef DIAGNOSTIC
                   1499:                if (vp->v_usecount == 0) {
                   1500:                        vprint("vclean", vp);
                   1501:                        panic("vclean: usecount overflow");
                   1502:                }
                   1503: #endif
                   1504:        }
1.87      pk       1505:
1.29      cgd      1506:        /*
                   1507:         * Prevent the vnode from being recycled or
                   1508:         * brought into use while we clean it out.
                   1509:         */
                   1510:        if (vp->v_flag & VXLOCK)
1.125     chs      1511:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1512:        vp->v_flag |= VXLOCK;
1.161     thorpej  1513:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1514:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1515:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1516:        }
1.161     thorpej  1517:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142     chs      1518:
1.29      cgd      1519:        /*
1.80      fvdl     1520:         * Even if the count is zero, the VOP_INACTIVE routine may still
                   1521:         * have the object locked while it cleans it out. The VOP_LOCK
                   1522:         * ensures that the VOP_INACTIVE routine is done with its work.
                   1523:         * For active vnodes, it ensures that no other activity can
                   1524:         * occur while the underlying object is being cleaned out.
                   1525:         */
                   1526:        VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
                   1527:
1.98      wrstuden 1528:        /*
1.142     chs      1529:         * Clean out any cached data associated with the vnode.
1.231     mycroft  1530:         * If special device, remove it from special device alias list.
                   1531:         * if it is on one.
1.29      cgd      1532:         */
1.166     chs      1533:        if (flags & DOCLOSE) {
1.211     dbj      1534:                int error;
1.231     mycroft  1535:                struct vnode *vq, *vx;
                   1536:
1.208     hannken  1537:                vn_start_write(vp, &mp, V_WAIT | V_LOWER);
1.256     christos 1538:                error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.208     hannken  1539:                vn_finished_write(mp, V_LOWER);
1.211     dbj      1540:                if (error)
1.256     christos 1541:                        error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.211     dbj      1542:                KASSERT(error == 0);
1.166     chs      1543:                KASSERT((vp->v_flag & VONWORKLST) == 0);
1.231     mycroft  1544:
                   1545:                if (active)
                   1546:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
                   1547:
                   1548:                if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
                   1549:                    vp->v_specinfo != 0) {
                   1550:                        simple_lock(&spechash_slock);
                   1551:                        if (vp->v_hashchain != NULL) {
                   1552:                                if (*vp->v_hashchain == vp) {
                   1553:                                        *vp->v_hashchain = vp->v_specnext;
                   1554:                                } else {
                   1555:                                        for (vq = *vp->v_hashchain; vq;
                   1556:                                             vq = vq->v_specnext) {
                   1557:                                                if (vq->v_specnext != vp)
                   1558:                                                        continue;
                   1559:                                                vq->v_specnext = vp->v_specnext;
                   1560:                                                break;
                   1561:                                        }
                   1562:                                        if (vq == NULL)
                   1563:                                                panic("missing bdev");
                   1564:                                }
                   1565:                                if (vp->v_flag & VALIASED) {
                   1566:                                        vx = NULL;
                   1567:                                                for (vq = *vp->v_hashchain; vq;
                   1568:                                                     vq = vq->v_specnext) {
                   1569:                                                if (vq->v_rdev != vp->v_rdev ||
                   1570:                                                    vq->v_type != vp->v_type)
                   1571:                                                        continue;
                   1572:                                                if (vx)
                   1573:                                                        break;
                   1574:                                                vx = vq;
                   1575:                                        }
                   1576:                                        if (vx == NULL)
                   1577:                                                panic("missing alias");
                   1578:                                        if (vq == NULL)
                   1579:                                                vx->v_flag &= ~VALIASED;
                   1580:                                        vp->v_flag &= ~VALIASED;
                   1581:                                }
                   1582:                        }
                   1583:                        simple_unlock(&spechash_slock);
                   1584:                        FREE(vp->v_specinfo, M_VNODE);
                   1585:                        vp->v_specinfo = NULL;
                   1586:                }
1.166     chs      1587:        }
                   1588:        LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80      fvdl     1589:
1.29      cgd      1590:        /*
1.30      mycroft  1591:         * If purging an active vnode, it must be closed and
1.80      fvdl     1592:         * deactivated before being reclaimed. Note that the
                   1593:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1594:         */
                   1595:        if (active) {
1.256     christos 1596:                VOP_INACTIVE(vp, l);
1.80      fvdl     1597:        } else {
                   1598:                /*
                   1599:                 * Any other processes trying to obtain this lock must first
                   1600:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1601:                 */
                   1602:                VOP_UNLOCK(vp, 0);
1.29      cgd      1603:        }
                   1604:        /*
                   1605:         * Reclaim the vnode.
                   1606:         */
1.256     christos 1607:        if (VOP_RECLAIM(vp, l))
1.125     chs      1608:                panic("vclean: cannot reclaim, vp %p", vp);
1.87      pk       1609:        if (active) {
                   1610:                /*
                   1611:                 * Inline copy of vrele() since VOP_INACTIVE
                   1612:                 * has already been called.
                   1613:                 */
                   1614:                simple_lock(&vp->v_interlock);
                   1615:                if (--vp->v_usecount <= 0) {
                   1616: #ifdef DIAGNOSTIC
                   1617:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1618:                                vprint("vclean: bad ref count", vp);
                   1619:                                panic("vclean: ref cnt");
                   1620:                        }
                   1621: #endif
                   1622:                        /*
                   1623:                         * Insert at tail of LRU list.
                   1624:                         */
1.142     chs      1625:
1.113     fvdl     1626:                        simple_unlock(&vp->v_interlock);
1.87      pk       1627:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1628: #ifdef DIAGNOSTIC
1.113     fvdl     1629:                        if (vp->v_holdcnt > 0)
1.125     chs      1630:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1631: #endif
1.87      pk       1632:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1633:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1634:                } else
                   1635:                        simple_unlock(&vp->v_interlock);
1.87      pk       1636:        }
1.30      mycroft  1637:
1.169     chs      1638:        KASSERT(vp->v_uobj.uo_npages == 0);
1.255     yamt     1639:        if (vp->v_type == VREG && vp->v_ractx != NULL) {
                   1640:                uvm_ra_freectx(vp->v_ractx);
                   1641:                vp->v_ractx = NULL;
                   1642:        }
1.80      fvdl     1643:        cache_purge(vp);
                   1644:
1.29      cgd      1645:        /*
1.30      mycroft  1646:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1647:         */
1.30      mycroft  1648:        vp->v_op = dead_vnodeop_p;
                   1649:        vp->v_tag = VT_NON;
1.139     enami    1650:        simple_lock(&vp->v_interlock);
1.181     jdolecek 1651:        VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
1.234     thorpej  1652:        vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1.29      cgd      1653:        if (vp->v_flag & VXWANT) {
                   1654:                vp->v_flag &= ~VXWANT;
1.139     enami    1655:                simple_unlock(&vp->v_interlock);
1.29      cgd      1656:                wakeup((caddr_t)vp);
1.139     enami    1657:        } else
                   1658:                simple_unlock(&vp->v_interlock);
1.29      cgd      1659: }
                   1660:
                   1661: /*
1.80      fvdl     1662:  * Recycle an unused vnode to the front of the free list.
                   1663:  * Release the passed interlock if the vnode will be recycled.
1.29      cgd      1664:  */
1.80      fvdl     1665: int
1.256     christos 1666: vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
1.217     junyoung 1667: {
                   1668:
1.80      fvdl     1669:        simple_lock(&vp->v_interlock);
                   1670:        if (vp->v_usecount == 0) {
                   1671:                if (inter_lkp)
                   1672:                        simple_unlock(inter_lkp);
1.256     christos 1673:                vgonel(vp, l);
1.80      fvdl     1674:                return (1);
1.29      cgd      1675:        }
1.80      fvdl     1676:        simple_unlock(&vp->v_interlock);
                   1677:        return (0);
1.29      cgd      1678: }
                   1679:
                   1680: /*
                   1681:  * Eliminate all activity associated with a vnode
                   1682:  * in preparation for reuse.
                   1683:  */
                   1684: void
1.247     thorpej  1685: vgone(struct vnode *vp)
1.80      fvdl     1686: {
1.256     christos 1687:        struct lwp *l = curlwp;         /* XXX */
1.80      fvdl     1688:
                   1689:        simple_lock(&vp->v_interlock);
1.256     christos 1690:        vgonel(vp, l);
1.80      fvdl     1691: }
                   1692:
                   1693: /*
                   1694:  * vgone, with the vp interlock held.
                   1695:  */
                   1696: void
1.256     christos 1697: vgonel(struct vnode *vp, struct lwp *l)
1.29      cgd      1698: {
                   1699:
1.166     chs      1700:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1701:
1.29      cgd      1702:        /*
                   1703:         * If a vgone (or vclean) is already in progress,
                   1704:         * wait until it is done and return.
                   1705:         */
1.166     chs      1706:
1.29      cgd      1707:        if (vp->v_flag & VXLOCK) {
                   1708:                vp->v_flag |= VXWANT;
1.166     chs      1709:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29      cgd      1710:                return;
                   1711:        }
1.166     chs      1712:
1.29      cgd      1713:        /*
                   1714:         * Clean out the filesystem specific data.
                   1715:         */
1.166     chs      1716:
1.256     christos 1717:        vclean(vp, DOCLOSE, l);
1.166     chs      1718:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1719:
1.29      cgd      1720:        /*
                   1721:         * Delete from old mount point vnode list, if on one.
                   1722:         */
1.166     chs      1723:
1.80      fvdl     1724:        if (vp->v_mount != NULL)
                   1725:                insmntque(vp, (struct mount *)0);
1.166     chs      1726:
1.29      cgd      1727:        /*
1.202     yamt     1728:         * The test of the back pointer and the reference count of
                   1729:         * zero is because it will be removed from the free list by
                   1730:         * getcleanvnode, but will not have its reference count
                   1731:         * incremented until after calling vgone. If the reference
                   1732:         * count were incremented first, vgone would (incorrectly)
                   1733:         * try to close the previous instance of the underlying object.
1.30      mycroft  1734:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1735:         * getnewvnode after removing it from the freelist to ensure
                   1736:         * that we do not try to move it here.
1.29      cgd      1737:         */
1.166     chs      1738:
1.202     yamt     1739:        vp->v_type = VBAD;
1.80      fvdl     1740:        if (vp->v_usecount == 0) {
1.202     yamt     1741:                boolean_t dofree;
                   1742:
1.80      fvdl     1743:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1744:                if (vp->v_holdcnt > 0)
1.125     chs      1745:                        panic("vgonel: not clean, vp %p", vp);
1.202     yamt     1746:                /*
                   1747:                 * if it isn't on the freelist, we're called by getcleanvnode
                   1748:                 * and vnode is being re-used.  otherwise, we'll free it.
                   1749:                 */
                   1750:                dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
                   1751:                if (dofree) {
1.80      fvdl     1752:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.202     yamt     1753:                        numvnodes--;
1.80      fvdl     1754:                }
                   1755:                simple_unlock(&vnode_free_list_slock);
1.202     yamt     1756:                if (dofree)
                   1757:                        pool_put(&vnode_pool, vp);
1.29      cgd      1758:        }
                   1759: }
                   1760:
                   1761: /*
                   1762:  * Lookup a vnode by device number.
                   1763:  */
1.50      christos 1764: int
1.247     thorpej  1765: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1.29      cgd      1766: {
1.80      fvdl     1767:        struct vnode *vp;
                   1768:        int rc = 0;
1.29      cgd      1769:
1.80      fvdl     1770:        simple_lock(&spechash_slock);
1.29      cgd      1771:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1772:                if (dev != vp->v_rdev || type != vp->v_type)
                   1773:                        continue;
                   1774:                *vpp = vp;
1.80      fvdl     1775:                rc = 1;
                   1776:                break;
1.29      cgd      1777:        }
1.80      fvdl     1778:        simple_unlock(&spechash_slock);
                   1779:        return (rc);
1.96      thorpej  1780: }
                   1781:
                   1782: /*
                   1783:  * Revoke all the vnodes corresponding to the specified minor number
                   1784:  * range (endpoints inclusive) of the specified major.
                   1785:  */
                   1786: void
1.247     thorpej  1787: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96      thorpej  1788: {
                   1789:        struct vnode *vp;
                   1790:        int mn;
                   1791:
                   1792:        for (mn = minl; mn <= minh; mn++)
                   1793:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1794:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1795: }
                   1796:
                   1797: /*
                   1798:  * Calculate the total number of references to a special device.
                   1799:  */
1.30      mycroft  1800: int
1.247     thorpej  1801: vcount(struct vnode *vp)
1.29      cgd      1802: {
1.123     augustss 1803:        struct vnode *vq, *vnext;
1.29      cgd      1804:        int count;
                   1805:
                   1806: loop:
                   1807:        if ((vp->v_flag & VALIASED) == 0)
                   1808:                return (vp->v_usecount);
1.80      fvdl     1809:        simple_lock(&spechash_slock);
1.30      mycroft  1810:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1811:                vnext = vq->v_specnext;
1.29      cgd      1812:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1813:                        continue;
                   1814:                /*
                   1815:                 * Alias, but not in use, so flush it out.
                   1816:                 */
1.151     wrstuden 1817:                if (vq->v_usecount == 0 && vq != vp &&
                   1818:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1819:                        simple_unlock(&spechash_slock);
1.29      cgd      1820:                        vgone(vq);
                   1821:                        goto loop;
                   1822:                }
                   1823:                count += vq->v_usecount;
                   1824:        }
1.80      fvdl     1825:        simple_unlock(&spechash_slock);
1.29      cgd      1826:        return (count);
                   1827: }
                   1828:
/*
 * ARRAY_SIZE(arr): number of elements in `arr', which must be a real
 * array (not a pointer).
 *
 * ARRAY_PRINT(idx, arr): the string arr[idx] when `idx' is a valid index
 * into `arr', otherwise the literal "UNKNOWN".  Index 0 is a valid slot
 * and is looked up like any other.  The cast to unsigned folds the
 * "negative" and "too large" checks into one comparison.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define ARRAY_PRINT(idx, arr) \
    ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
                   1832:
                   1833: const char * const vnode_tags[] = { VNODE_TAGS };
                   1834: const char * const vnode_types[] = { VNODE_TYPES };
                   1835: const char vnode_flagbits[] = VNODE_FLAGBITS;
                   1836:
1.29      cgd      1837: /*
                   1838:  * Print out a description of a vnode.
                   1839:  */
                   1840: void
1.247     thorpej  1841: vprint(const char *label, struct vnode *vp)
1.29      cgd      1842: {
1.245     christos 1843:        char bf[96];
1.29      cgd      1844:
                   1845:        if (label != NULL)
1.57      christos 1846:                printf("%s: ", label);
1.237     christos 1847:        printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
                   1848:            "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
                   1849:            ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1.190     jdolecek 1850:            vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1.245     christos 1851:        bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
                   1852:        if (bf[0] != '\0')
                   1853:                printf(" flags (%s)", &bf[1]);
1.30      mycroft  1854:        if (vp->v_data == NULL) {
1.57      christos 1855:                printf("\n");
1.30      mycroft  1856:        } else {
1.57      christos 1857:                printf("\n\t");
1.30      mycroft  1858:                VOP_PRINT(vp);
                   1859:        }
1.29      cgd      1860: }
                   1861:
                   1862: #ifdef DEBUG
                   1863: /*
                   1864:  * List all of the locked vnodes in the system.
                   1865:  * Called when debugging the kernel.
                   1866:  */
1.51      christos 1867: void
1.247     thorpej  1868: printlockedvnodes(void)
1.29      cgd      1869: {
1.80      fvdl     1870:        struct mount *mp, *nmp;
                   1871:        struct vnode *vp;
1.29      cgd      1872:
1.57      christos 1873:        printf("Locked vnodes\n");
1.80      fvdl     1874:        simple_lock(&mountlist_slock);
1.177     matt     1875:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
                   1876:             mp = nmp) {
1.80      fvdl     1877:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177     matt     1878:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1879:                        continue;
                   1880:                }
1.158     chs      1881:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29      cgd      1882:                        if (VOP_ISLOCKED(vp))
1.158     chs      1883:                                vprint(NULL, vp);
1.80      fvdl     1884:                }
                   1885:                simple_lock(&mountlist_slock);
1.177     matt     1886:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1887:                vfs_unbusy(mp);
1.29      cgd      1888:        }
1.80      fvdl     1889:        simple_unlock(&mountlist_slock);
1.29      cgd      1890: }
                   1891: #endif
                   1892:
1.101     mrg      1893: /*
1.220     lukem    1894:  * sysctl helper routine to return list of supported fstypes
                   1895:  */
                   1896: static int
                   1897: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
                   1898: {
1.245     christos 1899:        char bf[MFSNAMELEN];
1.220     lukem    1900:        char *where = oldp;
                   1901:        struct vfsops *v;
                   1902:        size_t needed, left, slen;
                   1903:        int error, first;
                   1904:
                   1905:        if (newp != NULL)
                   1906:                return (EPERM);
                   1907:        if (namelen != 0)
                   1908:                return (EINVAL);
                   1909:
                   1910:        first = 1;
                   1911:        error = 0;
                   1912:        needed = 0;
                   1913:        left = *oldlenp;
                   1914:
                   1915:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   1916:                if (where == NULL)
                   1917:                        needed += strlen(v->vfs_name) + 1;
                   1918:                else {
1.245     christos 1919:                        memset(bf, 0, sizeof(bf));
1.220     lukem    1920:                        if (first) {
1.245     christos 1921:                                strncpy(bf, v->vfs_name, sizeof(bf));
1.220     lukem    1922:                                first = 0;
                   1923:                        } else {
1.245     christos 1924:                                bf[0] = ' ';
                   1925:                                strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220     lukem    1926:                        }
1.245     christos 1927:                        bf[sizeof(bf)-1] = '\0';
                   1928:                        slen = strlen(bf);
1.220     lukem    1929:                        if (left < slen + 1)
                   1930:                                break;
                   1931:                        /* +1 to copy out the trailing NUL byte */
1.245     christos 1932:                        error = copyout(bf, where, slen + 1);
1.220     lukem    1933:                        if (error)
                   1934:                                break;
                   1935:                        where += slen;
                   1936:                        needed += slen;
                   1937:                        left -= slen;
                   1938:                }
                   1939:        }
                   1940:        *oldlenp = needed;
                   1941:        return (error);
                   1942: }
                   1943:
                   1944: /*
1.80      fvdl     1945:  * Top level filesystem related information gathering.
                   1946:  */
1.212     atatat   1947: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80      fvdl     1948: {
1.218     atatat   1949:        sysctl_createv(clog, 0, NULL, NULL,
                   1950:                       CTLFLAG_PERMANENT,
1.212     atatat   1951:                       CTLTYPE_NODE, "vfs", NULL,
                   1952:                       NULL, 0, NULL, 0,
                   1953:                       CTL_VFS, CTL_EOL);
1.218     atatat   1954:        sysctl_createv(clog, 0, NULL, NULL,
                   1955:                       CTLFLAG_PERMANENT,
1.226     atatat   1956:                       CTLTYPE_NODE, "generic",
                   1957:                       SYSCTL_DESCR("Non-specific vfs related information"),
1.212     atatat   1958:                       NULL, 0, NULL, 0,
                   1959:                       CTL_VFS, VFS_GENERIC, CTL_EOL);
1.218     atatat   1960:        sysctl_createv(clog, 0, NULL, NULL,
                   1961:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226     atatat   1962:                       CTLTYPE_INT, "usermount",
                   1963:                       SYSCTL_DESCR("Whether unprivileged users may mount "
                   1964:                                    "filesystems"),
1.212     atatat   1965:                       NULL, 0, &dovfsusermount, 0,
                   1966:                       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220     lukem    1967:        sysctl_createv(clog, 0, NULL, NULL,
                   1968:                       CTLFLAG_PERMANENT,
                   1969:                       CTLTYPE_STRING, "fstypes",
                   1970:                       SYSCTL_DESCR("List of file systems present"),
                   1971:                       sysctl_vfs_generic_fstypes, 0, NULL, 0,
                   1972:                       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.80      fvdl     1973: }
                   1974:
1.212     atatat   1975:
1.29      cgd      1976: int kinfo_vdebug = 1;
                   1977: int kinfo_vgetfailed;
                   1978: #define KINFO_VNODESLOP        10
                   1979: /*
                   1980:  * Dump vnode list (via sysctl).
                   1981:  * Copyout address of vnode followed by vnode.
                   1982:  */
                   1983: /* ARGSUSED */
1.50      christos 1984: int
1.212     atatat   1985: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29      cgd      1986: {
1.212     atatat   1987:        char *where = oldp;
                   1988:        size_t *sizep = oldlenp;
1.80      fvdl     1989:        struct mount *mp, *nmp;
                   1990:        struct vnode *nvp, *vp;
                   1991:        char *bp = where, *savebp;
1.29      cgd      1992:        char *ewhere;
                   1993:        int error;
1.212     atatat   1994:
                   1995:        if (namelen != 0)
                   1996:                return (EOPNOTSUPP);
                   1997:        if (newp != NULL)
                   1998:                return (EPERM);
1.29      cgd      1999:
1.90      perry    2000: #define VPTRSZ sizeof(struct vnode *)
                   2001: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      2002:        if (where == NULL) {
                   2003:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   2004:                return (0);
                   2005:        }
                   2006:        ewhere = where + *sizep;
1.80      fvdl     2007:
                   2008:        simple_lock(&mountlist_slock);
1.177     matt     2009:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
                   2010:             mp = nmp) {
1.80      fvdl     2011:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177     matt     2012:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      2013:                        continue;
1.80      fvdl     2014:                }
1.29      cgd      2015:                savebp = bp;
                   2016: again:
1.80      fvdl     2017:                simple_lock(&mntvnode_slock);
1.177     matt     2018:                for (vp = LIST_FIRST(&mp->mnt_vnodelist);
1.29      cgd      2019:                     vp != NULL;
1.80      fvdl     2020:                     vp = nvp) {
1.29      cgd      2021:                        /*
                   2022:                         * Check that the vp is still associated with
                   2023:                         * this filesystem.  RACE: could have been
                   2024:                         * recycled onto the same filesystem.
                   2025:                         */
                   2026:                        if (vp->v_mount != mp) {
1.80      fvdl     2027:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2028:                                if (kinfo_vdebug)
1.57      christos 2029:                                        printf("kinfo: vp changed\n");
1.29      cgd      2030:                                bp = savebp;
                   2031:                                goto again;
                   2032:                        }
1.177     matt     2033:                        nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      2034:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     2035:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2036:                                *sizep = bp - where;
                   2037:                                return (ENOMEM);
                   2038:                        }
1.80      fvdl     2039:                        simple_unlock(&mntvnode_slock);
1.29      cgd      2040:                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                   2041:                           (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                   2042:                                return (error);
                   2043:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     2044:                        simple_lock(&mntvnode_slock);
1.29      cgd      2045:                }
1.80      fvdl     2046:                simple_unlock(&mntvnode_slock);
                   2047:                simple_lock(&mountlist_slock);
1.177     matt     2048:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      2049:                vfs_unbusy(mp);
                   2050:        }
1.80      fvdl     2051:        simple_unlock(&mountlist_slock);
1.29      cgd      2052:
                   2053:        *sizep = bp - where;
                   2054:        return (0);
1.30      mycroft  2055: }
                   2056:
                   2057: /*
                   2058:  * Check to see if a filesystem is mounted on a block device.
                   2059:  */
                   2060: int
1.247     thorpej  2061: vfs_mountedon(struct vnode *vp)
1.30      mycroft  2062: {
1.80      fvdl     2063:        struct vnode *vq;
                   2064:        int error = 0;
1.30      mycroft  2065:
1.261   ! reinoud  2066:        if (vp->v_type != VBLK)
        !          2067:                return ENOTBLK;
1.113     fvdl     2068:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  2069:                return (EBUSY);
                   2070:        if (vp->v_flag & VALIASED) {
1.80      fvdl     2071:                simple_lock(&spechash_slock);
1.30      mycroft  2072:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   2073:                        if (vq->v_rdev != vp->v_rdev ||
                   2074:                            vq->v_type != vp->v_type)
                   2075:                                continue;
1.113     fvdl     2076:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     2077:                                error = EBUSY;
                   2078:                                break;
                   2079:                        }
1.30      mycroft  2080:                }
1.80      fvdl     2081:                simple_unlock(&spechash_slock);
1.30      mycroft  2082:        }
1.80      fvdl     2083:        return (error);
1.30      mycroft  2084: }
                   2085:
1.35      ws       2086: /*
                   2087:  * Do the usual access checking.
                   2088:  * file_mode, uid and gid are from the vnode in question,
                   2089:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   2090:  */
1.41      mycroft  2091: int
1.247     thorpej  2092: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
                   2093:     mode_t acc_mode, struct ucred *cred)
1.35      ws       2094: {
                   2095:        mode_t mask;
1.217     junyoung 2096:
1.64      mycroft  2097:        /*
                   2098:         * Super-user always gets read/write access, but execute access depends
                   2099:         * on at least one execute bit being set.
                   2100:         */
                   2101:        if (cred->cr_uid == 0) {
1.69      mycroft  2102:                if ((acc_mode & VEXEC) && type != VDIR &&
1.68      mycroft  2103:                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64      mycroft  2104:                        return (EACCES);
1.68      mycroft  2105:                return (0);
1.64      mycroft  2106:        }
1.217     junyoung 2107:
1.35      ws       2108:        mask = 0;
1.217     junyoung 2109:
1.35      ws       2110:        /* Otherwise, check the owner. */
                   2111:        if (cred->cr_uid == uid) {
1.68      mycroft  2112:                if (acc_mode & VEXEC)
1.35      ws       2113:                        mask |= S_IXUSR;
                   2114:                if (acc_mode & VREAD)
                   2115:                        mask |= S_IRUSR;
                   2116:                if (acc_mode & VWRITE)
                   2117:                        mask |= S_IWUSR;
1.64      mycroft  2118:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2119:        }
1.217     junyoung 2120:
1.35      ws       2121:        /* Otherwise, check the groups. */
1.44      jtc      2122:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68      mycroft  2123:                if (acc_mode & VEXEC)
1.35      ws       2124:                        mask |= S_IXGRP;
                   2125:                if (acc_mode & VREAD)
                   2126:                        mask |= S_IRGRP;
                   2127:                if (acc_mode & VWRITE)
                   2128:                        mask |= S_IWGRP;
1.64      mycroft  2129:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2130:        }
1.217     junyoung 2131:
1.35      ws       2132:        /* Otherwise, check everyone else. */
1.68      mycroft  2133:        if (acc_mode & VEXEC)
1.35      ws       2134:                mask |= S_IXOTH;
                   2135:        if (acc_mode & VREAD)
                   2136:                mask |= S_IROTH;
                   2137:        if (acc_mode & VWRITE)
                   2138:                mask |= S_IWOTH;
1.64      mycroft  2139:        return ((file_mode & mask) == mask ? 0 : EACCES);
1.39      mycroft  2140: }
                   2141:
                   2142: /*
                   2143:  * Unmount all file systems.
                   2144:  * We traverse the list in reverse order under the assumption that doing so
                   2145:  * will avoid needing to worry about dependencies.
                   2146:  */
                   2147: void
1.256     christos 2148: vfs_unmountall(struct lwp *l)
1.39      mycroft  2149: {
1.123     augustss 2150:        struct mount *mp, *nmp;
1.40      mycroft  2151:        int allerror, error;
1.39      mycroft  2152:
1.235     lukem    2153:        printf("unmounting file systems...");
1.39      mycroft  2154:        for (allerror = 0,
                   2155:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   2156:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      2157: #ifdef DEBUG
1.235     lukem    2158:                printf("\nunmounting %s (%s)...",
1.56      christos 2159:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      2160: #endif
1.149     thorpej  2161:                /*
                   2162:                 * XXX Freeze syncer.  Must do this before locking the
                   2163:                 * mount point.  See dounmount() for details.
                   2164:                 */
                   2165:                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                   2166:                if (vfs_busy(mp, 0, 0)) {
                   2167:                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60      fvdl     2168:                        continue;
1.149     thorpej  2169:                }
1.256     christos 2170:                if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
1.57      christos 2171:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  2172:                            mp->mnt_stat.f_mntonname, error);
                   2173:                        allerror = 1;
                   2174:                }
1.39      mycroft  2175:        }
1.235     lukem    2176:        printf(" done\n");
1.39      mycroft  2177:        if (allerror)
1.57      christos 2178:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  2179: }
                   2180:
1.205     yamt     2181: extern struct simplelock bqueue_slock; /* XXX */
                   2182:
1.40      mycroft  2183: /*
                   2184:  * Sync and unmount file systems before shutting down.
                   2185:  */
                   2186: void
1.247     thorpej  2187: vfs_shutdown(void)
1.40      mycroft  2188: {
1.184     thorpej  2189:        struct lwp *l = curlwp;
1.185     christos 2190:        struct proc *p;
1.40      mycroft  2191:
1.128     sommerfe 2192:        /* XXX we're certainly not running in proc0's context! */
1.185     christos 2193:        if (l == NULL || (p = l->l_proc) == NULL)
1.128     sommerfe 2194:                p = &proc0;
1.185     christos 2195:
1.70      cgd      2196:        printf("syncing disks... ");
                   2197:
1.138     bouyer   2198:        /* remove user process from run queue */
                   2199:        suspendsched();
1.40      mycroft  2200:        (void) spl0();
                   2201:
1.128     sommerfe 2202:        /* avoid coming back this way again if we panic. */
                   2203:        doing_shutdown = 1;
                   2204:
1.184     thorpej  2205:        sys_sync(l, NULL, NULL);
1.40      mycroft  2206:
                   2207:        /* Wait for sync to finish. */
1.213     pk       2208:        if (buf_syncwait() != 0) {
1.124     augustss 2209: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
                   2210:                Debugger();
                   2211: #endif
1.57      christos 2212:                printf("giving up\n");
1.84      thorpej  2213:                return;
1.73      thorpej  2214:        } else
1.57      christos 2215:                printf("done\n");
1.73      thorpej  2216:
1.84      thorpej  2217:        /*
                   2218:         * If we've panic'd, don't make the situation potentially
                   2219:         * worse by unmounting the file systems.
                   2220:         */
                   2221:        if (panicstr != NULL)
                   2222:                return;
                   2223:
                   2224:        /* Release inodes held by texts before update. */
1.73      thorpej  2225: #ifdef notdef
1.84      thorpej  2226:        vnshutdown();
1.73      thorpej  2227: #endif
1.84      thorpej  2228:        /* Unmount file systems. */
1.256     christos 2229:        vfs_unmountall(l);
1.58      thorpej  2230: }
                   2231:
                   2232: /*
                   2233:  * Mount the root file system.  If the operator didn't specify a
                   2234:  * file system to use, try all possible file systems until one
                   2235:  * succeeds.
                   2236:  */
                   2237: int
1.247     thorpej  2238: vfs_mountroot(void)
1.58      thorpej  2239: {
1.79      thorpej  2240:        struct vfsops *v;
1.239     mycroft  2241:        int error = ENODEV;
1.58      thorpej  2242:
                   2243:        if (root_device == NULL)
                   2244:                panic("vfs_mountroot: root device unknown");
                   2245:
                   2246:        switch (root_device->dv_class) {
                   2247:        case DV_IFNET:
                   2248:                if (rootdev != NODEV)
1.173     thorpej  2249:                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                   2250:                            "(0x%08x -> %d,%d)", rootdev,
                   2251:                            major(rootdev), minor(rootdev));
1.58      thorpej  2252:                break;
                   2253:
                   2254:        case DV_DISK:
                   2255:                if (rootdev == NODEV)
                   2256:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239     mycroft  2257:                if (bdevvp(rootdev, &rootvp))
                   2258:                        panic("vfs_mountroot: can't get vnode for rootdev");
1.256     christos 2259:                error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
1.239     mycroft  2260:                if (error) {
                   2261:                        printf("vfs_mountroot: can't open root device\n");
                   2262:                        return (error);
                   2263:                }
1.58      thorpej  2264:                break;
                   2265:
                   2266:        default:
                   2267:                printf("%s: inappropriate for root file system\n",
                   2268:                    root_device->dv_xname);
                   2269:                return (ENODEV);
                   2270:        }
                   2271:
                   2272:        /*
                   2273:         * If user specified a file system, use it.
                   2274:         */
1.239     mycroft  2275:        if (mountroot != NULL) {
                   2276:                error = (*mountroot)();
                   2277:                goto done;
                   2278:        }
1.58      thorpej  2279:
                   2280:        /*
                   2281:         * Try each file system currently configured into the kernel.
                   2282:         */
1.220     lukem    2283:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2284:                if (v->vfs_mountroot == NULL)
1.58      thorpej  2285:                        continue;
                   2286: #ifdef DEBUG
1.197     thorpej  2287:                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  2288: #endif
1.239     mycroft  2289:                error = (*v->vfs_mountroot)();
                   2290:                if (!error) {
1.197     thorpej  2291:                        aprint_normal("root file system type: %s\n",
                   2292:                            v->vfs_name);
1.79      thorpej  2293:                        break;
1.58      thorpej  2294:                }
                   2295:        }
                   2296:
1.79      thorpej  2297:        if (v == NULL) {
                   2298:                printf("no file system for %s", root_device->dv_xname);
                   2299:                if (root_device->dv_class == DV_DISK)
                   2300:                        printf(" (dev 0x%x)", rootdev);
                   2301:                printf("\n");
1.239     mycroft  2302:                error = EFTYPE;
1.79      thorpej  2303:        }
1.239     mycroft  2304:
                   2305: done:
                   2306:        if (error && root_device->dv_class == DV_DISK) {
1.256     christos 2307:                VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
1.239     mycroft  2308:                vrele(rootvp);
                   2309:        }
                   2310:        return (error);
1.58      thorpej  2311: }
                   2312:
                   2313: /*
                   2314:  * Given a file system name, look up the vfsops for that
                   2315:  * file system, or return NULL if file system isn't present
                   2316:  * in the kernel.
                   2317:  */
                   2318: struct vfsops *
1.247     thorpej  2319: vfs_getopsbyname(const char *name)
1.58      thorpej  2320: {
1.79      thorpej  2321:        struct vfsops *v;
                   2322:
1.220     lukem    2323:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2324:                if (strcmp(v->vfs_name, name) == 0)
                   2325:                        break;
                   2326:        }
                   2327:
                   2328:        return (v);
                   2329: }
                   2330:
                   2331: /*
                   2332:  * Establish a file system and initialize it.
                   2333:  */
                   2334: int
1.247     thorpej  2335: vfs_attach(struct vfsops *vfs)
1.79      thorpej  2336: {
                   2337:        struct vfsops *v;
                   2338:        int error = 0;
                   2339:
1.58      thorpej  2340:
1.79      thorpej  2341:        /*
                   2342:         * Make sure this file system doesn't already exist.
                   2343:         */
1.157     chs      2344:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2345:                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                   2346:                        error = EEXIST;
                   2347:                        goto out;
                   2348:                }
                   2349:        }
                   2350:
                   2351:        /*
                   2352:         * Initialize the vnode operations for this file system.
                   2353:         */
                   2354:        vfs_opv_init(vfs->vfs_opv_descs);
                   2355:
                   2356:        /*
                   2357:         * Now initialize the file system itself.
                   2358:         */
                   2359:        (*vfs->vfs_init)();
                   2360:
                   2361:        /*
                   2362:         * ...and link it into the kernel's list.
                   2363:         */
                   2364:        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
                   2365:
                   2366:        /*
                   2367:         * Sanity: make sure the reference count is 0.
                   2368:         */
                   2369:        vfs->vfs_refcount = 0;
                   2370:
                   2371:  out:
                   2372:        return (error);
                   2373: }
                   2374:
                   2375: /*
                   2376:  * Remove a file system from the kernel.
                   2377:  */
                   2378: int
1.247     thorpej  2379: vfs_detach(struct vfsops *vfs)
1.79      thorpej  2380: {
                   2381:        struct vfsops *v;
                   2382:
                   2383:        /*
                   2384:         * Make sure no one is using the filesystem.
                   2385:         */
                   2386:        if (vfs->vfs_refcount != 0)
                   2387:                return (EBUSY);
                   2388:
                   2389:        /*
                   2390:         * ...and remove it from the kernel's list.
                   2391:         */
1.157     chs      2392:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2393:                if (v == vfs) {
                   2394:                        LIST_REMOVE(v, vfs_list);
                   2395:                        break;
                   2396:                }
                   2397:        }
                   2398:
                   2399:        if (v == NULL)
                   2400:                return (ESRCH);
1.121     jdolecek 2401:
                   2402:        /*
                   2403:         * Now run the file system-specific cleanups.
                   2404:         */
                   2405:        (*vfs->vfs_done)();
1.79      thorpej  2406:
                   2407:        /*
                   2408:         * Free the vnode operations vector.
                   2409:         */
                   2410:        vfs_opv_free(vfs->vfs_opv_descs);
                   2411:        return (0);
1.157     chs      2412: }
                   2413:
                   2414: void
                   2415: vfs_reinit(void)
                   2416: {
                   2417:        struct vfsops *vfs;
                   2418:
                   2419:        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                   2420:                if (vfs->vfs_reinit) {
                   2421:                        (*vfs->vfs_reinit)();
                   2422:                }
                   2423:        }
1.192     christos 2424: }
                   2425:
1.214     hannken  2426: /*
                   2427:  * Request a filesystem to suspend write operations.
                   2428:  */
                   2429: int
                   2430: vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
                   2431: {
1.256     christos 2432:        struct lwp *l = curlwp; /* XXX */
1.214     hannken  2433:        int error;
                   2434:
                   2435:        while ((mp->mnt_iflag & IMNT_SUSPEND)) {
                   2436:                if (slptimeo < 0)
                   2437:                        return EWOULDBLOCK;
                   2438:                error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
                   2439:                if (error)
                   2440:                        return error;
                   2441:        }
                   2442:        mp->mnt_iflag |= IMNT_SUSPEND;
                   2443:
1.224     pk       2444:        simple_lock(&mp->mnt_slock);
1.214     hannken  2445:        if (mp->mnt_writeopcountupper > 0)
1.224     pk       2446:                ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
                   2447:                        0, &mp->mnt_slock);
                   2448:        simple_unlock(&mp->mnt_slock);
1.214     hannken  2449:
1.256     christos 2450:        error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_ucred, l);
1.214     hannken  2451:        if (error) {
                   2452:                vfs_write_resume(mp);
                   2453:                return error;
                   2454:        }
                   2455:        mp->mnt_iflag |= IMNT_SUSPENDLOW;
                   2456:
1.224     pk       2457:        simple_lock(&mp->mnt_slock);
1.214     hannken  2458:        if (mp->mnt_writeopcountlower > 0)
1.224     pk       2459:                ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
                   2460:                        0, &mp->mnt_slock);
1.214     hannken  2461:        mp->mnt_iflag |= IMNT_SUSPENDED;
1.224     pk       2462:        simple_unlock(&mp->mnt_slock);
1.214     hannken  2463:
                   2464:        return 0;
                   2465: }
                   2466:
                   2467: /*
                   2468:  * Request a filesystem to resume write operations.
                   2469:  */
                   2470: void
                   2471: vfs_write_resume(struct mount *mp)
                   2472: {
                   2473:
                   2474:        if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
                   2475:                return;
                   2476:        mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
                   2477:        wakeup(&mp->mnt_flag);
                   2478: }
                   2479:
1.192     christos 2480: void
1.221     christos 2481: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1.192     christos 2482: {
1.221     christos 2483:        const struct statvfs *mbp;
1.193     christos 2484:
                   2485:        if (sbp == (mbp = &mp->mnt_stat))
1.192     christos 2486:                return;
1.193     christos 2487:
1.222     enami    2488:        (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
                   2489:        sbp->f_fsid = mbp->f_fsid;
1.193     christos 2490:        sbp->f_owner = mbp->f_owner;
1.221     christos 2491:        sbp->f_flag = mbp->f_flag;
1.193     christos 2492:        sbp->f_syncwrites = mbp->f_syncwrites;
                   2493:        sbp->f_asyncwrites = mbp->f_asyncwrites;
1.221     christos 2494:        sbp->f_syncreads = mbp->f_syncreads;
                   2495:        sbp->f_asyncreads = mbp->f_asyncreads;
                   2496:        (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1.193     christos 2497:        (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1.192     christos 2498:            sizeof(sbp->f_fstypename));
1.193     christos 2499:        (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1.192     christos 2500:            sizeof(sbp->f_mntonname));
                   2501:        (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
                   2502:            sizeof(sbp->f_mntfromname));
1.233     jdolecek 2503:        sbp->f_namemax = mbp->f_namemax;
1.192     christos 2504: }
                   2505:
                   2506: int
1.221     christos 2507: set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
1.256     christos 2508:     struct mount *mp, struct lwp *l)
1.192     christos 2509: {
                   2510:        int error;
                   2511:        size_t size;
1.221     christos 2512:        struct statvfs *sfs = &mp->mnt_stat;
1.192     christos 2513:        int (*fun)(const void *, void *, size_t, size_t *);
                   2514:
1.217     junyoung 2515:        (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
1.192     christos 2516:            sizeof(mp->mnt_stat.f_fstypename));
                   2517:
                   2518:        if (onp) {
1.256     christos 2519:                struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1.192     christos 2520:                fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
                   2521:                if (cwdi->cwdi_rdir != NULL) {
                   2522:                        size_t len;
                   2523:                        char *bp;
                   2524:                        char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
                   2525:
1.209     dbj      2526:                        if (!path) /* XXX can't happen with M_WAITOK */
1.192     christos 2527:                                return ENOMEM;
                   2528:
                   2529:                        bp = path + MAXPATHLEN;
                   2530:                        *--bp = '\0';
                   2531:                        error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
1.256     christos 2532:                            path, MAXPATHLEN / 2, 0, l);
1.192     christos 2533:                        if (error) {
                   2534:                                free(path, M_TEMP);
                   2535:                                return error;
                   2536:                        }
                   2537:
                   2538:                        len = strlen(bp);
                   2539:                        if (len > sizeof(sfs->f_mntonname) - 1)
                   2540:                                len = sizeof(sfs->f_mntonname) - 1;
                   2541:                        (void)strncpy(sfs->f_mntonname, bp, len);
                   2542:                        free(path, M_TEMP);
                   2543:
                   2544:                        if (len < sizeof(sfs->f_mntonname) - 1) {
                   2545:                                error = (*fun)(onp, &sfs->f_mntonname[len],
1.194     christos 2546:                                    sizeof(sfs->f_mntonname) - len - 1, &size);
1.192     christos 2547:                                if (error)
                   2548:                                        return error;
                   2549:                                size += len;
                   2550:                        } else {
                   2551:                                size = len;
                   2552:                        }
                   2553:                } else {
                   2554:                        error = (*fun)(onp, &sfs->f_mntonname,
                   2555:                            sizeof(sfs->f_mntonname) - 1, &size);
                   2556:                        if (error)
                   2557:                                return error;
                   2558:                }
                   2559:                (void)memset(sfs->f_mntonname + size, 0,
                   2560:                    sizeof(sfs->f_mntonname) - size);
                   2561:        }
                   2562:
                   2563:        if (fromp) {
                   2564:                fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
                   2565:                error = (*fun)(fromp, sfs->f_mntfromname,
                   2566:                    sizeof(sfs->f_mntfromname) - 1, &size);
                   2567:                if (error)
                   2568:                        return error;
                   2569:                (void)memset(sfs->f_mntfromname + size, 0,
                   2570:                    sizeof(sfs->f_mntfromname) - size);
                   2571:        }
                   2572:        return 0;
1.29      cgd      2573: }
1.125     chs      2574:
                   2575: #ifdef DDB
1.247     thorpej  2576: static const char buf_flagbits[] = BUF_FLAGBITS;
1.125     chs      2577:
                   2578: void
1.247     thorpej  2579: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
1.125     chs      2580: {
1.245     christos 2581:        char bf[1024];
1.125     chs      2582:
1.198     dbj      2583:        (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
1.125     chs      2584:                  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
                   2585:
1.245     christos 2586:        bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
                   2587:        (*pr)("  error %d flags 0x%s\n", bp->b_error, bf);
1.125     chs      2588:
1.164     msaitoh  2589:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1.125     chs      2590:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142     chs      2591:        (*pr)("  data %p saveaddr %p dep %p\n",
                   2592:                  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125     chs      2593:        (*pr)("  iodone %p\n", bp->b_iodone);
                   2594: }
                   2595:
                   2596:
                   2597: void
1.247     thorpej  2598: vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
1.125     chs      2599: {
1.245     christos 2600:        char bf[256];
1.125     chs      2601:
1.158     chs      2602:        uvm_object_printit(&vp->v_uobj, full, pr);
1.245     christos 2603:        bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
                   2604:        (*pr)("\nVNODE flags %s\n", bf);
1.158     chs      2605:        (*pr)("mp %p numoutput %d size 0x%llx\n",
                   2606:              vp->v_mount, vp->v_numoutput, vp->v_size);
1.125     chs      2607:
1.164     msaitoh  2608:        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
1.125     chs      2609:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   2610:              vp->v_holdcnt, vp->v_numoutput);
                   2611:
1.237     christos 2612:        (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
                   2613:              ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
                   2614:              ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1.203     yamt     2615:              vp->v_mount, vp->v_mountedhere);
1.125     chs      2616:
                   2617:        if (full) {
                   2618:                struct buf *bp;
                   2619:
                   2620:                (*pr)("clean bufs:\n");
1.142     chs      2621:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   2622:                        (*pr)(" bp %p\n", bp);
1.125     chs      2623:                        vfs_buf_print(bp, full, pr);
                   2624:                }
                   2625:
                   2626:                (*pr)("dirty bufs:\n");
1.142     chs      2627:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   2628:                        (*pr)(" bp %p\n", bp);
1.125     chs      2629:                        vfs_buf_print(bp, full, pr);
                   2630:                }
                   2631:        }
                   2632: }
1.210     dbj      2633:
                   2634: void
1.247     thorpej  2635: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1.210     dbj      2636: {
                   2637:        char sbuf[256];
                   2638:
                   2639:        (*pr)("vnodecovered = %p syncer = %p data = %p\n",
                   2640:                        mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
                   2641:
1.232     mycroft  2642:        (*pr)("fs_bshift %d dev_bshift = %d\n",
                   2643:                        mp->mnt_fs_bshift,mp->mnt_dev_bshift);
1.210     dbj      2644:
                   2645:        bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
                   2646:        (*pr)("flag = %s\n", sbuf);
                   2647:
                   2648:        bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
                   2649:        (*pr)("iflag = %s\n", sbuf);
                   2650:
                   2651:        /* XXX use lockmgr_printinfo */
                   2652:        if (mp->mnt_lock.lk_sharecount)
                   2653:                (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
                   2654:                    mp->mnt_lock.lk_sharecount);
                   2655:        else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
                   2656:                (*pr)(" lock type %s: EXCL (count %d) by ",
                   2657:                    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
                   2658:                if (mp->mnt_lock.lk_flags & LK_SPIN)
                   2659:                        (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
                   2660:                else
                   2661:                        (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
                   2662:                            mp->mnt_lock.lk_locklwp);
                   2663:        } else
                   2664:                (*pr)(" not locked");
                   2665:        if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
                   2666:                (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
                   2667:
                   2668:        (*pr)("\n");
                   2669:
                   2670:        if (mp->mnt_unmounter) {
1.256     christos 2671:                (*pr)("unmounter pid = %d ",mp->mnt_unmounter->l_proc);
1.210     dbj      2672:        }
                   2673:        (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
                   2674:                mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
                   2675:
1.221     christos 2676:        (*pr)("statvfs cache:\n");
                   2677:        (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
                   2678:        (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
                   2679:        (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
                   2680:
                   2681:        (*pr)("\tblocks = "PRIu64"\n",mp->mnt_stat.f_blocks);
                   2682:        (*pr)("\tbfree = "PRIu64"\n",mp->mnt_stat.f_bfree);
                   2683:        (*pr)("\tbavail = "PRIu64"\n",mp->mnt_stat.f_bavail);
                   2684:        (*pr)("\tbresvd = "PRIu64"\n",mp->mnt_stat.f_bresvd);
                   2685:
                   2686:        (*pr)("\tfiles = "PRIu64"\n",mp->mnt_stat.f_files);
                   2687:        (*pr)("\tffree = "PRIu64"\n",mp->mnt_stat.f_ffree);
                   2688:        (*pr)("\tfavail = "PRIu64"\n",mp->mnt_stat.f_favail);
                   2689:        (*pr)("\tfresvd = "PRIu64"\n",mp->mnt_stat.f_fresvd);
                   2690:
                   2691:        (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
                   2692:                        mp->mnt_stat.f_fsidx.__fsid_val[0],
                   2693:                        mp->mnt_stat.f_fsidx.__fsid_val[1]);
                   2694:
1.210     dbj      2695:        (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
1.221     christos 2696:        (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
                   2697:
                   2698:        bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
                   2699:            sizeof(sbuf));
                   2700:        (*pr)("\tflag = %s\n",sbuf);
                   2701:        (*pr)("\tsyncwrites = " PRIu64 "\n",mp->mnt_stat.f_syncwrites);
                   2702:        (*pr)("\tasyncwrites = " PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
                   2703:        (*pr)("\tsyncreads = " PRIu64 "\n",mp->mnt_stat.f_syncreads);
                   2704:        (*pr)("\tasyncreads = " PRIu64 "\n",mp->mnt_stat.f_asyncreads);
1.210     dbj      2705:        (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
                   2706:        (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
                   2707:        (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
                   2708:
                   2709:        {
                   2710:                int cnt = 0;
                   2711:                struct vnode *vp;
                   2712:                (*pr)("locked vnodes =");
                   2713:                /* XXX would take mountlist lock, except ddb may not have context */
                   2714:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   2715:                        if (VOP_ISLOCKED(vp)) {
                   2716:                                if ((++cnt % 6) == 0) {
                   2717:                                        (*pr)(" %p,\n\t", vp);
                   2718:                                } else {
                   2719:                                        (*pr)(" %p,", vp);
                   2720:                                }
                   2721:                        }
                   2722:                }
                   2723:                (*pr)("\n");
                   2724:        }
                   2725:
                   2726:        if (full) {
                   2727:                int cnt = 0;
                   2728:                struct vnode *vp;
                   2729:                (*pr)("all vnodes =");
                   2730:                /* XXX would take mountlist lock, except ddb may not have context */
                   2731:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   2732:                        if (!LIST_NEXT(vp, v_mntvnodes)) {
                   2733:                                (*pr)(" %p", vp);
                   2734:                        } else if ((++cnt % 6) == 0) {
                   2735:                                (*pr)(" %p,\n\t", vp);
                   2736:                        } else {
                   2737:                                (*pr)(" %p,", vp);
                   2738:                        }
                   2739:                }
                   2740:                (*pr)("\n", vp);
                   2741:        }
                   2742: }
1.247     thorpej  2743: #endif /* DDB */

CVSweb <webmaster@jp.NetBSD.org>