[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.249

1.249   ! elad        1: /*     $NetBSD: vfs_subr.c,v 1.248 2005/06/06 12:09:19 yamt Exp $      */
1.74      thorpej     2:
                      3: /*-
1.243     mycroft     4:  * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
1.243     mycroft    10:  * This code is derived from software contributed to The NetBSD Foundation
                     11:  * by Charles M. Hannum.
1.74      thorpej    12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  * 3. All advertising materials mentioning features or use of this software
                     22:  *    must display the following acknowledgement:
                     23:  *     This product includes software developed by the NetBSD
                     24:  *     Foundation, Inc. and its contributors.
                     25:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     26:  *    contributors may be used to endorse or promote products derived
                     27:  *    from this software without specific prior written permission.
                     28:  *
                     29:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     30:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     31:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     32:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     33:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     34:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     35:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     36:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     37:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     38:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     39:  * POSSIBILITY OF SUCH DAMAGE.
                     40:  */
1.32      cgd        41:
1.29      cgd        42: /*
1.30      mycroft    43:  * Copyright (c) 1989, 1993
                     44:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        45:  * (c) UNIX System Laboratories, Inc.
                     46:  * All or some portions of this file are derived from material licensed
                     47:  * to the University of California by American Telephone and Telegraph
                     48:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     49:  * the permission of UNIX System Laboratories, Inc.
                     50:  *
                     51:  * Redistribution and use in source and binary forms, with or without
                     52:  * modification, are permitted provided that the following conditions
                     53:  * are met:
                     54:  * 1. Redistributions of source code must retain the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer.
                     56:  * 2. Redistributions in binary form must reproduce the above copyright
                     57:  *    notice, this list of conditions and the following disclaimer in the
                     58:  *    documentation and/or other materials provided with the distribution.
1.204     agc        59:  * 3. Neither the name of the University nor the names of its contributors
1.29      cgd        60:  *    may be used to endorse or promote products derived from this software
                     61:  *    without specific prior written permission.
                     62:  *
                     63:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     64:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     65:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     66:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     67:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     68:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     69:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     70:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     71:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     72:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     73:  * SUCH DAMAGE.
                     74:  *
1.32      cgd        75:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        76:  */
                     77:
                     78: /*
                     79:  * External virtual filesystem routines
                     80:  */
1.162     lukem      81:
                     82: #include <sys/cdefs.h>
1.249   ! elad       83: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.248 2005/06/06 12:09:19 yamt Exp $");
1.78      mrg        84:
1.195     christos   85: #include "opt_inet.h"
1.125     chs        86: #include "opt_ddb.h"
1.95      thorpej    87: #include "opt_compat_netbsd.h"
1.97      christos   88: #include "opt_compat_43.h"
1.29      cgd        89:
                     90: #include <sys/param.h>
1.30      mycroft    91: #include <sys/systm.h>
1.29      cgd        92: #include <sys/proc.h>
1.138     bouyer     93: #include <sys/kernel.h>
1.29      cgd        94: #include <sys/mount.h>
                     95: #include <sys/time.h>
1.181     jdolecek   96: #include <sys/event.h>
1.46      mycroft    97: #include <sys/fcntl.h>
1.29      cgd        98: #include <sys/vnode.h>
1.30      mycroft    99: #include <sys/stat.h>
1.29      cgd       100: #include <sys/namei.h>
                    101: #include <sys/ucred.h>
                    102: #include <sys/buf.h>
                    103: #include <sys/errno.h>
                    104: #include <sys/malloc.h>
1.30      mycroft   105: #include <sys/domain.h>
                    106: #include <sys/mbuf.h>
1.184     thorpej   107: #include <sys/sa.h>
1.51      christos  108: #include <sys/syscallargs.h>
1.58      thorpej   109: #include <sys/device.h>
1.238     thorpej   110: #include <sys/extattr.h>
1.71      fvdl      111: #include <sys/dirent.h>
1.192     christos  112: #include <sys/filedesc.h>
1.50      christos  113:
1.30      mycroft   114: #include <miscfs/specfs/specdev.h>
1.113     fvdl      115: #include <miscfs/genfs/genfs.h>
                    116: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   117:
1.195     christos  118: #include <netinet/in.h>
                    119:
1.125     chs       120: #include <uvm/uvm.h>
                    121: #include <uvm/uvm_ddb.h>
1.129     mrg       122:
1.195     christos  123: #include <netinet/in.h>
                    124:
1.129     mrg       125: #include <sys/sysctl.h>
1.77      mrg       126:
/*
 * File-type translation tables.
 *
 * iftovt_tab has 16 entries of enum vtype; vttoif_tab has 9 entries of
 * S_IF* mode bits.
 *
 * NOTE(review): presumably iftovt_tab is indexed by the four file-type
 * bits of a mode word and vttoif_tab by enum vtype (through macros such
 * as IFTOVT/VTTOIF in <sys/vnode.h>) -- confirm against the header.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
                    135:
/*
 * Behaviour flags.  doforce and prtactive are defined (and given their
 * defaults) here; dovfsusermount is only declared here and is defined
 * in another file.
 */
int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
                    140:
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) links buffer bp at the head of list dp through its
 * b_vnbufs entry.  bufremvn(bp) unlinks bp and then stores NOLIST in
 * its forward pointer, so a buffer that is on no list can be recognised
 * by comparing that pointer against NOLIST.
 *
 * bufremvn expands to more than one statement, so it is wrapped in
 * do { } while (0).  That makes "bufremvn(bp);" a single statement that
 * is safe as the body of an unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/*CONSTCOND*/ 0)
/*
 * Vnode recycling lists.  struct freelst is declared in vnode.h as
 * TAILQ_HEAD(freelst, vnode).  getcleanvnode() scans vnode_free_list
 * first and falls back to vnode_hold_list.
 */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

/*
 * Simple locks for the global state above.  In the code of this file,
 * mountlist_slock is held while vfs_getvfs() walks mountlist,
 * mntid_slock while vfs_getnewfsid() picks an id, and
 * vnode_free_list_slock while the free/hold lists and numvnodes are
 * manipulated.  mntid_slock is the only one private to this file.
 */
struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

/*
 * Pool from which struct vnode is allocated (pool_get() in
 * getnewvnode(), pool_put() in vfs_drainvnodes()).
 */
POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93      thorpej   180:
/*
 * Local declarations.
 *
 * The functions declared static below are private to this file; the
 * remaining prototypes have external linkage.
 */
void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist(struct mount *, struct netexport *,
			     struct export_args *);
static int vfs_free_netcred(struct radix_node *, void *);
static void vfs_free_addrlist(struct netexport *);
static struct vnode *getcleanvnode(struct proc *);

/* Only declared in DEBUG kernels. */
#ifdef DEBUG
void printlockedvnodes(void);
#endif
                    198:
/*
 * Initialize the vnode management data structures.
 *
 * The only work done at run time is the call to
 * vn_initialize_syncerd(); the vnode lists and simple locks defined in
 * this file use static initializers.
 */
void
vntblinit(void)
{

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}
                    211:
1.202     yamt      212: int
                    213: vfs_drainvnodes(long target, struct proc *p)
                    214: {
                    215:
                    216:        simple_lock(&vnode_free_list_slock);
                    217:        while (numvnodes > target) {
                    218:                struct vnode *vp;
                    219:
                    220:                vp = getcleanvnode(p);
                    221:                if (vp == NULL)
                    222:                        return EBUSY; /* give up */
                    223:                pool_put(&vnode_pool, vp);
                    224:                simple_lock(&vnode_free_list_slock);
                    225:                numvnodes--;
                    226:        }
                    227:        simple_unlock(&vnode_free_list_slock);
                    228:
                    229:        return 0;
                    230: }
                    231:
                    232: /*
                    233:  * grab a vnode from freelist and clean it.
                    234:  */
                    235: struct vnode *
1.247     thorpej   236: getcleanvnode(struct proc *p)
1.202     yamt      237: {
                    238:        struct vnode *vp;
1.208     hannken   239:        struct mount *mp;
1.202     yamt      240:        struct freelst *listhd;
                    241:
                    242:        LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
1.229     yamt      243:
                    244:        listhd = &vnode_free_list;
                    245: try_nextlist:
                    246:        TAILQ_FOREACH(vp, listhd, v_freelist) {
1.208     hannken   247:                if (!simple_lock_try(&vp->v_interlock))
                    248:                        continue;
1.227     yamt      249:                /*
                    250:                 * as our lwp might hold the underlying vnode locked,
                    251:                 * don't try to reclaim the VLAYER vnode if it's locked.
                    252:                 */
1.228     yamt      253:                if ((vp->v_flag & VXLOCK) == 0 &&
                    254:                    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
1.208     hannken   255:                        if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
1.202     yamt      256:                                break;
                    257:                }
1.208     hannken   258:                mp = NULL;
                    259:                simple_unlock(&vp->v_interlock);
1.202     yamt      260:        }
                    261:
                    262:        if (vp == NULLVP) {
1.229     yamt      263:                if (listhd == &vnode_free_list) {
                    264:                        listhd = &vnode_hold_list;
                    265:                        goto try_nextlist;
                    266:                }
1.202     yamt      267:                simple_unlock(&vnode_free_list_slock);
                    268:                return NULLVP;
                    269:        }
                    270:
                    271:        if (vp->v_usecount)
                    272:                panic("free vnode isn't, vp %p", vp);
                    273:        TAILQ_REMOVE(listhd, vp, v_freelist);
                    274:        /* see comment on why 0xdeadb is set at end of vgone (below) */
                    275:        vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                    276:        simple_unlock(&vnode_free_list_slock);
                    277:        vp->v_lease = NULL;
                    278:
                    279:        if (vp->v_type != VBAD)
                    280:                vgonel(vp, p);
                    281:        else
                    282:                simple_unlock(&vp->v_interlock);
1.208     hannken   283:        vn_finished_write(mp, 0);
1.202     yamt      284: #ifdef DIAGNOSTIC
                    285:        if (vp->v_data || vp->v_uobj.uo_npages ||
                    286:            TAILQ_FIRST(&vp->v_uobj.memq))
                    287:                panic("cleaned vnode isn't, vp %p", vp);
                    288:        if (vp->v_numoutput)
                    289:                panic("clean vnode has pending I/O's, vp %p", vp);
                    290: #endif
                    291:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                    292:
                    293:        return vp;
                    294: }
                    295:
1.29      cgd       296: /*
1.80      fvdl      297:  * Mark a mount point as busy. Used to synchronize access and to delay
                    298:  * unmounting. Interlock is not released on failure.
1.29      cgd       299:  */
1.50      christos  300: int
1.247     thorpej   301: vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
1.29      cgd       302: {
1.80      fvdl      303:        int lkflags;
1.29      cgd       304:
1.207     dbj       305:        while (mp->mnt_iflag & IMNT_UNMOUNT) {
1.224     pk        306:                int gone, n;
1.217     junyoung  307:
1.80      fvdl      308:                if (flags & LK_NOWAIT)
                    309:                        return (ENOENT);
1.113     fvdl      310:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
1.201     fvdl      311:                    && mp->mnt_unmounter == curproc)
1.113     fvdl      312:                        return (EDEADLK);
1.80      fvdl      313:                if (interlkp)
                    314:                        simple_unlock(interlkp);
                    315:                /*
                    316:                 * Since all busy locks are shared except the exclusive
                    317:                 * lock granted when unmounting, the only place that a
                    318:                 * wakeup needs to be done is at the release of the
                    319:                 * exclusive lock at the end of dounmount.
                    320:                 */
1.224     pk        321:                simple_lock(&mp->mnt_slock);
1.103     sommerfe  322:                mp->mnt_wcnt++;
1.224     pk        323:                ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
                    324:                n = --mp->mnt_wcnt;
                    325:                simple_unlock(&mp->mnt_slock);
1.207     dbj       326:                gone = mp->mnt_iflag & IMNT_GONE;
1.217     junyoung  327:
1.224     pk        328:                if (n == 0)
1.103     sommerfe  329:                        wakeup(&mp->mnt_wcnt);
1.80      fvdl      330:                if (interlkp)
                    331:                        simple_lock(interlkp);
1.103     sommerfe  332:                if (gone)
                    333:                        return (ENOENT);
1.80      fvdl      334:        }
                    335:        lkflags = LK_SHARED;
                    336:        if (interlkp)
                    337:                lkflags |= LK_INTERLOCK;
                    338:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                    339:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       340:        return (0);
                    341: }
                    342:
                    343: /*
1.80      fvdl      344:  * Free a busy filesystem.
1.29      cgd       345:  */
                    346: void
1.247     thorpej   347: vfs_unbusy(struct mount *mp)
1.29      cgd       348: {
                    349:
1.80      fvdl      350:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       351: }
                    352:
                    353: /*
1.80      fvdl      354:  * Lookup a filesystem type, and if found allocate and initialize
                    355:  * a mount structure for it.
                    356:  *
                    357:  * Devname is usually updated by mount(8) after booting.
1.29      cgd       358:  */
1.50      christos  359: int
1.247     thorpej   360: vfs_rootmountalloc(const char *fstypename, const char *devname,
                    361:     struct mount **mpp)
1.29      cgd       362: {
1.80      fvdl      363:        struct vfsops *vfsp = NULL;
                    364:        struct mount *mp;
1.29      cgd       365:
1.152     jdolecek  366:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.80      fvdl      367:                if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
                    368:                        break;
                    369:
                    370:        if (vfsp == NULL)
                    371:                return (ENODEV);
                    372:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     373:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      374:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
1.224     pk        375:        simple_lock_init(&mp->mnt_slock);
1.80      fvdl      376:        (void)vfs_busy(mp, LK_NOWAIT, 0);
                    377:        LIST_INIT(&mp->mnt_vnodelist);
                    378:        mp->mnt_op = vfsp;
                    379:        mp->mnt_flag = MNT_RDONLY;
                    380:        mp->mnt_vnodecovered = NULLVP;
1.230     hannken   381:        mp->mnt_leaf = mp;
1.80      fvdl      382:        vfsp->vfs_refcount++;
                    383:        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
                    384:        mp->mnt_stat.f_mntonname[0] = '/';
                    385:        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
                    386:        *mpp = mp;
1.29      cgd       387:        return (0);
                    388: }
                    389:
                    390: /*
                    391:  * Lookup a mount point by filesystem identifier.
                    392:  */
                    393: struct mount *
1.247     thorpej   394: vfs_getvfs(fsid_t *fsid)
1.29      cgd       395: {
1.123     augustss  396:        struct mount *mp;
1.29      cgd       397:
1.80      fvdl      398:        simple_lock(&mountlist_slock);
1.177     matt      399:        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
1.221     christos  400:                if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                    401:                    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
1.80      fvdl      402:                        simple_unlock(&mountlist_slock);
1.29      cgd       403:                        return (mp);
1.80      fvdl      404:                }
                    405:        }
                    406:        simple_unlock(&mountlist_slock);
1.29      cgd       407:        return ((struct mount *)0);
                    408: }
                    409:
                    410: /*
                    411:  * Get a new unique fsid
                    412:  */
                    413: void
1.247     thorpej   414: vfs_getnewfsid(struct mount *mp)
1.29      cgd       415: {
                    416:        static u_short xxxfs_mntid;
                    417:        fsid_t tfsid;
1.80      fvdl      418:        int mtype;
1.29      cgd       419:
1.80      fvdl      420:        simple_lock(&mntid_slock);
1.127     assar     421:        mtype = makefstype(mp->mnt_op->vfs_name);
1.221     christos  422:        mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
                    423:        mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
                    424:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1.29      cgd       425:        if (xxxfs_mntid == 0)
                    426:                ++xxxfs_mntid;
1.221     christos  427:        tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
                    428:        tfsid.__fsid_val[1] = mtype;
1.177     matt      429:        if (!CIRCLEQ_EMPTY(&mountlist)) {
1.80      fvdl      430:                while (vfs_getvfs(&tfsid)) {
1.221     christos  431:                        tfsid.__fsid_val[0]++;
1.29      cgd       432:                        xxxfs_mntid++;
                    433:                }
                    434:        }
1.221     christos  435:        mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
                    436:        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1.80      fvdl      437:        simple_unlock(&mntid_slock);
1.29      cgd       438: }
                    439:
                    440: /*
1.30      mycroft   441:  * Make a 'unique' number from a mount type name.
1.29      cgd       442:  */
                    443: long
1.247     thorpej   444: makefstype(const char *type)
1.29      cgd       445: {
                    446:        long rv;
                    447:
                    448:        for (rv = 0; *type; type++) {
                    449:                rv <<= 2;
                    450:                rv ^= *type;
                    451:        }
                    452:        return rv;
                    453: }
1.30      mycroft   454:
1.80      fvdl      455:
1.30      mycroft   456: /*
                    457:  * Set vnode attributes to VNOVAL
                    458:  */
                    459: void
1.247     thorpej   460: vattr_null(struct vattr *vap)
1.30      mycroft   461: {
                    462:
                    463:        vap->va_type = VNON;
1.75      enami     464:
                    465:        /*
                    466:         * Assign individually so that it is safe even if size and
                    467:         * sign of each member are varied.
                    468:         */
                    469:        vap->va_mode = VNOVAL;
                    470:        vap->va_nlink = VNOVAL;
                    471:        vap->va_uid = VNOVAL;
                    472:        vap->va_gid = VNOVAL;
                    473:        vap->va_fsid = VNOVAL;
                    474:        vap->va_fileid = VNOVAL;
1.30      mycroft   475:        vap->va_size = VNOVAL;
1.75      enami     476:        vap->va_blocksize = VNOVAL;
1.76      christos  477:        vap->va_atime.tv_sec =
                    478:            vap->va_mtime.tv_sec =
1.191     enami     479:            vap->va_ctime.tv_sec =
                    480:            vap->va_birthtime.tv_sec = VNOVAL;
1.76      christos  481:        vap->va_atime.tv_nsec =
                    482:            vap->va_mtime.tv_nsec =
1.191     enami     483:            vap->va_ctime.tv_nsec =
                    484:            vap->va_birthtime.tv_nsec = VNOVAL;
1.75      enami     485:        vap->va_gen = VNOVAL;
                    486:        vap->va_flags = VNOVAL;
                    487:        vap->va_rdev = VNOVAL;
1.30      mycroft   488:        vap->va_bytes = VNOVAL;
                    489:        vap->va_vaflags = 0;
                    490: }
                    491:
/*
 * Routines having to do with the management of the vnode table.
 */
/*
 * NOTE(review): presumably the operations vector installed on dead
 * vnodes; it is defined in another file -- confirm.
 */
extern int (**dead_vnodeop_p)(void *);
/*
 * Count of vnodes obtained from vnode_pool: incremented in
 * getnewvnode() after pool_get() and decremented in vfs_drainvnodes()
 * after pool_put(), both with vnode_free_list_slock held.
 */
long numvnodes;
                    497:
1.29      cgd       498: /*
                    499:  * Return the next vnode from the free list.
                    500:  */
1.50      christos  501: int
1.247     thorpej   502: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
                    503:     struct vnode **vpp)
1.29      cgd       504: {
1.142     chs       505:        extern struct uvm_pagerops uvm_vnodeops;
                    506:        struct uvm_object *uobj;
1.201     fvdl      507:        struct proc *p = curproc;       /* XXX */
1.113     fvdl      508:        static int toggle;
1.80      fvdl      509:        struct vnode *vp;
1.153     thorpej   510:        int error = 0, tryalloc;
1.158     chs       511:
1.159     enami     512:  try_again:
1.103     sommerfe  513:        if (mp) {
                    514:                /*
1.106     sommerfe  515:                 * Mark filesystem busy while we're creating a vnode.
                    516:                 * If unmount is in progress, this will wait; if the
                    517:                 * unmount succeeds (only if umount -f), this will
                    518:                 * return an error.  If the unmount fails, we'll keep
                    519:                 * going afterwards.
                    520:                 * (This puts the per-mount vnode list logically under
                    521:                 * the protection of the vfs_busy lock).
1.103     sommerfe  522:                 */
1.113     fvdl      523:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                    524:                if (error && error != EDEADLK)
1.103     sommerfe  525:                        return error;
                    526:        }
1.29      cgd       527:
1.113     fvdl      528:        /*
                    529:         * We must choose whether to allocate a new vnode or recycle an
                    530:         * existing one. The criterion for allocating a new one is that
                    531:         * the total number of vnodes is less than the number desired or
                    532:         * there are no vnodes on either free list. Generally we only
                    533:         * want to recycle vnodes that have no buffers associated with
                    534:         * them, so we look first on the vnode_free_list. If it is empty,
                    535:         * we next consider vnodes with referencing buffers on the
                    536:         * vnode_hold_list. The toggle ensures that half the time we
                    537:         * will use a buffer from the vnode_hold_list, and half the time
                    538:         * we will allocate a new one unless the list has grown to twice
                    539:         * the desired size. We are reticent to recycle vnodes from the
                    540:         * vnode_hold_list because we will lose the identity of all its
                    541:         * referencing buffers.
                    542:         */
1.142     chs       543:
1.153     thorpej   544:        vp = NULL;
                    545:
                    546:        simple_lock(&vnode_free_list_slock);
                    547:
1.113     fvdl      548:        toggle ^= 1;
                    549:        if (numvnodes > 2 * desiredvnodes)
                    550:                toggle = 0;
                    551:
1.153     thorpej   552:        tryalloc = numvnodes < desiredvnodes ||
1.159     enami     553:            (TAILQ_FIRST(&vnode_free_list) == NULL &&
                    554:             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153     thorpej   555:
                    556:        if (tryalloc &&
                    557:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
1.206     yamt      558:                numvnodes++;
1.80      fvdl      559:                simple_unlock(&vnode_free_list_slock);
1.142     chs       560:                memset(vp, 0, sizeof(*vp));
1.248     yamt      561:                UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
1.225     yamt      562:                /*
                    563:                 * done by memset() above.
                    564:                 *      LIST_INIT(&vp->v_nclist);
                    565:                 *      LIST_INIT(&vp->v_dnclist);
                    566:                 */
1.29      cgd       567:        } else {
1.202     yamt      568:                vp = getcleanvnode(p);
1.80      fvdl      569:                /*
                    570:                 * Unless this is a bad time of the month, at most
                    571:                 * the first NCPUS items on the free list are
                    572:                 * locked, so this is close enough to being empty.
                    573:                 */
                    574:                if (vp == NULLVP) {
1.113     fvdl      575:                        if (mp && error != EDEADLK)
                    576:                                vfs_unbusy(mp);
1.153     thorpej   577:                        if (tryalloc) {
                    578:                                printf("WARNING: unable to allocate new "
                    579:                                    "vnode, retrying...\n");
                    580:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                    581:                                goto try_again;
                    582:                        }
1.132     jdolecek  583:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       584:                        *vpp = 0;
                    585:                        return (ENFILE);
                    586:                }
1.248     yamt      587:                vp->v_usecount = 1;
1.29      cgd       588:                vp->v_flag = 0;
1.158     chs       589:                vp->v_socket = NULL;
1.182     blymn     590: #ifdef VERIFIED_EXEC
1.244     blymn     591:                vp->fp_status = FINGERPRINT_NOTEVAL;
1.249   ! elad      592:                vp->vhe = NULL;
1.182     blymn     593: #endif
1.29      cgd       594:        }
                    595:        vp->v_type = VNON;
1.104     wrstuden  596:        vp->v_vnlock = &vp->v_lock;
                    597:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.225     yamt      598:        KASSERT(LIST_EMPTY(&vp->v_nclist));
                    599:        KASSERT(LIST_EMPTY(&vp->v_dnclist));
1.29      cgd       600:        vp->v_tag = tag;
                    601:        vp->v_op = vops;
                    602:        insmntque(vp, mp);
1.30      mycroft   603:        *vpp = vp;
                    604:        vp->v_data = 0;
1.240     christos  605:        simple_lock_init(&vp->v_interlock);
1.142     chs       606:
                    607:        /*
                    608:         * initialize uvm_object within vnode.
                    609:         */
                    610:
1.158     chs       611:        uobj = &vp->v_uobj;
                    612:        KASSERT(uobj->pgops == &uvm_vnodeops);
                    613:        KASSERT(uobj->uo_npages == 0);
                    614:        KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
                    615:        vp->v_size = VSIZENOTSET;
1.142     chs       616:
1.113     fvdl      617:        if (mp && error != EDEADLK)
                    618:                vfs_unbusy(mp);
1.29      cgd       619:        return (0);
1.130     fvdl      620: }
                    621:
                    622: /*
                    623:  * This is really just the reverse of getnewvnode(). Needed for
                    624:  * VFS_VGET functions that may need to push back a vnode in case
                    625:  * of a locking race.
                          *
                          * The caller must hold the only use reference (checked when
                          * DIAGNOSTIC is defined).  That reference is dropped, the vnode is
                          * unlinked from its mount's vnode list, its type is set to VBAD,
                          * and it is placed at the head of vnode_hold_list (v_holdcnt > 0)
                          * or vnode_free_list (otherwise).
                    626:  */
                    627: void
1.247     thorpej   628: ungetnewvnode(struct vnode *vp)
1.130     fvdl      629: {
                    630: #ifdef DIAGNOSTIC
                    631:        if (vp->v_usecount != 1)
1.131     fvdl      632:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      633: #endif
                    634:        vp->v_usecount--;
                                /* A NULL mount only unlinks vp from its current mount list. */
                    635:        insmntque(vp, NULL);
                    636:        vp->v_type = VBAD;
                    637:
                    638:        simple_lock(&vp->v_interlock);
1.217     junyoung  639:        /*
1.130     fvdl      640:         * Insert at head of LRU list
                                 * (taken with the vnode interlock already held).
                    641:         */
                    642:        simple_lock(&vnode_free_list_slock);
                    643:        if (vp->v_holdcnt > 0)
                    644:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    645:        else
                    646:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1.217     junyoung  647:        simple_unlock(&vnode_free_list_slock);
1.130     fvdl      648:        simple_unlock(&vp->v_interlock);
1.29      cgd       649: }
                    650:
                    651: /*
                    652:  * Move a vnode from one mount queue to another.
                          *
                          * vp is unlinked from its current mount's vnode list when
                          * vp->v_mount is set, vp->v_mount is then set to mp, and vp is
                          * linked at the head of mp->mnt_vnodelist unless mp is NULL.
                          * Both steps run under mntvnode_slock.
                          *
                          * With DIAGNOSTIC, panics if mp has IMNT_UNMOUNT set, unless the
                          * mount has MNT_SOFTDEP set or the vnode's tag is VT_VFS.
                    653:  */
1.50      christos  654: void
1.247     thorpej   655: insmntque(struct vnode *vp, struct mount *mp)
1.29      cgd       656: {
                    657:
1.103     sommerfe  658: #ifdef DIAGNOSTIC
                    659:        if ((mp != NULL) &&
1.207     dbj       660:            (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113     fvdl      661:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    662:            vp->v_tag != VT_VFS) {
1.103     sommerfe  663:                panic("insmntque into dying filesystem");
                    664:        }
                    665: #endif
1.217     junyoung  666:
1.80      fvdl      667:        simple_lock(&mntvnode_slock);
1.29      cgd       668:        /*
                    669:         * Delete from old mount point vnode list, if on one.
                    670:         */
                    671:        if (vp->v_mount != NULL)
                    672:                LIST_REMOVE(vp, v_mntvnodes);
                    673:        /*
                    674:         * Insert into list of vnodes for the new mount point, if available.
                                 * The assignment below also records the new mount (possibly
                                 * NULL) in the vnode.
                    675:         */
1.80      fvdl      676:        if ((vp->v_mount = mp) != NULL)
                    677:                LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
                    678:        simple_unlock(&mntvnode_slock);
1.29      cgd       679: }
                    680:
                    681: /*
                    682:  * Update outstanding I/O count and do wakeup if requested.
                          *
                          * Does nothing when bp has no vnode.  Otherwise decrements the
                          * vnode's v_numoutput (panicking if it goes negative) and, when
                          * VBWAIT is set and the count has reached zero, clears VBWAIT and
                          * wakes sleepers on &vp->v_numoutput.
                    683:  */
1.50      christos  684: void
1.247     thorpej   685: vwakeup(struct buf *bp)
1.29      cgd       686: {
1.123     augustss  687:        struct vnode *vp;
1.29      cgd       688:
1.50      christos  689:        if ((vp = bp->b_vp) != NULL) {
1.187     pk        690:                /* XXX global lock hack
                    691:                 * can't use v_interlock here since this is called
                    692:                 * in interrupt context from biodone().
                    693:                 */
                    694:                simple_lock(&global_v_numoutput_slock);
1.30      mycroft   695:                if (--vp->v_numoutput < 0)
1.125     chs       696:                        panic("vwakeup: neg numoutput, vp %p", vp);
                                        /* Wake waiters only once the last output has drained. */
1.29      cgd       697:                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                    698:                        vp->v_flag &= ~VBWAIT;
                    699:                        wakeup((caddr_t)&vp->v_numoutput);
                    700:                }
1.187     pk        701:                simple_unlock(&global_v_numoutput_slock);
1.29      cgd       702:        }
                    703: }
                    704:
                    705: /*
                    706:  * Flush out and invalidate all buffers associated with a vnode.
1.126     mycroft   707:  * Called with the underlying vnode locked, which should prevent new dirty
                    708:  * buffers from being queued.
                          *
                          * If V_SAVE is set in flags, dirty data is written out first
                          * (PGO_CLEANIT for pages, VOP_FSYNC for buffers), and a buffer
                          * found still marked B_DELWRI is written with VOP_BWRITE instead
                          * of being discarded.  slpflag and slptimeo are passed to
                          * ltsleep() when waiting for a busy buffer.
                          *
                          * Returns 0 on success, otherwise the error from VOP_PUTPAGES,
                          * VOP_FSYNC or ltsleep.
1.29      cgd       709:  */
1.30      mycroft   710: int
1.247     thorpej   711: vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
                    712:     int slpflag, int slptimeo)
1.29      cgd       713: {
1.126     mycroft   714:        struct buf *bp, *nbp;
1.158     chs       715:        int s, error;
1.166     chs       716:        int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
1.142     chs       717:                (flags & V_SAVE ? PGO_CLEANIT : 0);
                    718:
                    719:        /* XXXUBC this doesn't look at flags or slp* */
                                /*
                                 * v_interlock is taken here and not released anywhere in this
                                 * function; presumably VOP_PUTPAGES drops it -- TODO confirm.
                                 */
1.166     chs       720:        simple_lock(&vp->v_interlock);
                    721:        error = VOP_PUTPAGES(vp, 0, 0, flushflags);
                    722:        if (error) {
                    723:                return error;
1.142     chs       724:        }
1.166     chs       725:
1.30      mycroft   726:        if (flags & V_SAVE) {
1.201     fvdl      727:                error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
1.126     mycroft   728:                if (error)
1.122     fvdl      729:                        return (error);
1.126     mycroft   730: #ifdef DIAGNOSTIC
                                        /* After the fsync no output or dirty buffer may remain. */
1.122     fvdl      731:                s = splbio();
1.126     mycroft   732:                if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
1.125     chs       733:                        panic("vinvalbuf: dirty bufs, vp %p", vp);
1.113     fvdl      734:                splx(s);
1.126     mycroft   735: #endif
1.30      mycroft   736:        }
1.113     fvdl      737:
1.115     fvdl      738:        s = splbio();
                    739:
                                /*
                                 * Every wait for a busy buffer, and every VOP_BWRITE, jumps
                                 * back here so both lists are rescanned from the start.
                                 */
1.126     mycroft   740: restart:
                    741:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    742:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        743:                simple_lock(&bp->b_interlock);
1.126     mycroft   744:                if (bp->b_flags & B_BUSY) {
                    745:                        bp->b_flags |= B_WANTED;
                                                /*
                                                 * No unlock follows the sleep: PNORELOCK
                                                 * presumably leaves b_interlock released on
                                                 * return -- TODO confirm against ltsleep(9).
                                                 */
1.187     pk        746:                        error = ltsleep((caddr_t)bp,
                    747:                                    slpflag | (PRIBIO + 1) | PNORELOCK,
                    748:                                    "vinvalbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   749:                        if (error) {
                    750:                                splx(s);
                    751:                                return (error);
                    752:                        }
                    753:                        goto restart;
1.113     fvdl      754:                }
                                        /* Claim the buffer and mark it invalid before brelse(). */
1.126     mycroft   755:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        756:                simple_unlock(&bp->b_interlock);
1.126     mycroft   757:                brelse(bp);
                    758:        }
1.30      mycroft   759:
1.126     mycroft   760:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    761:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        762:                simple_lock(&bp->b_interlock);
1.126     mycroft   763:                if (bp->b_flags & B_BUSY) {
                    764:                        bp->b_flags |= B_WANTED;
1.187     pk        765:                        error = ltsleep((caddr_t)bp,
                    766:                                    slpflag | (PRIBIO + 1) | PNORELOCK,
                    767:                                    "vinvalbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   768:                        if (error) {
                    769:                                splx(s);
                    770:                                return (error);
1.29      cgd       771:                        }
1.126     mycroft   772:                        goto restart;
                    773:                }
                    774:                /*
                    775:                 * XXX Since there are no node locks for NFS, I believe
                    776:                 * there is a slight chance that a delayed write will
                    777:                 * occur while sleeping just above, so check for it.
                    778:                 */
                    779:                if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                    780: #ifdef DEBUG
                    781:                        printf("buffer still DELWRI\n");
                    782: #endif
                                                /* Write it out (no B_INVAL) rather than discard it. */
1.63      mycroft   783:                        bp->b_flags |= B_BUSY | B_VFLUSH;
1.187     pk        784:                        simple_unlock(&bp->b_interlock);
1.126     mycroft   785:                        VOP_BWRITE(bp);
                    786:                        goto restart;
                    787:                }
                    788:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        789:                simple_unlock(&bp->b_interlock);
1.126     mycroft   790:                brelse(bp);
                    791:        }
                    792:
                    793: #ifdef DIAGNOSTIC
                    794:        if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
                    795:                panic("vinvalbuf: flush failed, vp %p", vp);
1.113     fvdl      796: #endif
1.126     mycroft   797:
                    798:        splx(s);
                    799:
                    800:        return (0);
                    801: }
                    802:
                    803: /*
                    804:  * Destroy any in core blocks past the truncation length.
                    805:  * Called with the underlying vnode locked, which should prevent new dirty
                    806:  * buffers from being queued.
                          *
                          * lbn is the first logical block to discard: buffers whose
                          * b_lblkno is below lbn are left alone, all others on the clean
                          * and dirty lists are marked B_INVAL and released.  slpflag and
                          * slptimeo are passed to ltsleep() when waiting for a busy buffer.
                          * vp->v_mount is dereferenced, so vp must belong to a mount.
                          *
                          * Returns 0 on success, otherwise the error from VOP_PUTPAGES or
                          * ltsleep.
                    807:  */
                    808: int
1.247     thorpej   809: vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
1.126     mycroft   810: {
                    811:        struct buf *bp, *nbp;
1.158     chs       812:        int s, error;
1.166     chs       813:        voff_t off;
                    814:
                                /* Byte offset of block lbn, page-aligned by round_page(). */
                    815:        off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
                                /*
                                 * v_interlock is not released in this function; presumably
                                 * VOP_PUTPAGES drops it -- TODO confirm.
                                 */
                    816:        simple_lock(&vp->v_interlock);
                    817:        error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
                    818:        if (error) {
                    819:                return error;
                    820:        }
1.126     mycroft   821:
                    822:        s = splbio();
                    823:
                                /* Every wait for a busy buffer restarts the scan from here. */
                    824: restart:
                    825:        for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                    826:                nbp = LIST_NEXT(bp, b_vnbufs);
                                        /* Blocks before the truncation point are kept. */
                    827:                if (bp->b_lblkno < lbn)
                    828:                        continue;
1.187     pk        829:                simple_lock(&bp->b_interlock);
1.126     mycroft   830:                if (bp->b_flags & B_BUSY) {
                    831:                        bp->b_flags |= B_WANTED;
                                                /*
                                                 * No unlock follows the sleep: PNORELOCK
                                                 * presumably leaves b_interlock released on
                                                 * return -- TODO confirm against ltsleep(9).
                                                 */
1.187     pk        832:                        error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
                    833:                            "vtruncbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   834:                        if (error) {
                    835:                                splx(s);
                    836:                                return (error);
1.29      cgd       837:                        }
1.126     mycroft   838:                        goto restart;
1.29      cgd       839:                }
1.126     mycroft   840:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        841:                simple_unlock(&bp->b_interlock);
1.126     mycroft   842:                brelse(bp);
1.29      cgd       843:        }
1.115     fvdl      844:
                                /* Same treatment for the dirty list; the data is not written. */
1.126     mycroft   845:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    846:                nbp = LIST_NEXT(bp, b_vnbufs);
                    847:                if (bp->b_lblkno < lbn)
                    848:                        continue;
1.187     pk        849:                simple_lock(&bp->b_interlock);
1.126     mycroft   850:                if (bp->b_flags & B_BUSY) {
                    851:                        bp->b_flags |= B_WANTED;
1.187     pk        852:                        error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
                    853:                            "vtruncbuf", slptimeo, &bp->b_interlock);
1.126     mycroft   854:                        if (error) {
                    855:                                splx(s);
                    856:                                return (error);
                    857:                        }
                    858:                        goto restart;
                    859:                }
                    860:                bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
1.187     pk        861:                simple_unlock(&bp->b_interlock);
1.126     mycroft   862:                brelse(bp);
                    863:        }
1.115     fvdl      864:
                    865:        splx(s);
                    866:
1.30      mycroft   867:        return (0);
                    868: }
                    869:
                         /*
                          * Write out the dirty data of a vnode.
                          *
                          * Pages are pushed first with VOP_PUTPAGES (PGO_CLEANIT |
                          * PGO_ALLPAGES, plus PGO_SYNCIO when sync is non-zero); its result
                          * is ignored.  The dirty buffer list is then scanned repeatedly:
                          * busy buffers are skipped, any other buffer is marked
                          * B_BUSY | B_VFLUSH and written, and the scan starts over.
                          *
                          * When sync is zero the function returns once a scan finds
                          * nothing to write.  Otherwise it sleeps until v_numoutput is
                          * zero and starts over if the dirty list is still non-empty.
                          *
                          * Panics if a non-busy buffer on the dirty list lacks B_DELWRI.
                          */
                    870: void
1.247     thorpej   871: vflushbuf(struct vnode *vp, int sync)
1.30      mycroft   872: {
1.123     augustss  873:        struct buf *bp, *nbp;
1.166     chs       874:        int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
1.30      mycroft   875:        int s;
                    876:
                                /*
                                 * v_interlock is not released in this function; presumably
                                 * VOP_PUTPAGES drops it -- TODO confirm.
                                 */
1.166     chs       877:        simple_lock(&vp->v_interlock);
                    878:        (void) VOP_PUTPAGES(vp, 0, 0, flags);
1.142     chs       879:
1.30      mycroft   880: loop:
                    881:        s = splbio();
1.126     mycroft   882:        for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                    883:                nbp = LIST_NEXT(bp, b_vnbufs);
1.187     pk        884:                simple_lock(&bp->b_interlock);
                                        /* Busy buffers are skipped, not waited for. */
1.187     pk        885:                if ((bp->b_flags & B_BUSY)) {
                    886:                        simple_unlock(&bp->b_interlock);
1.30      mycroft   887:                        continue;
1.187     pk        888:                }
1.30      mycroft   889:                if ((bp->b_flags & B_DELWRI) == 0)
1.125     chs       890:                        panic("vflushbuf: not dirty, bp %p", bp);
1.63      mycroft   891:                bp->b_flags |= B_BUSY | B_VFLUSH;
1.187     pk        892:                simple_unlock(&bp->b_interlock);
                                        /* The write is issued with the spl level restored. */
1.30      mycroft   893:                splx(s);
                    894:                /*
                    895:                 * Wait for I/O associated with indirect blocks to complete,
                    896:                 * since there is no way to quickly wait for them below.
                                         * That is, bwrite() is used only for a synchronous flush
                                         * of a buffer whose b_vp is some other vnode.
                    897:                 */
                    898:                if (bp->b_vp == vp || sync == 0)
                    899:                        (void) bawrite(bp);
                    900:                else
                    901:                        (void) bwrite(bp);
                                        /* Rescan from the head after every write. */
                    902:                goto loop;
                    903:        }
                    904:        if (sync == 0) {
                    905:                splx(s);
                    906:                return;
                    907:        }
                                /* Sleep until vwakeup() reports that all output has drained. */
1.187     pk        908:        simple_lock(&global_v_numoutput_slock);
1.30      mycroft   909:        while (vp->v_numoutput) {
                    910:                vp->v_flag |= VBWAIT;
1.187     pk        911:                ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
                    912:                        &global_v_numoutput_slock);
1.30      mycroft   913:        }
1.187     pk        914:        simple_unlock(&global_v_numoutput_slock);
1.30      mycroft   915:        splx(s);
1.126     mycroft   916:        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
1.30      mycroft   917:                vprint("vflushbuf: dirty", vp);
                    918:                goto loop;
                    919:        }
1.29      cgd       920: }
                    921:
                    922: /*
                    923:  * Associate a buffer with a vnode.
                          *
                          * Panics if bp already has a vnode.  The vnode is VHOLD()ed, and
                          * at splbio bp->b_vp is set, bp->b_dev is set to the vnode's
                          * v_rdev for VBLK/VCHR vnodes (NODEV otherwise), and bp is put on
                          * the vnode's clean buffer list.
                    924:  */
1.50      christos  925: void
1.247     thorpej   926: bgetvp(struct vnode *vp, struct buf *bp)
1.29      cgd       927: {
1.115     fvdl      928:        int s;
1.29      cgd       929:
                    930:        if (bp->b_vp)
1.125     chs       931:                panic("bgetvp: not free, bp %p", bp);
1.29      cgd       932:        VHOLD(vp);
1.115     fvdl      933:        s = splbio();
1.29      cgd       934:        bp->b_vp = vp;
                                /* Only device vnodes supply a device number for the buffer. */
                    935:        if (vp->v_type == VBLK || vp->v_type == VCHR)
                    936:                bp->b_dev = vp->v_rdev;
                    937:        else
                    938:                bp->b_dev = NODEV;
                    939:        /*
                    940:         * Insert onto list for new vnode.
                    941:         */
                    942:        bufinsvn(bp, &vp->v_cleanblkhd);
1.115     fvdl      943:        splx(s);
1.29      cgd       944: }
                    945:
                    946: /*
                    947:  * Disassociate a buffer from a vnode.
                          *
                          * Panics if bp has no vnode.  At splbio, bp is taken off the
                          * vnode buffer list it is on (if any), the vnode is removed from
                          * v_synclist when it no longer has pages or dirty buffers,
                          * bp->b_vp is cleared and the vnode is HOLDRELE()d.
                    948:  */
1.50      christos  949: void
1.247     thorpej   950: brelvp(struct buf *bp)
1.29      cgd       951: {
                    952:        struct vnode *vp;
1.115     fvdl      953:        int s;
1.29      cgd       954:
1.125     chs       955:        if (bp->b_vp == NULL)
                    956:                panic("brelvp: vp NULL, bp %p", bp);
1.115     fvdl      957:
                    958:        s = splbio();
1.113     fvdl      959:        vp = bp->b_vp;
1.29      cgd       960:        /*
                    961:         * Delete from old vnode list, if on one.
                    962:         */
1.177     matt      963:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       964:                bufremvn(bp);
1.142     chs       965:
                                /*
                                 * With no pages in the object and no dirty buffers left, a
                                 * vnode flagged VONWORKLST is taken off v_synclist.
                                 */
1.158     chs       966:        if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
1.142     chs       967:            LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1.113     fvdl      968:                vp->v_flag &= ~VONWORKLST;
                    969:                LIST_REMOVE(vp, v_synclist);
                    970:        }
1.142     chs       971:
                    972:        bp->b_vp = NULL;
1.29      cgd       973:        HOLDRELE(vp);
1.115     fvdl      974:        splx(s);
1.29      cgd       975: }
                    976:
                    977: /*
                    978:  * Reassign a buffer from one vnode to another.
                    979:  * Used to assign file specific control information
                    980:  * (indirect blocks) to the vnode to which they belong.
1.115     fvdl      981:  *
                    982:  * This function must be called at splbio().
                          *
                          * bp is taken off the vnode buffer list it is on (if any) and put
                          * on newvp's dirty list when B_DELWRI is set, otherwise on its
                          * clean list.  bp->b_vp itself is not modified here.
                          *
                          * newvp's worklist state is kept in step: a clean insertion
                          * removes newvp from v_synclist once it has no pages and no dirty
                          * buffers, while a dirty insertion adds it through
                          * vn_syncer_add_to_worklist() if it is not yet flagged VONWORKLST.
1.29      cgd       983:  */
1.50      christos  984: void
1.247     thorpej   985: reassignbuf(struct buf *bp, struct vnode *newvp)
1.29      cgd       986: {
1.113     fvdl      987:        struct buflists *listheadp;
1.246     christos  988:        int delayx;
1.29      cgd       989:
                    990:        /*
                    991:         * Delete from old vnode list, if on one.
                    992:         */
1.177     matt      993:        if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1.29      cgd       994:                bufremvn(bp);
                    995:        /*
                    996:         * If dirty, put on list of dirty buffers;
                    997:         * otherwise insert onto list of clean buffers.
                    998:         */
1.113     fvdl      999:        if ((bp->b_flags & B_DELWRI) == 0) {
                   1000:                listheadp = &newvp->v_cleanblkhd;
1.158     chs      1001:                if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
1.142     chs      1002:                    (newvp->v_flag & VONWORKLST) &&
1.113     fvdl     1003:                    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
                   1004:                        newvp->v_flag &= ~VONWORKLST;
                   1005:                        LIST_REMOVE(newvp, v_synclist);
                   1006:                }
                   1007:        } else {
1.29      cgd      1008:                listheadp = &newvp->v_dirtyblkhd;
1.113     fvdl     1009:                if ((newvp->v_flag & VONWORKLST) == 0) {
                                                /*
                                                 * Pick the delay by vnode type: dirdelay for
                                                 * directories, metadelay for block devices
                                                 * with a v_specmountpoint, filedelay for
                                                 * everything else.
                                                 */
                   1010:                        switch (newvp->v_type) {
                   1011:                        case VDIR:
1.246     christos 1012:                                delayx = dirdelay;
1.113     fvdl     1013:                                break;
                   1014:                        case VBLK:
                   1015:                                if (newvp->v_specmountpoint != NULL) {
1.246     christos 1016:                                        delayx = metadelay;
1.113     fvdl     1017:                                        break;
                   1018:                                }
                   1019:                                /* fall through */
                   1020:                        default:
1.246     christos 1021:                                delayx = filedelay;
1.118     mycroft  1022:                                break;
1.113     fvdl     1023:                        }
                                                /* Vnodes on an MNT_ASYNC mount are not queued. */
1.118     mycroft  1024:                        if (!newvp->v_mount ||
                   1025:                            (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1.246     christos 1026:                                vn_syncer_add_to_worklist(newvp, delayx);
1.113     fvdl     1027:                }
                   1028:        }
1.29      cgd      1029:        bufinsvn(bp, listheadp);
                   1030: }
                   1031:
                   1032: /*
                   1033:  * Create a vnode for a block device.
1.59      thorpej  1034:  * Used for root filesystem and swap areas.
1.29      cgd      1035:  * Also used for memory file system special devices.
                          *
                          * Calls getdevvp() with type VBLK and returns its result; the
                          * vnode is stored through vpp.
                   1036:  */
1.50      christos 1037: int
1.247     thorpej  1038: bdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd      1039: {
1.30      mycroft  1040:
                   1041:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd      1042: }
                   1043:
                   1044: /*
                   1045:  * Create a vnode for a character device.
                   1046:  * Used for kernfs and some console handling.
                          *
                          * Calls getdevvp() with type VCHR and returns its result; the
                          * vnode is stored through vpp.
                   1047:  */
1.50      christos 1048: int
1.247     thorpej  1049: cdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd      1050: {
1.30      mycroft  1051:
                   1052:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd      1053: }
                   1054:
                   1055: /*
                   1056:  * Create a vnode for a device.
                   1057:  * Used by bdevvp (block device) for root file system etc.,
                   1058:  * and by cdevvp (character device) for console and kernfs.
                          *
                          * For dev == NODEV, *vpp is set to NULLVP and 0 is returned.
                          * Otherwise a new vnode is obtained from getnewvnode() with tag
                          * VT_NON, no mount and spec_vnodeop_p; on failure *vpp is set to
                          * NULLVP and the error is returned.  The vnode's type is set to
                          * the requested type and checkalias() is consulted: if it returns
                          * a vnode, the new one is released with vput() and that vnode is
                          * stored in *vpp instead.  Returns 0 on success.
                   1059:  */
1.50      christos 1060: int
1.247     thorpej  1061: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
1.29      cgd      1062: {
1.123     augustss 1063:        struct vnode *vp;
1.29      cgd      1064:        struct vnode *nvp;
                   1065:        int error;
                   1066:
1.80      fvdl     1067:        if (dev == NODEV) {
                   1068:                *vpp = NULLVP;
1.29      cgd      1069:                return (0);
1.80      fvdl     1070:        }
1.50      christos 1071:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd      1072:        if (error) {
                   1073:                *vpp = NULLVP;
                   1074:                return (error);
                   1075:        }
                   1076:        vp = nvp;
                   1077:        vp->v_type = type;
                                /* nvp is reused below to hold an existing alias, if any. */
1.50      christos 1078:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd      1079:                vput(vp);
                   1080:                vp = nvp;
                   1081:        }
                   1082:        *vpp = vp;
                   1083:        return (0);
                   1084: }
                   1085:
                   1086: /*
                   1087:  * Check to see if the new vnode represents a special device
                   1088:  * for which we already have a vnode (either because of
                   1089:  * bdevvp() or because of a different vnode representing
                   1090:  * the same block device). If such an alias exists, deallocate
                   1091:  * the existing contents and return the aliased vnode. The
                   1092:  * caller is responsible for filling it with its new contents.
                   1093:  */
                   1094: struct vnode *
1.247     thorpej  1095: checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
1.29      cgd      1096: {
1.201     fvdl     1097:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1098:        struct vnode *vp;
1.29      cgd      1099:        struct vnode **vpp;
                   1100:
                   1101:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                   1102:                return (NULLVP);
                   1103:
                   1104:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                   1105: loop:
1.80      fvdl     1106:        simple_lock(&spechash_slock);
1.29      cgd      1107:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                   1108:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                   1109:                        continue;
                   1110:                /*
                   1111:                 * Alias, but not in use, so flush it out.
                   1112:                 */
1.80      fvdl     1113:                simple_lock(&vp->v_interlock);
1.231     mycroft  1114:                simple_unlock(&spechash_slock);
1.29      cgd      1115:                if (vp->v_usecount == 0) {
1.201     fvdl     1116:                        vgonel(vp, p);
1.29      cgd      1117:                        goto loop;
                   1118:                }
1.231     mycroft  1119:                /*
                   1120:                 * What we're interested to know here is if someone else has
                   1121:                 * removed this vnode from the device hash list while we were
                   1122:                 * waiting.  This can only happen if vclean() did it, and
                   1123:                 * this requires the vnode to be locked.  Therefore, we use
                   1124:                 * LK_SLEEPFAIL and retry.
                   1125:                 */
                   1126:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
1.29      cgd      1127:                        goto loop;
1.231     mycroft  1128:                simple_lock(&spechash_slock);
1.29      cgd      1129:                break;
                   1130:        }
1.34      cgd      1131:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd      1132:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej  1133:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                   1134:                /* XXX Erg. */
                   1135:                if (nvp->v_specinfo == NULL) {
                   1136:                        simple_unlock(&spechash_slock);
                   1137:                        uvm_wait("checkalias");
                   1138:                        goto loop;
                   1139:                }
                   1140:
1.29      cgd      1141:                nvp->v_rdev = nvp_rdev;
                   1142:                nvp->v_hashchain = vpp;
                   1143:                nvp->v_specnext = *vpp;
1.113     fvdl     1144:                nvp->v_specmountpoint = NULL;
1.80      fvdl     1145:                simple_unlock(&spechash_slock);
1.62      kleink   1146:                nvp->v_speclockf = NULL;
1.216     hannken  1147:                simple_lock_init(&nvp->v_spec_cow_slock);
                   1148:                SLIST_INIT(&nvp->v_spec_cow_head);
                   1149:                nvp->v_spec_cow_req = 0;
                   1150:                nvp->v_spec_cow_count = 0;
                   1151:
1.29      cgd      1152:                *vpp = nvp;
1.80      fvdl     1153:                if (vp != NULLVP) {
1.29      cgd      1154:                        nvp->v_flag |= VALIASED;
                   1155:                        vp->v_flag |= VALIASED;
                   1156:                        vput(vp);
                   1157:                }
                   1158:                return (NULLVP);
                   1159:        }
1.80      fvdl     1160:        simple_unlock(&spechash_slock);
                   1161:        VOP_UNLOCK(vp, 0);
                   1162:        simple_lock(&vp->v_interlock);
1.201     fvdl     1163:        vclean(vp, 0, p);
1.29      cgd      1164:        vp->v_op = nvp->v_op;
                   1165:        vp->v_tag = nvp->v_tag;
1.104     wrstuden 1166:        vp->v_vnlock = &vp->v_lock;
                   1167:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd      1168:        nvp->v_type = VNON;
                   1169:        insmntque(vp, mp);
                   1170:        return (vp);
                   1171: }
                   1172:
                   1173: /*
                   1174:  * Grab a particular vnode from the free list, increment its
1.83      fvdl     1175:  * reference count and lock it. If the vnode lock bit is set the
                   1176:  * vnode is being eliminated in vgone. In that case, we can not
                   1177:  * grab the vnode, so the process is awakened when the transition is
                   1178:  * completed, and an error returned to indicate that the vnode is no
                   1179:  * longer usable (possibly having been changed to a new file system type).
1.29      cgd      1180:  */
1.30      mycroft  1181: int
1.247     thorpej  1182: vget(struct vnode *vp, int flags)
1.29      cgd      1183: {
1.175     perseant 1184:        int error;
1.29      cgd      1185:
1.30      mycroft  1186:        /*
                   1187:         * If the vnode is in the process of being cleaned out for
                   1188:         * another use, we wait for the cleaning to finish and then
1.80      fvdl     1189:         * return failure. Cleaning is determined by checking that
                   1190:         * the VXLOCK flag is set.
                   1191:         */
1.142     chs      1192:
1.80      fvdl     1193:        if ((flags & LK_INTERLOCK) == 0)
                   1194:                simple_lock(&vp->v_interlock);
                   1195:        if (vp->v_flag & VXLOCK) {
1.142     chs      1196:                if (flags & LK_NOWAIT) {
1.143     sommerfe 1197:                        simple_unlock(&vp->v_interlock);
1.142     chs      1198:                        return EBUSY;
                   1199:                }
1.29      cgd      1200:                vp->v_flag |= VXWANT;
1.158     chs      1201:                ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80      fvdl     1202:                return (ENOENT);
1.29      cgd      1203:        }
1.80      fvdl     1204:        if (vp->v_usecount == 0) {
                   1205:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1206:                if (vp->v_holdcnt > 0)
                   1207:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1208:                else
                   1209:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1210:                simple_unlock(&vnode_free_list_slock);
                   1211:        }
1.29      cgd      1212:        vp->v_usecount++;
1.112     mycroft  1213: #ifdef DIAGNOSTIC
                   1214:        if (vp->v_usecount == 0) {
                   1215:                vprint("vget", vp);
1.125     chs      1216:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft  1217:        }
                   1218: #endif
1.80      fvdl     1219:        if (flags & LK_TYPE_MASK) {
1.113     fvdl     1220:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
                   1221:                        /*
                   1222:                         * must expand vrele here because we do not want
                   1223:                         * to call VOP_INACTIVE if the reference count
                   1224:                         * drops back to zero since it was never really
                   1225:                         * active. We must remove it from the free list
                   1226:                         * before sleeping so that multiple processes do
                   1227:                         * not try to recycle it.
                   1228:                         */
                   1229:                        simple_lock(&vp->v_interlock);
                   1230:                        vp->v_usecount--;
                   1231:                        if (vp->v_usecount > 0) {
                   1232:                                simple_unlock(&vp->v_interlock);
                   1233:                                return (error);
                   1234:                        }
                   1235:                        /*
                   1236:                         * insert at tail of LRU list
                   1237:                         */
                   1238:                        simple_lock(&vnode_free_list_slock);
                   1239:                        if (vp->v_holdcnt > 0)
                   1240:                                TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
                   1241:                                    v_freelist);
                   1242:                        else
                   1243:                                TAILQ_INSERT_TAIL(&vnode_free_list, vp,
                   1244:                                    v_freelist);
                   1245:                        simple_unlock(&vnode_free_list_slock);
                   1246:                        simple_unlock(&vp->v_interlock);
                   1247:                }
1.80      fvdl     1248:                return (error);
                   1249:        }
                   1250:        simple_unlock(&vp->v_interlock);
1.29      cgd      1251:        return (0);
                   1252: }
                   1253:
                   1254: /*
                   1255:  * vput(), just unlock and vrele()
                   1256:  */
                   1257: void
1.247     thorpej  1258: vput(struct vnode *vp)
1.29      cgd      1259: {
1.201     fvdl     1260:        struct proc *p = curproc;       /* XXX */
1.30      mycroft  1261:
1.111     mycroft  1262: #ifdef DIAGNOSTIC
1.80      fvdl     1263:        if (vp == NULL)
                   1264:                panic("vput: null vp");
                   1265: #endif
                   1266:        simple_lock(&vp->v_interlock);
                   1267:        vp->v_usecount--;
                   1268:        if (vp->v_usecount > 0) {
                   1269:                simple_unlock(&vp->v_interlock);
                   1270:                VOP_UNLOCK(vp, 0);
                   1271:                return;
                   1272:        }
                   1273: #ifdef DIAGNOSTIC
                   1274:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1275:                vprint("vput: bad ref count", vp);
                   1276:                panic("vput: ref cnt");
                   1277:        }
                   1278: #endif
                   1279:        /*
1.87      pk       1280:         * Insert at tail of LRU list.
1.80      fvdl     1281:         */
                   1282:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1283:        if (vp->v_holdcnt > 0)
                   1284:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1285:        else
                   1286:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1287:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej  1288:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1289:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1290:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1291:        }
1.161     thorpej  1292:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.80      fvdl     1293:        simple_unlock(&vp->v_interlock);
1.201     fvdl     1294:        VOP_INACTIVE(vp, p);
1.29      cgd      1295: }
                   1296:
                   1297: /*
                   1298:  * Vnode release.
                   1299:  * If count drops to zero, call inactive routine and return to freelist.
                   1300:  */
                   1301: void
1.247     thorpej  1302: vrele(struct vnode *vp)
1.29      cgd      1303: {
1.201     fvdl     1304:        struct proc *p = curproc;       /* XXX */
1.29      cgd      1305:
                   1306: #ifdef DIAGNOSTIC
                   1307:        if (vp == NULL)
                   1308:                panic("vrele: null vp");
                   1309: #endif
1.80      fvdl     1310:        simple_lock(&vp->v_interlock);
1.29      cgd      1311:        vp->v_usecount--;
1.80      fvdl     1312:        if (vp->v_usecount > 0) {
                   1313:                simple_unlock(&vp->v_interlock);
1.29      cgd      1314:                return;
1.80      fvdl     1315:        }
1.29      cgd      1316: #ifdef DIAGNOSTIC
1.80      fvdl     1317:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd      1318:                vprint("vrele: bad ref count", vp);
1.142     chs      1319:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd      1320:        }
                   1321: #endif
1.30      mycroft  1322:        /*
1.87      pk       1323:         * Insert at tail of LRU list.
1.30      mycroft  1324:         */
1.80      fvdl     1325:        simple_lock(&vnode_free_list_slock);
1.113     fvdl     1326:        if (vp->v_holdcnt > 0)
                   1327:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1328:        else
                   1329:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl     1330:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej  1331:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1332:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1333:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1334:        }
1.161     thorpej  1335:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.80      fvdl     1336:        if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1.201     fvdl     1337:                VOP_INACTIVE(vp, p);
1.29      cgd      1338: }
                   1339:
1.80      fvdl     1340: #ifdef DIAGNOSTIC
1.29      cgd      1341: /*
                   1342:  * Page or buffer structure gets a reference.
                   1343:  */
1.30      mycroft  1344: void
1.247     thorpej  1345: vholdl(struct vnode *vp)
1.29      cgd      1346: {
                   1347:
1.113     fvdl     1348:        /*
                   1349:         * If it is on the freelist and the hold count is currently
                   1350:         * zero, move it to the hold list. The test of the back
                   1351:         * pointer and the use reference count of zero is because
                   1352:         * it will be removed from a free list by getnewvnode,
                   1353:         * but will not have its reference count incremented until
                   1354:         * after calling vgone. If the reference count were
                   1355:         * incremented first, vgone would (incorrectly) try to
                   1356:         * close the previous instance of the underlying object.
                   1357:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1358:         * getnewvnode after removing it from a freelist to ensure
                   1359:         * that we do not try to move it here.
                   1360:         */
                   1361:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1362:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1363:                simple_lock(&vnode_free_list_slock);
                   1364:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                   1365:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                   1366:                simple_unlock(&vnode_free_list_slock);
                   1367:        }
1.29      cgd      1368:        vp->v_holdcnt++;
                   1369: }
                   1370:
                   1371: /*
                   1372:  * Page or buffer structure frees a reference.
                   1373:  */
1.30      mycroft  1374: void
1.247     thorpej  1375: holdrelel(struct vnode *vp)
1.29      cgd      1376: {
                   1377:
                   1378:        if (vp->v_holdcnt <= 0)
1.215     yamt     1379:                panic("holdrelel: holdcnt vp %p", vp);
1.29      cgd      1380:        vp->v_holdcnt--;
1.142     chs      1381:
1.113     fvdl     1382:        /*
                   1383:         * If it is on the holdlist and the hold count drops to
                   1384:         * zero, move it to the free list. The test of the back
                   1385:         * pointer and the use reference count of zero is because
                   1386:         * it will be removed from a free list by getnewvnode,
                   1387:         * but will not have its reference count incremented until
                   1388:         * after calling vgone. If the reference count were
                   1389:         * incremented first, vgone would (incorrectly) try to
                   1390:         * close the previous instance of the underlying object.
                   1391:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1392:         * getnewvnode after removing it from a freelist to ensure
                   1393:         * that we do not try to move it here.
                   1394:         */
1.142     chs      1395:
1.113     fvdl     1396:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                   1397:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                   1398:                simple_lock(&vnode_free_list_slock);
                   1399:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                   1400:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1401:                simple_unlock(&vnode_free_list_slock);
                   1402:        }
1.81      ross     1403: }
                   1404:
                   1405: /*
                   1406:  * Vnode reference.
                   1407:  */
                   1408: void
1.247     thorpej  1409: vref(struct vnode *vp)
1.81      ross     1410: {
                   1411:
                   1412:        simple_lock(&vp->v_interlock);
                   1413:        if (vp->v_usecount <= 0)
1.125     chs      1414:                panic("vref used where vget required, vp %p", vp);
1.81      ross     1415:        vp->v_usecount++;
1.112     mycroft  1416: #ifdef DIAGNOSTIC
                   1417:        if (vp->v_usecount == 0) {
                   1418:                vprint("vref", vp);
1.125     chs      1419:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft  1420:        }
                   1421: #endif
1.80      fvdl     1422:        simple_unlock(&vp->v_interlock);
1.29      cgd      1423: }
1.80      fvdl     1424: #endif /* DIAGNOSTIC */
1.29      cgd      1425:
                   1426: /*
                   1427:  * Remove any vnodes in the vnode table belonging to mount point mp.
                   1428:  *
1.183     yamt     1429:  * If FORCECLOSE is not specified, there should not be any active ones,
1.29      cgd      1430:  * return error if any are found (nb: this is a user error, not a
1.183     yamt     1431:  * system error). If FORCECLOSE is specified, detach any active vnodes
1.29      cgd      1432:  * that are found.
1.183     yamt     1433:  *
                   1434:  * If WRITECLOSE is set, only flush out regular file vnodes open for
                   1435:  * writing.
                   1436:  *
                   1437:  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29      cgd      1438:  */
1.30      mycroft  1439: #ifdef DEBUG
                   1440: int busyprt = 0;       /* print out busy vnodes */
                   1441: struct ctldebug debug1 = { "busyprt", &busyprt };
                   1442: #endif
1.29      cgd      1443:
1.50      christos 1444: int
1.247     thorpej  1445: vflush(struct mount *mp, struct vnode *skipvp, int flags)
1.29      cgd      1446: {
1.201     fvdl     1447:        struct proc *p = curproc;       /* XXX */
1.123     augustss 1448:        struct vnode *vp, *nvp;
1.29      cgd      1449:        int busy = 0;
                   1450:
1.80      fvdl     1451:        simple_lock(&mntvnode_slock);
1.29      cgd      1452: loop:
1.177     matt     1453:        for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1.29      cgd      1454:                if (vp->v_mount != mp)
                   1455:                        goto loop;
1.177     matt     1456:                nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      1457:                /*
                   1458:                 * Skip over a selected vnode.
                   1459:                 */
                   1460:                if (vp == skipvp)
                   1461:                        continue;
1.80      fvdl     1462:                simple_lock(&vp->v_interlock);
1.29      cgd      1463:                /*
                   1464:                 * Skip over a vnodes marked VSYSTEM.
                   1465:                 */
1.80      fvdl     1466:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                   1467:                        simple_unlock(&vp->v_interlock);
1.29      cgd      1468:                        continue;
1.80      fvdl     1469:                }
1.29      cgd      1470:                /*
1.30      mycroft  1471:                 * If WRITECLOSE is set, only flush out regular file
                   1472:                 * vnodes open for writing.
                   1473:                 */
                   1474:                if ((flags & WRITECLOSE) &&
1.92      thorpej  1475:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                   1476:                        simple_unlock(&vp->v_interlock);
1.30      mycroft  1477:                        continue;
1.92      thorpej  1478:                }
1.30      mycroft  1479:                /*
1.29      cgd      1480:                 * With v_usecount == 0, all we need to do is clear
                   1481:                 * out the vnode data structures and we are done.
                   1482:                 */
                   1483:                if (vp->v_usecount == 0) {
1.80      fvdl     1484:                        simple_unlock(&mntvnode_slock);
1.201     fvdl     1485:                        vgonel(vp, p);
1.80      fvdl     1486:                        simple_lock(&mntvnode_slock);
1.29      cgd      1487:                        continue;
                   1488:                }
                   1489:                /*
1.30      mycroft  1490:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd      1491:                 * For block or character devices, revert to an
                   1492:                 * anonymous device. For all other files, just kill them.
                   1493:                 */
                   1494:                if (flags & FORCECLOSE) {
1.80      fvdl     1495:                        simple_unlock(&mntvnode_slock);
1.29      cgd      1496:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.201     fvdl     1497:                                vgonel(vp, p);
1.29      cgd      1498:                        } else {
1.201     fvdl     1499:                                vclean(vp, 0, p);
1.30      mycroft  1500:                                vp->v_op = spec_vnodeop_p;
1.29      cgd      1501:                                insmntque(vp, (struct mount *)0);
                   1502:                        }
1.80      fvdl     1503:                        simple_lock(&mntvnode_slock);
1.29      cgd      1504:                        continue;
                   1505:                }
1.30      mycroft  1506: #ifdef DEBUG
1.29      cgd      1507:                if (busyprt)
                   1508:                        vprint("vflush: busy vnode", vp);
1.30      mycroft  1509: #endif
1.80      fvdl     1510:                simple_unlock(&vp->v_interlock);
1.29      cgd      1511:                busy++;
                   1512:        }
1.80      fvdl     1513:        simple_unlock(&mntvnode_slock);
1.29      cgd      1514:        if (busy)
                   1515:                return (EBUSY);
                   1516:        return (0);
                   1517: }
                   1518:
                   1519: /*
                   1520:  * Disassociate the underlying file system from a vnode.
                   1521:  */
                   1522: void
1.247     thorpej  1523: vclean(struct vnode *vp, int flags, struct proc *p)
1.29      cgd      1524: {
1.208     hannken  1525:        struct mount *mp;
1.175     perseant 1526:        int active;
1.29      cgd      1527:
1.166     chs      1528:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1529:
1.29      cgd      1530:        /*
                   1531:         * Check to see if the vnode is in use.
                   1532:         * If so we have to reference it before we clean it out
                   1533:         * so that its count cannot fall to zero and generate a
                   1534:         * race against ourselves to recycle it.
                   1535:         */
1.166     chs      1536:
1.112     mycroft  1537:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1538:                vp->v_usecount++;
1.112     mycroft  1539: #ifdef DIAGNOSTIC
                   1540:                if (vp->v_usecount == 0) {
                   1541:                        vprint("vclean", vp);
                   1542:                        panic("vclean: usecount overflow");
                   1543:                }
                   1544: #endif
                   1545:        }
1.87      pk       1546:
1.29      cgd      1547:        /*
                   1548:         * Prevent the vnode from being recycled or
                   1549:         * brought into use while we clean it out.
                   1550:         */
                   1551:        if (vp->v_flag & VXLOCK)
1.125     chs      1552:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1553:        vp->v_flag |= VXLOCK;
1.161     thorpej  1554:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1555:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1556:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1557:        }
1.161     thorpej  1558:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142     chs      1559:
1.29      cgd      1560:        /*
1.80      fvdl     1561:         * Even if the count is zero, the VOP_INACTIVE routine may still
                   1562:         * have the object locked while it cleans it out. The VOP_LOCK
                   1563:         * ensures that the VOP_INACTIVE routine is done with its work.
                   1564:         * For active vnodes, it ensures that no other activity can
                   1565:         * occur while the underlying object is being cleaned out.
                   1566:         */
                   1567:        VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
                   1568:
1.98      wrstuden 1569:        /*
1.142     chs      1570:         * Clean out any cached data associated with the vnode.
1.231     mycroft  1571:         * If special device, remove it from special device alias list.
                   1572:         * if it is on one.
1.29      cgd      1573:         */
1.166     chs      1574:        if (flags & DOCLOSE) {
1.211     dbj      1575:                int error;
1.231     mycroft  1576:                struct vnode *vq, *vx;
                   1577:
1.208     hannken  1578:                vn_start_write(vp, &mp, V_WAIT | V_LOWER);
1.211     dbj      1579:                error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1.208     hannken  1580:                vn_finished_write(mp, V_LOWER);
1.211     dbj      1581:                if (error)
                   1582:                        error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
                   1583:                KASSERT(error == 0);
1.166     chs      1584:                KASSERT((vp->v_flag & VONWORKLST) == 0);
1.231     mycroft  1585:
                   1586:                if (active)
                   1587:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
                   1588:
                   1589:                if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
                   1590:                    vp->v_specinfo != 0) {
                   1591:                        simple_lock(&spechash_slock);
                   1592:                        if (vp->v_hashchain != NULL) {
                   1593:                                if (*vp->v_hashchain == vp) {
                   1594:                                        *vp->v_hashchain = vp->v_specnext;
                   1595:                                } else {
                   1596:                                        for (vq = *vp->v_hashchain; vq;
                   1597:                                             vq = vq->v_specnext) {
                   1598:                                                if (vq->v_specnext != vp)
                   1599:                                                        continue;
                   1600:                                                vq->v_specnext = vp->v_specnext;
                   1601:                                                break;
                   1602:                                        }
                   1603:                                        if (vq == NULL)
                   1604:                                                panic("missing bdev");
                   1605:                                }
                   1606:                                if (vp->v_flag & VALIASED) {
                   1607:                                        vx = NULL;
                   1608:                                                for (vq = *vp->v_hashchain; vq;
                   1609:                                                     vq = vq->v_specnext) {
                   1610:                                                if (vq->v_rdev != vp->v_rdev ||
                   1611:                                                    vq->v_type != vp->v_type)
                   1612:                                                        continue;
                   1613:                                                if (vx)
                   1614:                                                        break;
                   1615:                                                vx = vq;
                   1616:                                        }
                   1617:                                        if (vx == NULL)
                   1618:                                                panic("missing alias");
                   1619:                                        if (vq == NULL)
                   1620:                                                vx->v_flag &= ~VALIASED;
                   1621:                                        vp->v_flag &= ~VALIASED;
                   1622:                                }
                   1623:                        }
                   1624:                        simple_unlock(&spechash_slock);
                   1625:                        FREE(vp->v_specinfo, M_VNODE);
                   1626:                        vp->v_specinfo = NULL;
                   1627:                }
1.166     chs      1628:        }
                   1629:        LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80      fvdl     1630:
1.29      cgd      1631:        /*
1.30      mycroft  1632:         * If purging an active vnode, it must be closed and
1.80      fvdl     1633:         * deactivated before being reclaimed. Note that the
                   1634:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1635:         */
                   1636:        if (active) {
1.201     fvdl     1637:                VOP_INACTIVE(vp, p);
1.80      fvdl     1638:        } else {
                   1639:                /*
                   1640:                 * Any other processes trying to obtain this lock must first
                   1641:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1642:                 */
                   1643:                VOP_UNLOCK(vp, 0);
1.29      cgd      1644:        }
                   1645:        /*
                   1646:         * Reclaim the vnode.
                   1647:         */
1.201     fvdl     1648:        if (VOP_RECLAIM(vp, p))
1.125     chs      1649:                panic("vclean: cannot reclaim, vp %p", vp);
1.87      pk       1650:        if (active) {
                   1651:                /*
                   1652:                 * Inline copy of vrele() since VOP_INACTIVE
                   1653:                 * has already been called.
                   1654:                 */
                   1655:                simple_lock(&vp->v_interlock);
                   1656:                if (--vp->v_usecount <= 0) {
                   1657: #ifdef DIAGNOSTIC
                   1658:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1659:                                vprint("vclean: bad ref count", vp);
                   1660:                                panic("vclean: ref cnt");
                   1661:                        }
                   1662: #endif
                   1663:                        /*
                   1664:                         * Insert at tail of LRU list.
                   1665:                         */
1.142     chs      1666:
1.113     fvdl     1667:                        simple_unlock(&vp->v_interlock);
1.87      pk       1668:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1669: #ifdef DIAGNOSTIC
1.113     fvdl     1670:                        if (vp->v_holdcnt > 0)
1.125     chs      1671:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1672: #endif
1.87      pk       1673:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1674:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1675:                } else
                   1676:                        simple_unlock(&vp->v_interlock);
1.87      pk       1677:        }
1.30      mycroft  1678:
1.169     chs      1679:        KASSERT(vp->v_uobj.uo_npages == 0);
1.80      fvdl     1680:        cache_purge(vp);
                   1681:
1.29      cgd      1682:        /*
1.30      mycroft  1683:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1684:         */
1.30      mycroft  1685:        vp->v_op = dead_vnodeop_p;
                   1686:        vp->v_tag = VT_NON;
1.139     enami    1687:        simple_lock(&vp->v_interlock);
1.181     jdolecek 1688:        VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
1.234     thorpej  1689:        vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1.29      cgd      1690:        if (vp->v_flag & VXWANT) {
                   1691:                vp->v_flag &= ~VXWANT;
1.139     enami    1692:                simple_unlock(&vp->v_interlock);
1.29      cgd      1693:                wakeup((caddr_t)vp);
1.139     enami    1694:        } else
                   1695:                simple_unlock(&vp->v_interlock);
1.29      cgd      1696: }
                   1697:
                   1698: /*
1.80      fvdl     1699:  * Recycle an unused vnode to the front of the free list.
                   1700:  * Release the passed interlock if the vnode will be recycled.
1.29      cgd      1701:  */
1.80      fvdl     1702: int
1.247     thorpej  1703: vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)
1.217     junyoung 1704: {
                   1705:
1.80      fvdl     1706:        simple_lock(&vp->v_interlock);
                   1707:        if (vp->v_usecount == 0) {
                   1708:                if (inter_lkp)
                   1709:                        simple_unlock(inter_lkp);
1.201     fvdl     1710:                vgonel(vp, p);
1.80      fvdl     1711:                return (1);
1.29      cgd      1712:        }
1.80      fvdl     1713:        simple_unlock(&vp->v_interlock);
                   1714:        return (0);
1.29      cgd      1715: }
                   1716:
                   1717: /*
                   1718:  * Eliminate all activity associated with a vnode
                   1719:  * in preparation for reuse.
                   1720:  */
                   1721: void
1.247     thorpej  1722: vgone(struct vnode *vp)
1.80      fvdl     1723: {
1.201     fvdl     1724:        struct proc *p = curproc;       /* XXX */
1.80      fvdl     1725:
                   1726:        simple_lock(&vp->v_interlock);
1.201     fvdl     1727:        vgonel(vp, p);
1.80      fvdl     1728: }
                   1729:
                   1730: /*
                   1731:  * vgone, with the vp interlock held.
                   1732:  */
                   1733: void
1.247     thorpej  1734: vgonel(struct vnode *vp, struct proc *p)
1.29      cgd      1735: {
                   1736:
1.166     chs      1737:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1738:
1.29      cgd      1739:        /*
                   1740:         * If a vgone (or vclean) is already in progress,
                   1741:         * wait until it is done and return.
                   1742:         */
1.166     chs      1743:
1.29      cgd      1744:        if (vp->v_flag & VXLOCK) {
                   1745:                vp->v_flag |= VXWANT;
1.166     chs      1746:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29      cgd      1747:                return;
                   1748:        }
1.166     chs      1749:
1.29      cgd      1750:        /*
                   1751:         * Clean out the filesystem specific data.
                   1752:         */
1.166     chs      1753:
1.201     fvdl     1754:        vclean(vp, DOCLOSE, p);
1.166     chs      1755:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1756:
1.29      cgd      1757:        /*
                   1758:         * Delete from old mount point vnode list, if on one.
                   1759:         */
1.166     chs      1760:
1.80      fvdl     1761:        if (vp->v_mount != NULL)
                   1762:                insmntque(vp, (struct mount *)0);
1.166     chs      1763:
1.29      cgd      1764:        /*
1.202     yamt     1765:         * The test of the back pointer and the reference count of
                   1766:         * zero is because it will be removed from the free list by
                   1767:         * getcleanvnode, but will not have its reference count
                   1768:         * incremented until after calling vgone. If the reference
                   1769:         * count were incremented first, vgone would (incorrectly)
                   1770:         * try to close the previous instance of the underlying object.
1.30      mycroft  1771:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1772:         * getnewvnode after removing it from the freelist to ensure
                   1773:         * that we do not try to move it here.
1.29      cgd      1774:         */
1.166     chs      1775:
1.202     yamt     1776:        vp->v_type = VBAD;
1.80      fvdl     1777:        if (vp->v_usecount == 0) {
1.202     yamt     1778:                boolean_t dofree;
                   1779:
1.80      fvdl     1780:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1781:                if (vp->v_holdcnt > 0)
1.125     chs      1782:                        panic("vgonel: not clean, vp %p", vp);
1.202     yamt     1783:                /*
                   1784:                 * if it isn't on the freelist, we're called by getcleanvnode
                   1785:                 * and vnode is being re-used.  otherwise, we'll free it.
                   1786:                 */
                   1787:                dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
                   1788:                if (dofree) {
1.80      fvdl     1789:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.202     yamt     1790:                        numvnodes--;
1.80      fvdl     1791:                }
                   1792:                simple_unlock(&vnode_free_list_slock);
1.202     yamt     1793:                if (dofree)
                   1794:                        pool_put(&vnode_pool, vp);
1.29      cgd      1795:        }
                   1796: }
                   1797:
                   1798: /*
                   1799:  * Lookup a vnode by device number.
                   1800:  */
1.50      christos 1801: int
1.247     thorpej  1802: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1.29      cgd      1803: {
1.80      fvdl     1804:        struct vnode *vp;
                   1805:        int rc = 0;
1.29      cgd      1806:
1.80      fvdl     1807:        simple_lock(&spechash_slock);
1.29      cgd      1808:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1809:                if (dev != vp->v_rdev || type != vp->v_type)
                   1810:                        continue;
                   1811:                *vpp = vp;
1.80      fvdl     1812:                rc = 1;
                   1813:                break;
1.29      cgd      1814:        }
1.80      fvdl     1815:        simple_unlock(&spechash_slock);
                   1816:        return (rc);
1.96      thorpej  1817: }
                   1818:
                   1819: /*
                   1820:  * Revoke all the vnodes corresponding to the specified minor number
                   1821:  * range (endpoints inclusive) of the specified major.
                   1822:  */
                   1823: void
1.247     thorpej  1824: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96      thorpej  1825: {
                   1826:        struct vnode *vp;
                   1827:        int mn;
                   1828:
                   1829:        for (mn = minl; mn <= minh; mn++)
                   1830:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1831:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1832: }
                   1833:
                   1834: /*
                   1835:  * Calculate the total number of references to a special device.
                   1836:  */
1.30      mycroft  1837: int
1.247     thorpej  1838: vcount(struct vnode *vp)
1.29      cgd      1839: {
1.123     augustss 1840:        struct vnode *vq, *vnext;
1.29      cgd      1841:        int count;
                   1842:
                   1843: loop:
                   1844:        if ((vp->v_flag & VALIASED) == 0)
                   1845:                return (vp->v_usecount);
1.80      fvdl     1846:        simple_lock(&spechash_slock);
1.30      mycroft  1847:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1848:                vnext = vq->v_specnext;
1.29      cgd      1849:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1850:                        continue;
                   1851:                /*
                   1852:                 * Alias, but not in use, so flush it out.
                   1853:                 */
1.151     wrstuden 1854:                if (vq->v_usecount == 0 && vq != vp &&
                   1855:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1856:                        simple_unlock(&spechash_slock);
1.29      cgd      1857:                        vgone(vq);
                   1858:                        goto loop;
                   1859:                }
                   1860:                count += vq->v_usecount;
                   1861:        }
1.80      fvdl     1862:        simple_unlock(&spechash_slock);
1.29      cgd      1863:        return (count);
                   1864: }
                   1865:
/*
 * ARRAY_SIZE(arr): number of elements in a true array (not a pointer).
 *
 * ARRAY_PRINT(idx, arr): arr[idx] when idx is a valid index of the
 * string table `arr', otherwise the literal "UNKNOWN".  Index 0 is a
 * valid entry; the cast to size_t turns a negative idx into a value
 * that fails the upper-bound test, so a single comparison covers both
 * ends of the range.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define ARRAY_PRINT(idx, arr) \
    ((size_t)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
                   1869:
                   1870: const char * const vnode_tags[] = { VNODE_TAGS };
                   1871: const char * const vnode_types[] = { VNODE_TYPES };
                   1872: const char vnode_flagbits[] = VNODE_FLAGBITS;
                   1873:
1.29      cgd      1874: /*
                   1875:  * Print out a description of a vnode.
                   1876:  */
                   1877: void
1.247     thorpej  1878: vprint(const char *label, struct vnode *vp)
1.29      cgd      1879: {
1.245     christos 1880:        char bf[96];
1.29      cgd      1881:
                   1882:        if (label != NULL)
1.57      christos 1883:                printf("%s: ", label);
1.237     christos 1884:        printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
                   1885:            "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
                   1886:            ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1.190     jdolecek 1887:            vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1.245     christos 1888:        bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
                   1889:        if (bf[0] != '\0')
                   1890:                printf(" flags (%s)", &bf[1]);
1.30      mycroft  1891:        if (vp->v_data == NULL) {
1.57      christos 1892:                printf("\n");
1.30      mycroft  1893:        } else {
1.57      christos 1894:                printf("\n\t");
1.30      mycroft  1895:                VOP_PRINT(vp);
                   1896:        }
1.29      cgd      1897: }
                   1898:
                   1899: #ifdef DEBUG
                   1900: /*
                   1901:  * List all of the locked vnodes in the system.
                   1902:  * Called when debugging the kernel.
                   1903:  */
1.51      christos 1904: void
1.247     thorpej  1905: printlockedvnodes(void)
1.29      cgd      1906: {
1.80      fvdl     1907:        struct mount *mp, *nmp;
                   1908:        struct vnode *vp;
1.29      cgd      1909:
1.57      christos 1910:        printf("Locked vnodes\n");
1.80      fvdl     1911:        simple_lock(&mountlist_slock);
1.177     matt     1912:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
                   1913:             mp = nmp) {
1.80      fvdl     1914:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177     matt     1915:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1916:                        continue;
                   1917:                }
1.158     chs      1918:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29      cgd      1919:                        if (VOP_ISLOCKED(vp))
1.158     chs      1920:                                vprint(NULL, vp);
1.80      fvdl     1921:                }
                   1922:                simple_lock(&mountlist_slock);
1.177     matt     1923:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.80      fvdl     1924:                vfs_unbusy(mp);
1.29      cgd      1925:        }
1.80      fvdl     1926:        simple_unlock(&mountlist_slock);
1.29      cgd      1927: }
                   1928: #endif
                   1929:
/*
 * sysctl helper routine for vfs.generic.conf lookups.
 *
 * The single remaining name component is a compat filesystem type
 * number.  It is mapped through mountcompatnames[] to a filesystem
 * name, the matching vfsops is looked up, and a struct vfsconf
 * describing it is built on the stack and handed to sysctl_lookup() as
 * the node's data.
 *
 * Returns ENOTDIR when the name does not have exactly one component,
 * EOPNOTSUPP when the number is out of range, has no compat name, or
 * names a filesystem that is not present; otherwise the result of
 * sysctl_lookup().
 */
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
static int
sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
{
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
	struct vfsconf vfc;
	struct sysctlnode node;
	struct vfsops *vfsp;
	u_int vfsnum;

	if (namelen != 1)
		return (ENOTDIR);
	vfsnum = name[0];
	if (vfsnum >= nmountcompatnames ||
	    mountcompatnames[vfsnum] == NULL)
		return (EOPNOTSUPP);
	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
	if (vfsp == NULL)
		return (EOPNOTSUPP);

	/*
	 * Zero the whole structure first so that padding, and any byte
	 * not explicitly assigned below, is zero rather than stale stack
	 * contents when the structure is exported.
	 */
	memset(&vfc, 0, sizeof(vfc));
	vfc.vfc_vfsops = vfsp;
	/*
	 * Copy at most MFSNAMELEN - 1 characters; together with the
	 * memset above this guarantees vfc_name is NUL-terminated even
	 * for an over-long vfs_name.
	 */
	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN - 1);
	vfc.vfc_typenum = vfsnum;
	vfc.vfc_refcount = vfsp->vfs_refcount;
	vfc.vfc_flags = 0;
	vfc.vfc_mountroot = vfsp->vfs_mountroot;
	vfc.vfc_next = NULL;

	/* Answer from a private copy of the node that points at vfc. */
	node = *rnode;
	node.sysctl_data = &vfc;
	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
#endif
                   1967:
                   1968: /*
1.220     lukem    1969:  * sysctl helper routine to return list of supported fstypes
                   1970:  */
                   1971: static int
                   1972: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
                   1973: {
1.245     christos 1974:        char bf[MFSNAMELEN];
1.220     lukem    1975:        char *where = oldp;
                   1976:        struct vfsops *v;
                   1977:        size_t needed, left, slen;
                   1978:        int error, first;
                   1979:
                   1980:        if (newp != NULL)
                   1981:                return (EPERM);
                   1982:        if (namelen != 0)
                   1983:                return (EINVAL);
                   1984:
                   1985:        first = 1;
                   1986:        error = 0;
                   1987:        needed = 0;
                   1988:        left = *oldlenp;
                   1989:
                   1990:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   1991:                if (where == NULL)
                   1992:                        needed += strlen(v->vfs_name) + 1;
                   1993:                else {
1.245     christos 1994:                        memset(bf, 0, sizeof(bf));
1.220     lukem    1995:                        if (first) {
1.245     christos 1996:                                strncpy(bf, v->vfs_name, sizeof(bf));
1.220     lukem    1997:                                first = 0;
                   1998:                        } else {
1.245     christos 1999:                                bf[0] = ' ';
                   2000:                                strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220     lukem    2001:                        }
1.245     christos 2002:                        bf[sizeof(bf)-1] = '\0';
                   2003:                        slen = strlen(bf);
1.220     lukem    2004:                        if (left < slen + 1)
                   2005:                                break;
                   2006:                        /* +1 to copy out the trailing NUL byte */
1.245     christos 2007:                        error = copyout(bf, where, slen + 1);
1.220     lukem    2008:                        if (error)
                   2009:                                break;
                   2010:                        where += slen;
                   2011:                        needed += slen;
                   2012:                        left -= slen;
                   2013:                }
                   2014:        }
                   2015:        *oldlenp = needed;
                   2016:        return (error);
                   2017: }
                   2018:
                   2019: /*
1.80      fvdl     2020:  * Top level filesystem related information gathering.
                   2021:  */
1.212     atatat   2022: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80      fvdl     2023: {
1.95      thorpej  2024: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.154     jdolecek 2025:        extern int nmountcompatnames;
1.95      thorpej  2026: #endif
1.80      fvdl     2027:
1.218     atatat   2028:        sysctl_createv(clog, 0, NULL, NULL,
                   2029:                       CTLFLAG_PERMANENT,
1.212     atatat   2030:                       CTLTYPE_NODE, "vfs", NULL,
                   2031:                       NULL, 0, NULL, 0,
                   2032:                       CTL_VFS, CTL_EOL);
1.218     atatat   2033:        sysctl_createv(clog, 0, NULL, NULL,
                   2034:                       CTLFLAG_PERMANENT,
1.226     atatat   2035:                       CTLTYPE_NODE, "generic",
                   2036:                       SYSCTL_DESCR("Non-specific vfs related information"),
1.212     atatat   2037:                       NULL, 0, NULL, 0,
                   2038:                       CTL_VFS, VFS_GENERIC, CTL_EOL);
                   2039:
                   2040: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.218     atatat   2041:        sysctl_createv(clog, 0, NULL, NULL,
                   2042:                       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
1.226     atatat   2043:                       CTLTYPE_INT, "maxtypenum",
                   2044:                       SYSCTL_DESCR("Highest valid filesystem type number"),
1.212     atatat   2045:                       NULL, nmountcompatnames, NULL, 0,
                   2046:                       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
                   2047: #endif
1.218     atatat   2048:        sysctl_createv(clog, 0, NULL, NULL,
                   2049:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226     atatat   2050:                       CTLTYPE_INT, "usermount",
                   2051:                       SYSCTL_DESCR("Whether unprivileged users may mount "
                   2052:                                    "filesystems"),
1.212     atatat   2053:                       NULL, 0, &dovfsusermount, 0,
                   2054:                       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220     lukem    2055:        sysctl_createv(clog, 0, NULL, NULL,
                   2056:                       CTLFLAG_PERMANENT,
                   2057:                       CTLTYPE_STRING, "fstypes",
                   2058:                       SYSCTL_DESCR("List of file systems present"),
                   2059:                       sysctl_vfs_generic_fstypes, 0, NULL, 0,
                   2060:                       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.95      thorpej  2061: #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1.218     atatat   2062:        sysctl_createv(clog, 0, NULL, NULL,
                   2063:                       CTLFLAG_PERMANENT,
1.226     atatat   2064:                       CTLTYPE_STRUCT, "conf",
                   2065:                       SYSCTL_DESCR("Filesystem configuration information"),
1.212     atatat   2066:                       sysctl_vfs_generic_conf, 0, NULL,
                   2067:                       sizeof(struct vfsconf),
                   2068:                       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
1.95      thorpej  2069: #endif
1.80      fvdl     2070: }
                   2071:
1.212     atatat   2072:
1.29      cgd      2073: int kinfo_vdebug = 1;
                   2074: int kinfo_vgetfailed;
                   2075: #define KINFO_VNODESLOP        10
                   2076: /*
                   2077:  * Dump vnode list (via sysctl).
                   2078:  * Copyout address of vnode followed by vnode.
                   2079:  */
                   2080: /* ARGSUSED */
1.50      christos 2081: int
1.212     atatat   2082: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29      cgd      2083: {
1.212     atatat   2084:        char *where = oldp;
                   2085:        size_t *sizep = oldlenp;
1.80      fvdl     2086:        struct mount *mp, *nmp;
                   2087:        struct vnode *nvp, *vp;
                   2088:        char *bp = where, *savebp;
1.29      cgd      2089:        char *ewhere;
                   2090:        int error;
1.212     atatat   2091:
                   2092:        if (namelen != 0)
                   2093:                return (EOPNOTSUPP);
                   2094:        if (newp != NULL)
                   2095:                return (EPERM);
1.29      cgd      2096:
1.90      perry    2097: #define VPTRSZ sizeof(struct vnode *)
                   2098: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      2099:        if (where == NULL) {
                   2100:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   2101:                return (0);
                   2102:        }
                   2103:        ewhere = where + *sizep;
1.80      fvdl     2104:
                   2105:        simple_lock(&mountlist_slock);
1.177     matt     2106:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
                   2107:             mp = nmp) {
1.80      fvdl     2108:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177     matt     2109:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      2110:                        continue;
1.80      fvdl     2111:                }
1.29      cgd      2112:                savebp = bp;
                   2113: again:
1.80      fvdl     2114:                simple_lock(&mntvnode_slock);
1.177     matt     2115:                for (vp = LIST_FIRST(&mp->mnt_vnodelist);
1.29      cgd      2116:                     vp != NULL;
1.80      fvdl     2117:                     vp = nvp) {
1.29      cgd      2118:                        /*
                   2119:                         * Check that the vp is still associated with
                   2120:                         * this filesystem.  RACE: could have been
                   2121:                         * recycled onto the same filesystem.
                   2122:                         */
                   2123:                        if (vp->v_mount != mp) {
1.80      fvdl     2124:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2125:                                if (kinfo_vdebug)
1.57      christos 2126:                                        printf("kinfo: vp changed\n");
1.29      cgd      2127:                                bp = savebp;
                   2128:                                goto again;
                   2129:                        }
1.177     matt     2130:                        nvp = LIST_NEXT(vp, v_mntvnodes);
1.29      cgd      2131:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     2132:                                simple_unlock(&mntvnode_slock);
1.29      cgd      2133:                                *sizep = bp - where;
                   2134:                                return (ENOMEM);
                   2135:                        }
1.80      fvdl     2136:                        simple_unlock(&mntvnode_slock);
1.29      cgd      2137:                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                   2138:                           (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
                   2139:                                return (error);
                   2140:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     2141:                        simple_lock(&mntvnode_slock);
1.29      cgd      2142:                }
1.80      fvdl     2143:                simple_unlock(&mntvnode_slock);
                   2144:                simple_lock(&mountlist_slock);
1.177     matt     2145:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      2146:                vfs_unbusy(mp);
                   2147:        }
1.80      fvdl     2148:        simple_unlock(&mountlist_slock);
1.29      cgd      2149:
                   2150:        *sizep = bp - where;
                   2151:        return (0);
1.30      mycroft  2152: }
                   2153:
                   2154: /*
                   2155:  * Check to see if a filesystem is mounted on a block device.
                   2156:  */
                   2157: int
1.247     thorpej  2158: vfs_mountedon(struct vnode *vp)
1.30      mycroft  2159: {
1.80      fvdl     2160:        struct vnode *vq;
                   2161:        int error = 0;
1.30      mycroft  2162:
1.113     fvdl     2163:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  2164:                return (EBUSY);
                   2165:        if (vp->v_flag & VALIASED) {
1.80      fvdl     2166:                simple_lock(&spechash_slock);
1.30      mycroft  2167:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   2168:                        if (vq->v_rdev != vp->v_rdev ||
                   2169:                            vq->v_type != vp->v_type)
                   2170:                                continue;
1.113     fvdl     2171:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     2172:                                error = EBUSY;
                   2173:                                break;
                   2174:                        }
1.30      mycroft  2175:                }
1.80      fvdl     2176:                simple_unlock(&spechash_slock);
1.30      mycroft  2177:        }
1.80      fvdl     2178:        return (error);
1.30      mycroft  2179: }
                   2180:
/*
 * Sanity-check a socket address.
 *
 * Returns 0 for an AF_INET address (when INET is configured) whose
 * length is exactly sizeof(struct sockaddr_in), whose port is zero and
 * whose sin_zero padding is all zero bytes, and for an AF_INET6 address
 * (when INET6 is configured) whose length is exactly
 * sizeof(struct sockaddr_in6) and whose port is zero.  Returns -1 for
 * anything else, including every other address family.
 */
static int
sacheck(struct sockaddr *sa)
{
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		const char *pad = (const char *)sin->sin_zero;
		size_t i;

		if (sin->sin_len != sizeof(*sin))
			return -1;
		if (sin->sin_port != 0)
			return -1;
		for (i = 0; i < sizeof(sin->sin_zero); i++) {
			if (pad[i] != '\0')
				return -1;
		}
		return 0;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (sin6->sin6_len != sizeof(*sin6))
			return -1;
		if (sin6->sin6_port != 0)
			return -1;
		return 0;
	}
#endif
	default:
		break;
	}
	return -1;
}
                   2216:
1.30      mycroft  2217: /*
                   2218:  * Build hash lists of net addresses and hang them off the mount point.
                   2219:  * Called by ufs_mount() to set up the lists of export addresses.
                   2220:  */
                   2221: static int
1.247     thorpej  2222: vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
                   2223:     struct export_args *argp)
1.30      mycroft  2224: {
1.123     augustss 2225:        struct netcred *np, *enp;
                   2226:        struct radix_node_head *rnh;
                   2227:        int i;
1.30      mycroft  2228:        struct sockaddr *saddr, *smask = 0;
                   2229:        struct domain *dom;
                   2230:        int error;
                   2231:
                   2232:        if (argp->ex_addrlen == 0) {
                   2233:                if (mp->mnt_flag & MNT_DEFEXPORTED)
                   2234:                        return (EPERM);
                   2235:                np = &nep->ne_defexported;
                   2236:                np->netc_exflags = argp->ex_flags;
1.163     christos 2237:                crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2238:                np->netc_anon.cr_ref = 1;
                   2239:                mp->mnt_flag |= MNT_DEFEXPORTED;
                   2240:                return (0);
                   2241:        }
1.156     jdolecek 2242:
1.195     christos 2243:        if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
1.156     jdolecek 2244:                return (EINVAL);
                   2245:
1.30      mycroft  2246:        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
                   2247:        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1.91      perry    2248:        memset((caddr_t)np, 0, i);
1.30      mycroft  2249:        saddr = (struct sockaddr *)(np + 1);
1.50      christos 2250:        error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
                   2251:        if (error)
1.30      mycroft  2252:                goto out;
                   2253:        if (saddr->sa_len > argp->ex_addrlen)
                   2254:                saddr->sa_len = argp->ex_addrlen;
1.195     christos 2255:        if (sacheck(saddr) == -1)
                   2256:                return EINVAL;
1.30      mycroft  2257:        if (argp->ex_masklen) {
                   2258:                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1.66      mycroft  2259:                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1.30      mycroft  2260:                if (error)
                   2261:                        goto out;
                   2262:                if (smask->sa_len > argp->ex_masklen)
                   2263:                        smask->sa_len = argp->ex_masklen;
1.195     christos 2264:                if (smask->sa_family != saddr->sa_family)
                   2265:                        return EINVAL;
                   2266:                if (sacheck(smask) == -1)
                   2267:                        return EINVAL;
1.30      mycroft  2268:        }
                   2269:        i = saddr->sa_family;
                   2270:        if ((rnh = nep->ne_rtable[i]) == 0) {
                   2271:                /*
                   2272:                 * Seems silly to initialize every AF when most are not
                   2273:                 * used, do so on demand here
                   2274:                 */
1.241     matt     2275:                DOMAIN_FOREACH(dom) {
1.30      mycroft  2276:                        if (dom->dom_family == i && dom->dom_rtattach) {
                   2277:                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                   2278:                                        dom->dom_rtoffset);
                   2279:                                break;
                   2280:                        }
1.241     matt     2281:                }
1.30      mycroft  2282:                if ((rnh = nep->ne_rtable[i]) == 0) {
                   2283:                        error = ENOBUFS;
                   2284:                        goto out;
                   2285:                }
                   2286:        }
1.195     christos 2287:
                   2288:        enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
                   2289:            np->netc_rnodes);
                   2290:        if (enp != np) {
                   2291:                if (enp == NULL) {
1.72      fvdl     2292:                        enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
1.195     christos 2293:                            smask, rnh);
                   2294:                        if (enp == NULL) {
1.72      fvdl     2295:                                error = EPERM;
                   2296:                                goto out;
                   2297:                        }
1.195     christos 2298:                } else
                   2299:                        enp->netc_refcnt++;
                   2300:
                   2301:                goto check;
                   2302:        } else
                   2303:                enp->netc_refcnt = 1;
1.72      fvdl     2304:
1.30      mycroft  2305:        np->netc_exflags = argp->ex_flags;
1.163     christos 2306:        crcvt(&np->netc_anon, &argp->ex_anon);
1.30      mycroft  2307:        np->netc_anon.cr_ref = 1;
1.195     christos 2308:        return 0;
                   2309: check:
                   2310:        if (enp->netc_exflags != argp->ex_flags ||
                   2311:            crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
                   2312:                error = EPERM;
                   2313:        else
                   2314:                error = 0;
1.30      mycroft  2315: out:
                   2316:        free(np, M_NETADDR);
1.195     christos 2317:        return error;
1.30      mycroft  2318: }
                   2319:
                   2320: /* ARGSUSED */
                   2321: static int
1.247     thorpej  2322: vfs_free_netcred(struct radix_node *rn, void *w)
1.30      mycroft  2323: {
1.123     augustss 2324:        struct radix_node_head *rnh = (struct radix_node_head *)w;
1.195     christos 2325:        struct netcred *np = (struct netcred *)(void *)rn;
1.30      mycroft  2326:
                   2327:        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1.195     christos 2328:        if (--(np->netc_refcnt) <= 0)
                   2329:                free(np, M_NETADDR);
1.30      mycroft  2330:        return (0);
                   2331: }
                   2332:
                   2333: /*
                   2334:  * Free the net address hash lists that are hanging off the mount points.
                   2335:  */
                   2336: static void
1.247     thorpej  2337: vfs_free_addrlist(struct netexport *nep)
1.30      mycroft  2338: {
1.123     augustss 2339:        int i;
                   2340:        struct radix_node_head *rnh;
1.30      mycroft  2341:
                   2342:        for (i = 0; i <= AF_MAX; i++)
1.50      christos 2343:                if ((rnh = nep->ne_rtable[i]) != NULL) {
                   2344:                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1.30      mycroft  2345:                        free((caddr_t)rnh, M_RTABLE);
                   2346:                        nep->ne_rtable[i] = 0;
                   2347:                }
                   2348: }
                   2349:
                   2350: int
1.247     thorpej  2351: vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
1.30      mycroft  2352: {
                   2353:        int error;
                   2354:
                   2355:        if (argp->ex_flags & MNT_DELEXPORT) {
1.71      fvdl     2356:                if (mp->mnt_flag & MNT_EXPUBLIC) {
                   2357:                        vfs_setpublicfs(NULL, NULL, NULL);
                   2358:                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                   2359:                }
1.30      mycroft  2360:                vfs_free_addrlist(nep);
                   2361:                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
                   2362:        }
                   2363:        if (argp->ex_flags & MNT_EXPORTED) {
1.71      fvdl     2364:                if (argp->ex_flags & MNT_EXPUBLIC) {
                   2365:                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                   2366:                                return (error);
                   2367:                        mp->mnt_flag |= MNT_EXPUBLIC;
                   2368:                }
1.50      christos 2369:                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1.30      mycroft  2370:                        return (error);
                   2371:                mp->mnt_flag |= MNT_EXPORTED;
                   2372:        }
1.71      fvdl     2373:        return (0);
                   2374: }
                   2375:
                   2376: /*
                   2377:  * Set the publicly exported filesystem (WebNFS). Currently, only
                   2378:  * one public filesystem is possible in the spec (RFC 2054 and 2055)
                   2379:  */
                   2380: int
1.247     thorpej  2381: vfs_setpublicfs(struct mount *mp, struct netexport *nep,
                   2382:     struct export_args *argp)
1.71      fvdl     2383: {
                   2384:        int error;
                   2385:        struct vnode *rvp;
                   2386:        char *cp;
                   2387:
                   2388:        /*
                   2389:         * mp == NULL -> invalidate the current info, the FS is
                   2390:         * no longer exported. May be called from either vfs_export
                   2391:         * or unmount, so check if it hasn't already been done.
                   2392:         */
                   2393:        if (mp == NULL) {
                   2394:                if (nfs_pub.np_valid) {
                   2395:                        nfs_pub.np_valid = 0;
                   2396:                        if (nfs_pub.np_index != NULL) {
                   2397:                                FREE(nfs_pub.np_index, M_TEMP);
                   2398:                                nfs_pub.np_index = NULL;
                   2399:                        }
                   2400:                }
                   2401:                return (0);
                   2402:        }
                   2403:
                   2404:        /*
                   2405:         * Only one allowed at a time.
                   2406:         */
                   2407:        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                   2408:                return (EBUSY);
                   2409:
                   2410:        /*
                   2411:         * Get real filehandle for root of exported FS.
                   2412:         */
1.91      perry    2413:        memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1.221     christos 2414:        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
1.71      fvdl     2415:
1.200     thorpej  2416:        if ((error = VFS_ROOT(mp, &rvp)))
1.71      fvdl     2417:                return (error);
                   2418:
                   2419:        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                   2420:                return (error);
                   2421:
                   2422:        vput(rvp);
                   2423:
                   2424:        /*
                   2425:         * If an indexfile was specified, pull it in.
                   2426:         */
                   2427:        if (argp->ex_indexfile != NULL) {
                   2428:                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                   2429:                    M_WAITOK);
                   2430:                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                   2431:                    MAXNAMLEN, (size_t *)0);
                   2432:                if (!error) {
                   2433:                        /*
                   2434:                         * Check for illegal filenames.
                   2435:                         */
                   2436:                        for (cp = nfs_pub.np_index; *cp; cp++) {
                   2437:                                if (*cp == '/') {
                   2438:                                        error = EINVAL;
                   2439:                                        break;
                   2440:                                }
                   2441:                        }
                   2442:                }
                   2443:                if (error) {
                   2444:                        FREE(nfs_pub.np_index, M_TEMP);
                   2445:                        return (error);
                   2446:                }
                   2447:        }
                   2448:
                   2449:        nfs_pub.np_mount = mp;
                   2450:        nfs_pub.np_valid = 1;
1.30      mycroft  2451:        return (0);
                   2452: }
                   2453:
                   2454: struct netcred *
1.247     thorpej  2455: vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
1.30      mycroft  2456: {
1.123     augustss 2457:        struct netcred *np;
                   2458:        struct radix_node_head *rnh;
1.30      mycroft  2459:        struct sockaddr *saddr;
                   2460:
                   2461:        np = NULL;
                   2462:        if (mp->mnt_flag & MNT_EXPORTED) {
                   2463:                /*
                   2464:                 * Lookup in the export list first.
                   2465:                 */
                   2466:                if (nam != NULL) {
                   2467:                        saddr = mtod(nam, struct sockaddr *);
                   2468:                        rnh = nep->ne_rtable[saddr->sa_family];
                   2469:                        if (rnh != NULL) {
                   2470:                                np = (struct netcred *)
                   2471:                                        (*rnh->rnh_matchaddr)((caddr_t)saddr,
                   2472:                                                              rnh);
                   2473:                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                   2474:                                        np = NULL;
                   2475:                        }
                   2476:                }
                   2477:                /*
                   2478:                 * If no address match, use the default if it exists.
                   2479:                 */
                   2480:                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                   2481:                        np = &nep->ne_defexported;
                   2482:        }
                   2483:        return (np);
1.35      ws       2484: }
                   2485:
                   2486: /*
                   2487:  * Do the usual access checking.
                   2488:  * file_mode, uid and gid are from the vnode in question,
                   2489:  * while acc_mode and cred are from the VOP_ACCESS parameter list
                   2490:  */
1.41      mycroft  2491: int
1.247     thorpej  2492: vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
                   2493:     mode_t acc_mode, struct ucred *cred)
1.35      ws       2494: {
                   2495:        mode_t mask;
1.217     junyoung 2496:
1.64      mycroft  2497:        /*
                   2498:         * Super-user always gets read/write access, but execute access depends
                   2499:         * on at least one execute bit being set.
                   2500:         */
                   2501:        if (cred->cr_uid == 0) {
1.69      mycroft  2502:                if ((acc_mode & VEXEC) && type != VDIR &&
1.68      mycroft  2503:                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1.64      mycroft  2504:                        return (EACCES);
1.68      mycroft  2505:                return (0);
1.64      mycroft  2506:        }
1.217     junyoung 2507:
1.35      ws       2508:        mask = 0;
1.217     junyoung 2509:
1.35      ws       2510:        /* Otherwise, check the owner. */
                   2511:        if (cred->cr_uid == uid) {
1.68      mycroft  2512:                if (acc_mode & VEXEC)
1.35      ws       2513:                        mask |= S_IXUSR;
                   2514:                if (acc_mode & VREAD)
                   2515:                        mask |= S_IRUSR;
                   2516:                if (acc_mode & VWRITE)
                   2517:                        mask |= S_IWUSR;
1.64      mycroft  2518:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2519:        }
1.217     junyoung 2520:
1.35      ws       2521:        /* Otherwise, check the groups. */
1.44      jtc      2522:        if (cred->cr_gid == gid || groupmember(gid, cred)) {
1.68      mycroft  2523:                if (acc_mode & VEXEC)
1.35      ws       2524:                        mask |= S_IXGRP;
                   2525:                if (acc_mode & VREAD)
                   2526:                        mask |= S_IRGRP;
                   2527:                if (acc_mode & VWRITE)
                   2528:                        mask |= S_IWGRP;
1.64      mycroft  2529:                return ((file_mode & mask) == mask ? 0 : EACCES);
1.35      ws       2530:        }
1.217     junyoung 2531:
1.35      ws       2532:        /* Otherwise, check everyone else. */
1.68      mycroft  2533:        if (acc_mode & VEXEC)
1.35      ws       2534:                mask |= S_IXOTH;
                   2535:        if (acc_mode & VREAD)
                   2536:                mask |= S_IROTH;
                   2537:        if (acc_mode & VWRITE)
                   2538:                mask |= S_IWOTH;
1.64      mycroft  2539:        return ((file_mode & mask) == mask ? 0 : EACCES);
1.39      mycroft  2540: }
                   2541:
                   2542: /*
                   2543:  * Unmount all file systems.
                   2544:  * We traverse the list in reverse order under the assumption that doing so
                   2545:  * will avoid needing to worry about dependencies.
                   2546:  */
                   2547: void
1.247     thorpej  2548: vfs_unmountall(struct proc *p)
1.39      mycroft  2549: {
1.123     augustss 2550:        struct mount *mp, *nmp;
1.40      mycroft  2551:        int allerror, error;
1.39      mycroft  2552:
1.235     lukem    2553:        printf("unmounting file systems...");
1.39      mycroft  2554:        for (allerror = 0,
                   2555:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   2556:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      2557: #ifdef DEBUG
1.235     lukem    2558:                printf("\nunmounting %s (%s)...",
1.56      christos 2559:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      2560: #endif
1.149     thorpej  2561:                /*
                   2562:                 * XXX Freeze syncer.  Must do this before locking the
                   2563:                 * mount point.  See dounmount() for details.
                   2564:                 */
                   2565:                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                   2566:                if (vfs_busy(mp, 0, 0)) {
                   2567:                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
1.60      fvdl     2568:                        continue;
1.149     thorpej  2569:                }
1.201     fvdl     2570:                if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1.57      christos 2571:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  2572:                            mp->mnt_stat.f_mntonname, error);
                   2573:                        allerror = 1;
                   2574:                }
1.39      mycroft  2575:        }
1.235     lukem    2576:        printf(" done\n");
1.39      mycroft  2577:        if (allerror)
1.57      christos 2578:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  2579: }
                   2580:
1.205     yamt     2581: extern struct simplelock bqueue_slock; /* XXX */
                   2582:
1.40      mycroft  2583: /*
                   2584:  * Sync and unmount file systems before shutting down.
                   2585:  */
                   2586: void
1.247     thorpej  2587: vfs_shutdown(void)
1.40      mycroft  2588: {
1.184     thorpej  2589:        struct lwp *l = curlwp;
1.185     christos 2590:        struct proc *p;
1.40      mycroft  2591:
1.128     sommerfe 2592:        /* XXX we're certainly not running in proc0's context! */
1.185     christos 2593:        if (l == NULL || (p = l->l_proc) == NULL)
1.128     sommerfe 2594:                p = &proc0;
1.185     christos 2595:
1.70      cgd      2596:        printf("syncing disks... ");
                   2597:
1.138     bouyer   2598:        /* remove user process from run queue */
                   2599:        suspendsched();
1.40      mycroft  2600:        (void) spl0();
                   2601:
1.128     sommerfe 2602:        /* avoid coming back this way again if we panic. */
                   2603:        doing_shutdown = 1;
                   2604:
1.184     thorpej  2605:        sys_sync(l, NULL, NULL);
1.40      mycroft  2606:
                   2607:        /* Wait for sync to finish. */
1.213     pk       2608:        if (buf_syncwait() != 0) {
1.124     augustss 2609: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
                   2610:                Debugger();
                   2611: #endif
1.57      christos 2612:                printf("giving up\n");
1.84      thorpej  2613:                return;
1.73      thorpej  2614:        } else
1.57      christos 2615:                printf("done\n");
1.73      thorpej  2616:
1.84      thorpej  2617:        /*
                   2618:         * If we've panic'd, don't make the situation potentially
                   2619:         * worse by unmounting the file systems.
                   2620:         */
                   2621:        if (panicstr != NULL)
                   2622:                return;
                   2623:
                   2624:        /* Release inodes held by texts before update. */
1.73      thorpej  2625: #ifdef notdef
1.84      thorpej  2626:        vnshutdown();
1.73      thorpej  2627: #endif
1.84      thorpej  2628:        /* Unmount file systems. */
1.201     fvdl     2629:        vfs_unmountall(p);
1.58      thorpej  2630: }
                   2631:
                   2632: /*
                   2633:  * Mount the root file system.  If the operator didn't specify a
                   2634:  * file system to use, try all possible file systems until one
                   2635:  * succeeds.
                   2636:  */
                   2637: int
1.247     thorpej  2638: vfs_mountroot(void)
1.58      thorpej  2639: {
1.79      thorpej  2640:        struct vfsops *v;
1.239     mycroft  2641:        int error = ENODEV;
1.58      thorpej  2642:
                   2643:        if (root_device == NULL)
                   2644:                panic("vfs_mountroot: root device unknown");
                   2645:
                   2646:        switch (root_device->dv_class) {
                   2647:        case DV_IFNET:
                   2648:                if (rootdev != NODEV)
1.173     thorpej  2649:                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                   2650:                            "(0x%08x -> %d,%d)", rootdev,
                   2651:                            major(rootdev), minor(rootdev));
1.58      thorpej  2652:                break;
                   2653:
                   2654:        case DV_DISK:
                   2655:                if (rootdev == NODEV)
                   2656:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239     mycroft  2657:                if (bdevvp(rootdev, &rootvp))
                   2658:                        panic("vfs_mountroot: can't get vnode for rootdev");
                   2659:                error = VOP_OPEN(rootvp, FREAD, FSCRED, curproc);
                   2660:                if (error) {
                   2661:                        printf("vfs_mountroot: can't open root device\n");
                   2662:                        return (error);
                   2663:                }
1.58      thorpej  2664:                break;
                   2665:
                   2666:        default:
                   2667:                printf("%s: inappropriate for root file system\n",
                   2668:                    root_device->dv_xname);
                   2669:                return (ENODEV);
                   2670:        }
                   2671:
                   2672:        /*
                   2673:         * If user specified a file system, use it.
                   2674:         */
1.239     mycroft  2675:        if (mountroot != NULL) {
                   2676:                error = (*mountroot)();
                   2677:                goto done;
                   2678:        }
1.58      thorpej  2679:
                   2680:        /*
                   2681:         * Try each file system currently configured into the kernel.
                   2682:         */
1.220     lukem    2683:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2684:                if (v->vfs_mountroot == NULL)
1.58      thorpej  2685:                        continue;
                   2686: #ifdef DEBUG
1.197     thorpej  2687:                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  2688: #endif
1.239     mycroft  2689:                error = (*v->vfs_mountroot)();
                   2690:                if (!error) {
1.197     thorpej  2691:                        aprint_normal("root file system type: %s\n",
                   2692:                            v->vfs_name);
1.79      thorpej  2693:                        break;
1.58      thorpej  2694:                }
                   2695:        }
                   2696:
1.79      thorpej  2697:        if (v == NULL) {
                   2698:                printf("no file system for %s", root_device->dv_xname);
                   2699:                if (root_device->dv_class == DV_DISK)
                   2700:                        printf(" (dev 0x%x)", rootdev);
                   2701:                printf("\n");
1.239     mycroft  2702:                error = EFTYPE;
1.79      thorpej  2703:        }
1.239     mycroft  2704:
                   2705: done:
                   2706:        if (error && root_device->dv_class == DV_DISK) {
                   2707:                VOP_CLOSE(rootvp, FREAD, FSCRED, curproc);
                   2708:                vrele(rootvp);
                   2709:        }
                   2710:        return (error);
1.58      thorpej  2711: }
                   2712:
                   2713: /*
                   2714:  * Given a file system name, look up the vfsops for that
                   2715:  * file system, or return NULL if file system isn't present
                   2716:  * in the kernel.
                   2717:  */
                   2718: struct vfsops *
1.247     thorpej  2719: vfs_getopsbyname(const char *name)
1.58      thorpej  2720: {
1.79      thorpej  2721:        struct vfsops *v;
                   2722:
1.220     lukem    2723:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2724:                if (strcmp(v->vfs_name, name) == 0)
                   2725:                        break;
                   2726:        }
                   2727:
                   2728:        return (v);
                   2729: }
                   2730:
                   2731: /*
                   2732:  * Establish a file system and initialize it.
                   2733:  */
                   2734: int
1.247     thorpej  2735: vfs_attach(struct vfsops *vfs)
1.79      thorpej  2736: {
                   2737:        struct vfsops *v;
                   2738:        int error = 0;
                   2739:
1.58      thorpej  2740:
1.79      thorpej  2741:        /*
                   2742:         * Make sure this file system doesn't already exist.
                   2743:         */
1.157     chs      2744:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2745:                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                   2746:                        error = EEXIST;
                   2747:                        goto out;
                   2748:                }
                   2749:        }
                   2750:
                   2751:        /*
                   2752:         * Initialize the vnode operations for this file system.
                   2753:         */
                   2754:        vfs_opv_init(vfs->vfs_opv_descs);
                   2755:
                   2756:        /*
                   2757:         * Now initialize the file system itself.
                   2758:         */
                   2759:        (*vfs->vfs_init)();
                   2760:
                   2761:        /*
                   2762:         * ...and link it into the kernel's list.
                   2763:         */
                   2764:        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
                   2765:
                   2766:        /*
                   2767:         * Sanity: make sure the reference count is 0.
                   2768:         */
                   2769:        vfs->vfs_refcount = 0;
                   2770:
                   2771:  out:
                   2772:        return (error);
                   2773: }
                   2774:
                   2775: /*
                   2776:  * Remove a file system from the kernel.
                   2777:  */
                   2778: int
1.247     thorpej  2779: vfs_detach(struct vfsops *vfs)
1.79      thorpej  2780: {
                   2781:        struct vfsops *v;
                   2782:
                   2783:        /*
                   2784:         * Make sure no one is using the filesystem.
                   2785:         */
                   2786:        if (vfs->vfs_refcount != 0)
                   2787:                return (EBUSY);
                   2788:
                   2789:        /*
                   2790:         * ...and remove it from the kernel's list.
                   2791:         */
1.157     chs      2792:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  2793:                if (v == vfs) {
                   2794:                        LIST_REMOVE(v, vfs_list);
                   2795:                        break;
                   2796:                }
                   2797:        }
                   2798:
                   2799:        if (v == NULL)
                   2800:                return (ESRCH);
1.121     jdolecek 2801:
                   2802:        /*
                   2803:         * Now run the file system-specific cleanups.
                   2804:         */
                   2805:        (*vfs->vfs_done)();
1.79      thorpej  2806:
                   2807:        /*
                   2808:         * Free the vnode operations vector.
                   2809:         */
                   2810:        vfs_opv_free(vfs->vfs_opv_descs);
                   2811:        return (0);
1.157     chs      2812: }
                   2813:
                   2814: void
                   2815: vfs_reinit(void)
                   2816: {
                   2817:        struct vfsops *vfs;
                   2818:
                   2819:        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                   2820:                if (vfs->vfs_reinit) {
                   2821:                        (*vfs->vfs_reinit)();
                   2822:                }
                   2823:        }
1.192     christos 2824: }
                   2825:
1.214     hannken  2826: /*
                   2827:  * Request a filesystem to suspend write operations.
                   2828:  */
                   2829: int
                   2830: vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
                   2831: {
                   2832:        struct proc *p = curproc;       /* XXX */
                   2833:        int error;
                   2834:
                   2835:        while ((mp->mnt_iflag & IMNT_SUSPEND)) {
                   2836:                if (slptimeo < 0)
                   2837:                        return EWOULDBLOCK;
                   2838:                error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
                   2839:                if (error)
                   2840:                        return error;
                   2841:        }
                   2842:        mp->mnt_iflag |= IMNT_SUSPEND;
                   2843:
1.224     pk       2844:        simple_lock(&mp->mnt_slock);
1.214     hannken  2845:        if (mp->mnt_writeopcountupper > 0)
1.224     pk       2846:                ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
                   2847:                        0, &mp->mnt_slock);
                   2848:        simple_unlock(&mp->mnt_slock);
1.214     hannken  2849:
                   2850:        error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
                   2851:        if (error) {
                   2852:                vfs_write_resume(mp);
                   2853:                return error;
                   2854:        }
                   2855:        mp->mnt_iflag |= IMNT_SUSPENDLOW;
                   2856:
1.224     pk       2857:        simple_lock(&mp->mnt_slock);
1.214     hannken  2858:        if (mp->mnt_writeopcountlower > 0)
1.224     pk       2859:                ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
                   2860:                        0, &mp->mnt_slock);
1.214     hannken  2861:        mp->mnt_iflag |= IMNT_SUSPENDED;
1.224     pk       2862:        simple_unlock(&mp->mnt_slock);
1.214     hannken  2863:
                   2864:        return 0;
                   2865: }
                   2866:
                   2867: /*
                   2868:  * Request a filesystem to resume write operations.
                   2869:  */
                   2870: void
                   2871: vfs_write_resume(struct mount *mp)
                   2872: {
                   2873:
                   2874:        if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
                   2875:                return;
                   2876:        mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
                   2877:        wakeup(&mp->mnt_flag);
                   2878: }
                   2879:
1.192     christos 2880: void
1.221     christos 2881: copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1.192     christos 2882: {
1.221     christos 2883:        const struct statvfs *mbp;
1.193     christos 2884:
                   2885:        if (sbp == (mbp = &mp->mnt_stat))
1.192     christos 2886:                return;
1.193     christos 2887:
1.222     enami    2888:        (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
                   2889:        sbp->f_fsid = mbp->f_fsid;
1.193     christos 2890:        sbp->f_owner = mbp->f_owner;
1.221     christos 2891:        sbp->f_flag = mbp->f_flag;
1.193     christos 2892:        sbp->f_syncwrites = mbp->f_syncwrites;
                   2893:        sbp->f_asyncwrites = mbp->f_asyncwrites;
1.221     christos 2894:        sbp->f_syncreads = mbp->f_syncreads;
                   2895:        sbp->f_asyncreads = mbp->f_asyncreads;
                   2896:        (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1.193     christos 2897:        (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1.192     christos 2898:            sizeof(sbp->f_fstypename));
1.193     christos 2899:        (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1.192     christos 2900:            sizeof(sbp->f_mntonname));
                   2901:        (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
                   2902:            sizeof(sbp->f_mntfromname));
1.233     jdolecek 2903:        sbp->f_namemax = mbp->f_namemax;
1.192     christos 2904: }
                   2905:
                   2906: int
1.221     christos 2907: set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
1.201     fvdl     2908:     struct mount *mp, struct proc *p)
1.192     christos 2909: {
                   2910:        int error;
                   2911:        size_t size;
1.221     christos 2912:        struct statvfs *sfs = &mp->mnt_stat;
1.192     christos 2913:        int (*fun)(const void *, void *, size_t, size_t *);
                   2914:
1.217     junyoung 2915:        (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
1.192     christos 2916:            sizeof(mp->mnt_stat.f_fstypename));
                   2917:
                   2918:        if (onp) {
1.201     fvdl     2919:                struct cwdinfo *cwdi = p->p_cwdi;
1.192     christos 2920:                fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
                   2921:                if (cwdi->cwdi_rdir != NULL) {
                   2922:                        size_t len;
                   2923:                        char *bp;
                   2924:                        char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
                   2925:
1.209     dbj      2926:                        if (!path) /* XXX can't happen with M_WAITOK */
1.192     christos 2927:                                return ENOMEM;
                   2928:
                   2929:                        bp = path + MAXPATHLEN;
                   2930:                        *--bp = '\0';
                   2931:                        error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
1.201     fvdl     2932:                            path, MAXPATHLEN / 2, 0, p);
1.192     christos 2933:                        if (error) {
                   2934:                                free(path, M_TEMP);
                   2935:                                return error;
                   2936:                        }
                   2937:
                   2938:                        len = strlen(bp);
                   2939:                        if (len > sizeof(sfs->f_mntonname) - 1)
                   2940:                                len = sizeof(sfs->f_mntonname) - 1;
                   2941:                        (void)strncpy(sfs->f_mntonname, bp, len);
                   2942:                        free(path, M_TEMP);
                   2943:
                   2944:                        if (len < sizeof(sfs->f_mntonname) - 1) {
                   2945:                                error = (*fun)(onp, &sfs->f_mntonname[len],
1.194     christos 2946:                                    sizeof(sfs->f_mntonname) - len - 1, &size);
1.192     christos 2947:                                if (error)
                   2948:                                        return error;
                   2949:                                size += len;
                   2950:                        } else {
                   2951:                                size = len;
                   2952:                        }
                   2953:                } else {
                   2954:                        error = (*fun)(onp, &sfs->f_mntonname,
                   2955:                            sizeof(sfs->f_mntonname) - 1, &size);
                   2956:                        if (error)
                   2957:                                return error;
                   2958:                }
                   2959:                (void)memset(sfs->f_mntonname + size, 0,
                   2960:                    sizeof(sfs->f_mntonname) - size);
                   2961:        }
                   2962:
                   2963:        if (fromp) {
                   2964:                fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
                   2965:                error = (*fun)(fromp, sfs->f_mntfromname,
                   2966:                    sizeof(sfs->f_mntfromname) - 1, &size);
                   2967:                if (error)
                   2968:                        return error;
                   2969:                (void)memset(sfs->f_mntfromname + size, 0,
                   2970:                    sizeof(sfs->f_mntfromname) - size);
                   2971:        }
                   2972:        return 0;
1.29      cgd      2973: }
1.125     chs      2974:
/*
 * Default vfs_extattrctl routine for file systems that do not support
 * it.  A non-NULL vnode is unlocked before returning; the result is
 * always EOPNOTSUPP.
 */
/*ARGSUSED*/
int
vfs_stdextattrctl(struct mount *mp, int cmt, struct vnode *vp,
    int attrnamespace, const char *attrname, struct proc *p)
{

	if (vp == NULL)
		return (EOPNOTSUPP);

	VOP_UNLOCK(vp, 0);
	return (EOPNOTSUPP);
}
                   2989:
                   2990: /*
                   2991:  * Credential check based on process requesting service, and per-attribute
                   2992:  * permissions.
                   2993:  *
                   2994:  * NOTE: Vnode must be locked.
                   2995:  */
                   2996: int
                   2997: extattr_check_cred(struct vnode *vp, int attrnamespace,
                   2998:     struct ucred *cred, struct proc *p, int access)
                   2999: {
                   3000:
                   3001:        if (cred == NOCRED)
                   3002:                return (0);
                   3003:
                   3004:        switch (attrnamespace) {
                   3005:        case EXTATTR_NAMESPACE_SYSTEM:
                   3006:                /*
                   3007:                 * Do we really want to allow this, or just require that
                   3008:                 * these requests come from kernel code (NOCRED case above)?
                   3009:                 */
                   3010:                return (suser(cred, &p->p_acflag));
1.242     perry    3011:
1.238     thorpej  3012:        case EXTATTR_NAMESPACE_USER:
                   3013:                return (VOP_ACCESS(vp, access, cred, p));
1.242     perry    3014:
1.238     thorpej  3015:        default:
                   3016:                return (EPERM);
                   3017:        }
                   3018: }
                   3019:
1.125     chs      3020: #ifdef DDB
1.247     thorpej  3021: static const char buf_flagbits[] = BUF_FLAGBITS;
1.125     chs      3022:
                   3023: void
1.247     thorpej  3024: vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
1.125     chs      3025: {
1.245     christos 3026:        char bf[1024];
1.125     chs      3027:
1.198     dbj      3028:        (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
1.125     chs      3029:                  bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
                   3030:
1.245     christos 3031:        bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
                   3032:        (*pr)("  error %d flags 0x%s\n", bp->b_error, bf);
1.125     chs      3033:
1.164     msaitoh  3034:        (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1.125     chs      3035:                  bp->b_bufsize, bp->b_bcount, bp->b_resid);
1.142     chs      3036:        (*pr)("  data %p saveaddr %p dep %p\n",
                   3037:                  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
1.125     chs      3038:        (*pr)("  iodone %p\n", bp->b_iodone);
                   3039: }
                   3040:
                   3041:
                   3042: void
1.247     thorpej  3043: vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
1.125     chs      3044: {
1.245     christos 3045:        char bf[256];
1.125     chs      3046:
1.158     chs      3047:        uvm_object_printit(&vp->v_uobj, full, pr);
1.245     christos 3048:        bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
                   3049:        (*pr)("\nVNODE flags %s\n", bf);
1.158     chs      3050:        (*pr)("mp %p numoutput %d size 0x%llx\n",
                   3051:              vp->v_mount, vp->v_numoutput, vp->v_size);
1.125     chs      3052:
1.164     msaitoh  3053:        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
1.125     chs      3054:              vp->v_data, vp->v_usecount, vp->v_writecount,
                   3055:              vp->v_holdcnt, vp->v_numoutput);
                   3056:
1.237     christos 3057:        (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
                   3058:              ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
                   3059:              ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1.203     yamt     3060:              vp->v_mount, vp->v_mountedhere);
1.125     chs      3061:
                   3062:        if (full) {
                   3063:                struct buf *bp;
                   3064:
                   3065:                (*pr)("clean bufs:\n");
1.142     chs      3066:                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                   3067:                        (*pr)(" bp %p\n", bp);
1.125     chs      3068:                        vfs_buf_print(bp, full, pr);
                   3069:                }
                   3070:
                   3071:                (*pr)("dirty bufs:\n");
1.142     chs      3072:                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                   3073:                        (*pr)(" bp %p\n", bp);
1.125     chs      3074:                        vfs_buf_print(bp, full, pr);
                   3075:                }
                   3076:        }
                   3077: }
1.210     dbj      3078:
                   3079: void
1.247     thorpej  3080: vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1.210     dbj      3081: {
                   3082:        char sbuf[256];
                   3083:
                   3084:        (*pr)("vnodecovered = %p syncer = %p data = %p\n",
                   3085:                        mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
                   3086:
1.232     mycroft  3087:        (*pr)("fs_bshift %d dev_bshift = %d\n",
                   3088:                        mp->mnt_fs_bshift,mp->mnt_dev_bshift);
1.210     dbj      3089:
                   3090:        bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
                   3091:        (*pr)("flag = %s\n", sbuf);
                   3092:
                   3093:        bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
                   3094:        (*pr)("iflag = %s\n", sbuf);
                   3095:
                   3096:        /* XXX use lockmgr_printinfo */
                   3097:        if (mp->mnt_lock.lk_sharecount)
                   3098:                (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
                   3099:                    mp->mnt_lock.lk_sharecount);
                   3100:        else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
                   3101:                (*pr)(" lock type %s: EXCL (count %d) by ",
                   3102:                    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
                   3103:                if (mp->mnt_lock.lk_flags & LK_SPIN)
                   3104:                        (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
                   3105:                else
                   3106:                        (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
                   3107:                            mp->mnt_lock.lk_locklwp);
                   3108:        } else
                   3109:                (*pr)(" not locked");
                   3110:        if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
                   3111:                (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
                   3112:
                   3113:        (*pr)("\n");
                   3114:
                   3115:        if (mp->mnt_unmounter) {
                   3116:                (*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
                   3117:        }
                   3118:        (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
                   3119:                mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
                   3120:
1.221     christos 3121:        (*pr)("statvfs cache:\n");
                   3122:        (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
                   3123:        (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
                   3124:        (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
                   3125:
                   3126:        (*pr)("\tblocks = "PRIu64"\n",mp->mnt_stat.f_blocks);
                   3127:        (*pr)("\tbfree = "PRIu64"\n",mp->mnt_stat.f_bfree);
                   3128:        (*pr)("\tbavail = "PRIu64"\n",mp->mnt_stat.f_bavail);
                   3129:        (*pr)("\tbresvd = "PRIu64"\n",mp->mnt_stat.f_bresvd);
                   3130:
                   3131:        (*pr)("\tfiles = "PRIu64"\n",mp->mnt_stat.f_files);
                   3132:        (*pr)("\tffree = "PRIu64"\n",mp->mnt_stat.f_ffree);
                   3133:        (*pr)("\tfavail = "PRIu64"\n",mp->mnt_stat.f_favail);
                   3134:        (*pr)("\tfresvd = "PRIu64"\n",mp->mnt_stat.f_fresvd);
                   3135:
                   3136:        (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
                   3137:                        mp->mnt_stat.f_fsidx.__fsid_val[0],
                   3138:                        mp->mnt_stat.f_fsidx.__fsid_val[1]);
                   3139:
1.210     dbj      3140:        (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
1.221     christos 3141:        (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
                   3142:
                   3143:        bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
                   3144:            sizeof(sbuf));
                   3145:        (*pr)("\tflag = %s\n",sbuf);
                   3146:        (*pr)("\tsyncwrites = " PRIu64 "\n",mp->mnt_stat.f_syncwrites);
                   3147:        (*pr)("\tasyncwrites = " PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
                   3148:        (*pr)("\tsyncreads = " PRIu64 "\n",mp->mnt_stat.f_syncreads);
                   3149:        (*pr)("\tasyncreads = " PRIu64 "\n",mp->mnt_stat.f_asyncreads);
1.210     dbj      3150:        (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
                   3151:        (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
                   3152:        (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
                   3153:
                   3154:        {
                   3155:                int cnt = 0;
                   3156:                struct vnode *vp;
                   3157:                (*pr)("locked vnodes =");
                   3158:                /* XXX would take mountlist lock, except ddb may not have context */
                   3159:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   3160:                        if (VOP_ISLOCKED(vp)) {
                   3161:                                if ((++cnt % 6) == 0) {
                   3162:                                        (*pr)(" %p,\n\t", vp);
                   3163:                                } else {
                   3164:                                        (*pr)(" %p,", vp);
                   3165:                                }
                   3166:                        }
                   3167:                }
                   3168:                (*pr)("\n");
                   3169:        }
                   3170:
                   3171:        if (full) {
                   3172:                int cnt = 0;
                   3173:                struct vnode *vp;
                   3174:                (*pr)("all vnodes =");
                   3175:                /* XXX would take mountlist lock, except ddb may not have context */
                   3176:                LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                   3177:                        if (!LIST_NEXT(vp, v_mntvnodes)) {
                   3178:                                (*pr)(" %p", vp);
                   3179:                        } else if ((++cnt % 6) == 0) {
                   3180:                                (*pr)(" %p,\n\t", vp);
                   3181:                        } else {
                   3182:                                (*pr)(" %p,", vp);
                   3183:                        }
                   3184:                }
                   3185:                (*pr)("\n", vp);
                   3186:        }
                   3187: }
1.247     thorpej  3188: #endif /* DDB */

CVSweb <webmaster@jp.NetBSD.org>