[BACK]Return to vfs_subr.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_subr.c, Revision 1.301

1.301   ! hannken     1: /*     $NetBSD: vfs_subr.c,v 1.300 2007/08/14 13:51:31 pooka Exp $     */
1.74      thorpej     2:
                      3: /*-
1.243     mycroft     4:  * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
1.74      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
1.243     mycroft    10:  * This code is derived from software contributed to The NetBSD Foundation
                     11:  * by Charles M. Hannum.
1.74      thorpej    12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  * 3. All advertising materials mentioning features or use of this software
                     22:  *    must display the following acknowledgement:
                     23:  *     This product includes software developed by the NetBSD
                     24:  *     Foundation, Inc. and its contributors.
                     25:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     26:  *    contributors may be used to endorse or promote products derived
                     27:  *    from this software without specific prior written permission.
                     28:  *
                     29:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     30:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     31:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     32:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     33:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     34:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     35:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     36:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     37:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     38:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     39:  * POSSIBILITY OF SUCH DAMAGE.
                     40:  */
1.32      cgd        41:
1.29      cgd        42: /*
1.30      mycroft    43:  * Copyright (c) 1989, 1993
                     44:  *     The Regents of the University of California.  All rights reserved.
1.29      cgd        45:  * (c) UNIX System Laboratories, Inc.
                     46:  * All or some portions of this file are derived from material licensed
                     47:  * to the University of California by American Telephone and Telegraph
                     48:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     49:  * the permission of UNIX System Laboratories, Inc.
                     50:  *
                     51:  * Redistribution and use in source and binary forms, with or without
                     52:  * modification, are permitted provided that the following conditions
                     53:  * are met:
                     54:  * 1. Redistributions of source code must retain the above copyright
                     55:  *    notice, this list of conditions and the following disclaimer.
                     56:  * 2. Redistributions in binary form must reproduce the above copyright
                     57:  *    notice, this list of conditions and the following disclaimer in the
                     58:  *    documentation and/or other materials provided with the distribution.
1.204     agc        59:  * 3. Neither the name of the University nor the names of its contributors
1.29      cgd        60:  *    may be used to endorse or promote products derived from this software
                     61:  *    without specific prior written permission.
                     62:  *
                     63:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     64:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     65:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     66:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     67:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     68:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     69:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     70:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     71:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     72:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     73:  * SUCH DAMAGE.
                     74:  *
1.32      cgd        75:  *     @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
1.29      cgd        76:  */
                     77:
                     78: /*
1.296     pooka      79:  * External virtual filesystem routines.
                     80:  *
                     81:  * This file contains vfs subroutines which are heavily dependent on
                     82:  * the kernel and are not suitable for standalone use.  Examples include
                     83:  * routines involved in vnode and mountpoint management.
1.29      cgd        84:  */
1.162     lukem      85:
                     86: #include <sys/cdefs.h>
1.301   ! hannken    87: __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.300 2007/08/14 13:51:31 pooka Exp $");
1.78      mrg        88:
1.195     christos   89: #include "opt_inet.h"
1.125     chs        90: #include "opt_ddb.h"
1.95      thorpej    91: #include "opt_compat_netbsd.h"
1.97      christos   92: #include "opt_compat_43.h"
1.29      cgd        93:
                     94: #include <sys/param.h>
1.30      mycroft    95: #include <sys/systm.h>
1.29      cgd        96: #include <sys/proc.h>
1.138     bouyer     97: #include <sys/kernel.h>
1.29      cgd        98: #include <sys/mount.h>
1.46      mycroft    99: #include <sys/fcntl.h>
1.29      cgd       100: #include <sys/vnode.h>
1.30      mycroft   101: #include <sys/stat.h>
1.29      cgd       102: #include <sys/namei.h>
                    103: #include <sys/ucred.h>
                    104: #include <sys/buf.h>
                    105: #include <sys/errno.h>
                    106: #include <sys/malloc.h>
1.51      christos  107: #include <sys/syscallargs.h>
1.58      thorpej   108: #include <sys/device.h>
1.192     christos  109: #include <sys/filedesc.h>
1.266     elad      110: #include <sys/kauth.h>
1.50      christos  111:
1.30      mycroft   112: #include <miscfs/specfs/specdev.h>
1.113     fvdl      113: #include <miscfs/syncfs/syncfs.h>
1.30      mycroft   114:
1.125     chs       115: #include <uvm/uvm.h>
1.255     yamt      116: #include <uvm/uvm_readahead.h>
1.125     chs       117: #include <uvm/uvm_ddb.h>
1.129     mrg       118:
                    119: #include <sys/sysctl.h>
1.77      mrg       120:
1.117     fvdl      121: extern int dovfsusermount;     /* 1 => permit any user to mount filesystems */
1.263     chs       122: extern int vfs_magiclinks;     /* 1 => expand "magic" symlinks */
1.117     fvdl      123:
                         /*
                          * Recycling candidates.  getcleanvnode() scans vnode_free_list first
                          * and falls back to vnode_hold_list.
                          */
1.113     fvdl      124: /* TAILQ_HEAD(freelst, vnode) vnode_free_list =        vnode free list (in vnode.h) */
                    125: struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
1.114     enami     126: struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
1.113     fvdl      127:
                         /* Taken in this file around use of both lists above and of numvnodes. */
1.135     sommerfe  128: struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
1.79      thorpej   129:
                         /* Pool backing struct vnode; used via pool_get() and pool_put() below. */
1.223     simonb    130: POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
1.284     ad        131:     &pool_allocator_nointr, IPL_NONE);
1.186     thorpej   132:
                    133: MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
1.93      thorpej   134:
1.89      kleink    135: /*
                    136:  * Local declarations.
                    137:  */
1.276     hannken   138:
1.260     yamt      139: static void insmntque(struct vnode *, struct mount *);
                    140: static int getdevvp(dev_t, struct vnode **, enum vtype);
                    141: static void vclean(struct vnode *, int, struct lwp *);
1.256     christos  142: static struct vnode *getcleanvnode(struct lwp *);
1.51      christos  143:
                         /*
                          * Shrink the vnode population: hand vnodes back to vnode_pool until
                          * numvnodes is no greater than target.
                          *
                          * Returns 0 once the target is reached, or EBUSY when getcleanvnode()
                          * cannot supply another vnode to free.
                          */
1.202     yamt      144: int
1.256     christos  145: vfs_drainvnodes(long target, struct lwp *l)
1.202     yamt      146: {
                    147:
                    148:        simple_lock(&vnode_free_list_slock);
                    149:        while (numvnodes > target) {
                    150:                struct vnode *vp;
                    151:
                                        /*
                                         * getcleanvnode() is entered with vnode_free_list_slock
                                         * held and releases it on every return path, so the
                                         * EBUSY return below must not unlock again and the lock
                                         * has to be retaken before numvnodes is touched.
                                         */
1.256     christos  152:                vp = getcleanvnode(l);
1.202     yamt      153:                if (vp == NULL)
                    154:                        return EBUSY; /* give up */
                    155:                pool_put(&vnode_pool, vp);
                    156:                simple_lock(&vnode_free_list_slock);
                    157:                numvnodes--;
                    158:        }
                    159:        simple_unlock(&vnode_free_list_slock);
                    160:
                    161:        return 0;
                    162: }
                    163:
                    164: /*
                    165:  * grab a vnode from freelist and clean it.
                          *
                          * Must be called with vnode_free_list_slock held; the lock is released
                          * before returning, whether or not a vnode was found.
                          *
                          * vnode_free_list is scanned first, then vnode_hold_list.  A candidate
                          * is skipped when its interlock cannot be taken without blocking, when
                          * VXLOCK is set, or when it is a VLAYER vnode reported as locked.
                          *
                          * Returns the cleaned vnode, already removed from its list, or NULLVP
                          * if neither list holds a usable vnode.
                    166:  */
                    167: struct vnode *
1.256     christos  168: getcleanvnode(struct lwp *l)
1.202     yamt      169: {
                    170:        struct vnode *vp;
                    171:        struct freelst *listhd;
                    172:
                    173:        LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
1.229     yamt      174:
                    175:        listhd = &vnode_free_list;
                    176: try_nextlist:
                    177:        TAILQ_FOREACH(vp, listhd, v_freelist) {
1.208     hannken   178:                if (!simple_lock_try(&vp->v_interlock))
                    179:                        continue;
1.227     yamt      180:                /*
                    181:                 * as our lwp might hold the underlying vnode locked,
                    182:                 * don't try to reclaim the VLAYER vnode if it's locked.
                    183:                 */
1.228     yamt      184:                if ((vp->v_flag & VXLOCK) == 0 &&
                    185:                    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
                                                /* usable: leave the loop with v_interlock held */
1.285     hannken   186:                        break;
1.202     yamt      187:                }
1.208     hannken   188:                simple_unlock(&vp->v_interlock);
1.202     yamt      189:        }
                    190:
                                /* vp is NULL here when the scan ran off the end of the list */
                    191:        if (vp == NULLVP) {
1.229     yamt      192:                if (listhd == &vnode_free_list) {
                    193:                        listhd = &vnode_hold_list;
                    194:                        goto try_nextlist;
                    195:                }
1.202     yamt      196:                simple_unlock(&vnode_free_list_slock);
                    197:                return NULLVP;
                    198:        }
                    199:
                    200:        if (vp->v_usecount)
                    201:                panic("free vnode isn't, vp %p", vp);
                    202:        TAILQ_REMOVE(listhd, vp, v_freelist);
                    203:        /* see comment on why 0xdeadb is set at end of vgone (below) */
                    204:        vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                    205:        simple_unlock(&vnode_free_list_slock);
                    206:
                                /*
                                 * VBAD vnodes (e.g. those pushed back by ungetnewvnode()) skip
                                 * vgonel() and only have the interlock dropped.
                                 * NOTE(review): vgonel() is presumably responsible for releasing
                                 * v_interlock in the other case -- confirm against its definition.
                                 */
                    207:        if (vp->v_type != VBAD)
1.256     christos  208:                vgonel(vp, l);
1.202     yamt      209:        else
                    210:                simple_unlock(&vp->v_interlock);
                    211: #ifdef DIAGNOSTIC
                    212:        if (vp->v_data || vp->v_uobj.uo_npages ||
                    213:            TAILQ_FIRST(&vp->v_uobj.memq))
                    214:                panic("cleaned vnode isn't, vp %p", vp);
                    215:        if (vp->v_numoutput)
                    216:                panic("clean vnode has pending I/O's, vp %p", vp);
                    217: #endif
                    218:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                    219:
                    220:        return vp;
                    221: }
                    222:
1.29      cgd       223: /*
1.80      fvdl      224:  * Mark a mount point as busy. Used to synchronize access and to delay
                    225:  * unmounting. Interlock is not released on failure.
                          *
                          * While IMNT_UNMOUNT is set on mp the caller sleeps on mp, except:
                          *   - with LK_NOWAIT, ENOENT is returned at once;
                          *   - with LK_RECURSEFAIL, EDEADLK is returned if curlwp is the
                          *     recorded mp->mnt_unmounter.
                          * After a sleep, ENOENT is returned if IMNT_GONE is set.  Otherwise a
                          * shared hold on mp->mnt_lock is taken and 0 is returned; failure to
                          * obtain that hold panics.
                          *
                          * interlkp may be NULL.
1.29      cgd       226:  */
1.50      christos  227: int
1.247     thorpej   228: vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
1.29      cgd       229: {
1.80      fvdl      230:        int lkflags;
1.29      cgd       231:
1.207     dbj       232:        while (mp->mnt_iflag & IMNT_UNMOUNT) {
1.224     pk        233:                int gone, n;
1.217     junyoung  234:
1.80      fvdl      235:                if (flags & LK_NOWAIT)
                    236:                        return (ENOENT);
1.113     fvdl      237:                if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
1.256     christos  238:                    && mp->mnt_unmounter == curlwp)
1.113     fvdl      239:                        return (EDEADLK);
1.80      fvdl      240:                if (interlkp)
                    241:                        simple_unlock(interlkp);
                    242:                /*
                    243:                 * Since all busy locks are shared except the exclusive
                    244:                 * lock granted when unmounting, the only place that a
                    245:                 * wakeup needs to be done is at the release of the
                    246:                 * exclusive lock at the end of dounmount.
                    247:                 */
1.224     pk        248:                simple_lock(&mp->mnt_slock);
                                        /* mnt_wcnt counts sleepers; it is adjusted under mnt_slock */
1.103     sommerfe  249:                mp->mnt_wcnt++;
1.283     christos  250:                ltsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
1.224     pk        251:                n = --mp->mnt_wcnt;
                    252:                simple_unlock(&mp->mnt_slock);
1.207     dbj       253:                gone = mp->mnt_iflag & IMNT_GONE;
1.217     junyoung  254:
                                        /* last sleeper out: notify whoever waits on &mp->mnt_wcnt */
1.224     pk        255:                if (n == 0)
1.103     sommerfe  256:                        wakeup(&mp->mnt_wcnt);
                                        /* retake the interlock so an ENOENT return leaves it held */
1.80      fvdl      257:                if (interlkp)
                    258:                        simple_lock(interlkp);
1.103     sommerfe  259:                if (gone)
                    260:                        return (ENOENT);
1.80      fvdl      261:        }
                    262:        lkflags = LK_SHARED;
                                /*
                                 * NOTE(review): LK_INTERLOCK presumably makes lockmgr() release
                                 * interlkp on the success path -- confirm against lockmgr(9).
                                 */
                    263:        if (interlkp)
                    264:                lkflags |= LK_INTERLOCK;
                    265:        if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
                    266:                panic("vfs_busy: unexpected lock failure");
1.29      cgd       267:        return (0);
                    268: }
                    269:
                    270: /*
1.80      fvdl      271:  * Free a busy filesystem.
                          *
                          * Releases the hold on mp->mnt_lock that a successful vfs_busy() took.
1.29      cgd       272:  */
                    273: void
1.247     thorpej   274: vfs_unbusy(struct mount *mp)
1.29      cgd       275: {
                    276:
1.80      fvdl      277:        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
1.29      cgd       278: }
                    279:
                    280: /*
1.80      fvdl      281:  * Lookup a filesystem type, and if found allocate and initialize
                    282:  * a mount structure for it.
                    283:  *
                    284:  * Devname is usually updated by mount(8) after booting.
                          *
                          * fstypename is matched against the vfs_name of each vfs_list entry.
                          * On success the new mount is stored in *mpp and 0 is returned; the
                          * mount is zero-filled, flagged MNT_RDONLY, named as mounted on "/",
                          * and has been passed through vfs_busy().  Returns ENODEV when no
                          * filesystem of that name is registered.
                          * NOTE(review): the caller presumably has to vfs_unbusy() the result
                          * at some point -- confirm against the callers.
1.29      cgd       285:  */
1.50      christos  286: int
1.247     thorpej   287: vfs_rootmountalloc(const char *fstypename, const char *devname,
                    288:     struct mount **mpp)
1.29      cgd       289: {
1.80      fvdl      290:        struct vfsops *vfsp = NULL;
                    291:        struct mount *mp;
1.29      cgd       292:
                                /*
                                 * mp is still unset here; it appears only as a sizeof operand,
                                 * which is not evaluated.  The NULL test after the loop relies
                                 * on LIST_FOREACH leaving vfsp NULL once the list is exhausted.
                                 */
1.152     jdolecek  293:        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
1.291     christos  294:                if (!strncmp(vfsp->vfs_name, fstypename,
                    295:                    sizeof(mp->mnt_stat.f_fstypename)))
1.80      fvdl      296:                        break;
                    297:
                    298:        if (vfsp == NULL)
                    299:                return (ENODEV);
                    300:        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
1.91      perry     301:        memset((char *)mp, 0, (u_long)sizeof(struct mount));
1.80      fvdl      302:        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
1.224     pk        303:        simple_lock_init(&mp->mnt_slock);
                                /*
                                 * mnt_iflag is zero after the memset, so vfs_busy() goes straight
                                 * to taking the shared lock; its result is ignored.
                                 */
1.80      fvdl      304:        (void)vfs_busy(mp, LK_NOWAIT, 0);
1.272     reinoud   305:        TAILQ_INIT(&mp->mnt_vnodelist);
1.80      fvdl      306:        mp->mnt_op = vfsp;
                    307:        mp->mnt_flag = MNT_RDONLY;
                    308:        mp->mnt_vnodecovered = NULLVP;
                    309:        vfsp->vfs_refcount++;
1.291     christos  310:        (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
                    311:            sizeof(mp->mnt_stat.f_fstypename));
1.80      fvdl      312:        mp->mnt_stat.f_mntonname[0] = '/';
1.291     christos  313:        mp->mnt_stat.f_mntonname[1] = '\0';
                                /*
                                 * Terminate the last byte up front and copy at most size - 1
                                 * bytes, so f_mntfromname stays terminated if devname is long.
                                 */
                    314:        mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
                    315:            '\0';
                    316:        (void)copystr(devname, mp->mnt_stat.f_mntfromname,
                    317:            sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
1.276     hannken   318:        mount_initspecific(mp);
1.80      fvdl      319:        *mpp = mp;
1.29      cgd       320:        return (0);
                    321: }
                    322:
1.30      mycroft   323:
                    324: /*
                    325:  * Routines having to do with the management of the vnode table.
                    326:  */
1.217     junyoung  327: extern int (**dead_vnodeop_p)(void *);
1.30      mycroft   328:
1.29      cgd       329: /*
                    330:  * Return the next vnode from the free list.
                          *
                          * The vnode is either freshly taken from vnode_pool or recycled via
                          * getcleanvnode().  It is given type VNON, the supplied tag and vops,
                          * and is placed on the vnode list of mp (mp may be NULL).
                          *
                          * Returns 0 with the vnode in *vpp.  Returns the vfs_busy() error
                          * (other than EDEADLK) if mp cannot be marked busy; *vpp is left
                          * untouched in that case.  Returns ENFILE with *vpp cleared when no
                          * vnode can be recycled and allocation was not attempted.
                    331:  */
1.50      christos  332: int
1.247     thorpej   333: getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
                    334:     struct vnode **vpp)
1.29      cgd       335: {
1.142     chs       336:        extern struct uvm_pagerops uvm_vnodeops;
                    337:        struct uvm_object *uobj;
1.256     christos  338:        struct lwp *l = curlwp;         /* XXX */
1.113     fvdl      339:        static int toggle;
1.80      fvdl      340:        struct vnode *vp;
1.153     thorpej   341:        int error = 0, tryalloc;
1.158     chs       342:
1.159     enami     343:  try_again:
1.103     sommerfe  344:        if (mp) {
                    345:                /*
1.106     sommerfe  346:                 * Mark filesystem busy while we're creating a vnode.
                    347:                 * If unmount is in progress, this will wait; if the
                    348:                 * unmount succeeds (only if umount -f), this will
                    349:                 * return an error.  If the unmount fails, we'll keep
                    350:                 * going afterwards.
                    351:                 * (This puts the per-mount vnode list logically under
                    352:                 * the protection of the vfs_busy lock).
1.103     sommerfe  353:                 */
                                        /*
                                         * EDEADLK means curlwp is itself the unmounter.  That case
                                         * proceeds without the busy hold, and every vfs_unbusy()
                                         * below is skipped by testing error != EDEADLK.
                                         */
1.113     fvdl      354:                error = vfs_busy(mp, LK_RECURSEFAIL, 0);
                    355:                if (error && error != EDEADLK)
1.103     sommerfe  356:                        return error;
                    357:        }
1.29      cgd       358:
1.113     fvdl      359:        /*
                    360:         * We must choose whether to allocate a new vnode or recycle an
                    361:         * existing one. The criterion for allocating a new one is that
                    362:         * the total number of vnodes is less than the number desired or
                    363:         * there are no vnodes on either free list. Generally we only
                    364:         * want to recycle vnodes that have no buffers associated with
                    365:         * them, so we look first on the vnode_free_list. If it is empty,
                    366:         * we next consider vnodes with referencing buffers on the
                    367:         * vnode_hold_list. The toggle ensures that half the time we
                    368:         * will use a buffer from the vnode_hold_list, and half the time
                    369:         * we will allocate a new one unless the list has grown to twice
                    370:         * the desired size. We are reticent to recycle vnodes from the
                    371:         * vnode_hold_list because we will lose the identity of all its
                    372:         * referencing buffers.
                    373:         */
1.142     chs       374:
1.153     thorpej   375:        vp = NULL;
                    376:
                    377:        simple_lock(&vnode_free_list_slock);
                    378:
1.113     fvdl      379:        toggle ^= 1;
                    380:        if (numvnodes > 2 * desiredvnodes)
                    381:                toggle = 0;
                    382:
1.153     thorpej   383:        tryalloc = numvnodes < desiredvnodes ||
1.159     enami     384:            (TAILQ_FIRST(&vnode_free_list) == NULL &&
                    385:             (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
1.153     thorpej   386:
                    387:        if (tryalloc &&
                    388:            (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
                                        /* numvnodes is bumped while the free list lock is still held */
1.206     yamt      389:                numvnodes++;
1.80      fvdl      390:                simple_unlock(&vnode_free_list_slock);
1.142     chs       391:                memset(vp, 0, sizeof(*vp));
1.248     yamt      392:                UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
1.225     yamt      393:                /*
                    394:                 * done by memset() above.
                    395:                 *      LIST_INIT(&vp->v_nclist);
                    396:                 *      LIST_INIT(&vp->v_dnclist);
                    397:                 */
1.29      cgd       398:        } else {
                                        /*
                                         * Reached when allocation was not wanted or pool_get()
                                         * returned NULL.  getcleanvnode() releases
                                         * vnode_free_list_slock on every return path.
                                         */
1.256     christos  399:                vp = getcleanvnode(l);
1.80      fvdl      400:                /*
                    401:                 * Unless this is a bad time of the month, at most
                    402:                 * the first NCPUS items on the free list are
                    403:                 * locked, so this is close enough to being empty.
                    404:                 */
                    405:                if (vp == NULLVP) {
                                                /* drop the busy hold; try_again takes it afresh */
1.113     fvdl      406:                        if (mp && error != EDEADLK)
                    407:                                vfs_unbusy(mp);
                                                /*
                                                 * tryalloc set here means pool_get() failed too:
                                                 * warn, sleep with a timeout of hz, and start over.
                                                 */
1.153     thorpej   408:                        if (tryalloc) {
                    409:                                printf("WARNING: unable to allocate new "
                    410:                                    "vnode, retrying...\n");
                    411:                                (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
                    412:                                goto try_again;
                    413:                        }
1.132     jdolecek  414:                        tablefull("vnode", "increase kern.maxvnodes or NVNODE");
1.29      cgd       415:                        *vpp = 0;
                    416:                        return (ENFILE);
                    417:                }
                                        /* reset the recycled vnode for its new user */
1.248     yamt      418:                vp->v_usecount = 1;
1.29      cgd       419:                vp->v_flag = 0;
1.158     chs       420:                vp->v_socket = NULL;
1.29      cgd       421:        }
                                /* common initialization for new and recycled vnodes */
                    422:        vp->v_type = VNON;
1.104     wrstuden  423:        vp->v_vnlock = &vp->v_lock;
                    424:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.225     yamt      425:        KASSERT(LIST_EMPTY(&vp->v_nclist));
                    426:        KASSERT(LIST_EMPTY(&vp->v_dnclist));
1.29      cgd       427:        vp->v_tag = tag;
                    428:        vp->v_op = vops;
                    429:        insmntque(vp, mp);
1.30      mycroft   430:        *vpp = vp;
                    431:        vp->v_data = 0;
1.240     christos  432:        simple_lock_init(&vp->v_interlock);
1.142     chs       433:
                    434:        /*
                    435:         * initialize uvm_object within vnode.
                    436:         */
                    437:
1.158     chs       438:        uobj = &vp->v_uobj;
                    439:        KASSERT(uobj->pgops == &uvm_vnodeops);
                    440:        KASSERT(uobj->uo_npages == 0);
                    441:        KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
1.288     yamt      442:        vp->v_size = vp->v_writesize = VSIZENOTSET;
1.142     chs       443:
1.113     fvdl      444:        if (mp && error != EDEADLK)
                    445:                vfs_unbusy(mp);
1.29      cgd       446:        return (0);
1.130     fvdl      447: }
                    448:
                    449: /*
                    450:  * This is really just the reverse of getnewvnode(). Needed for
                    451:  * VFS_VGET functions which may need to push back a vnode in case
                    452:  * of a locking race.
                          *
                          * vp must hold exactly one use reference (checked under DIAGNOSTIC).
                          * The reference is dropped, vp is taken off its mount vnode list,
                          * marked VBAD, and put at the head of vnode_hold_list if v_holdcnt is
                          * positive, otherwise at the head of vnode_free_list.
                    453:  */
                    454: void
1.247     thorpej   455: ungetnewvnode(struct vnode *vp)
1.130     fvdl      456: {
                    457: #ifdef DIAGNOSTIC
                    458:        if (vp->v_usecount != 1)
1.131     fvdl      459:                panic("ungetnewvnode: busy vnode");
1.130     fvdl      460: #endif
                    461:        vp->v_usecount--;
                    462:        insmntque(vp, NULL);
                                /* VBAD makes getcleanvnode() skip vgonel() when this vnode is reused */
                    463:        vp->v_type = VBAD;
                    464:
                                /* the interlock is taken before the free list lock */
                    465:        simple_lock(&vp->v_interlock);
1.217     junyoung  466:        /*
1.130     fvdl      467:         * Insert at head of LRU list
                    468:         */
                    469:        simple_lock(&vnode_free_list_slock);
                    470:        if (vp->v_holdcnt > 0)
                    471:                TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
                    472:        else
                    473:                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1.217     junyoung  474:        simple_unlock(&vnode_free_list_slock);
1.130     fvdl      475:        simple_unlock(&vp->v_interlock);
1.29      cgd       476: }
                    477:
                    478: /*
                    479:  * Move a vnode from one mount queue to another.
                          *
                          * vp is unlinked from the vnode list of its current mount, if any, and
                          * appended to the list of mp.  Passing a NULL mp only unlinks it.
                          * Both list updates are done under mntvnode_slock.
                    480:  */
1.260     yamt      481: static void
1.247     thorpej   482: insmntque(struct vnode *vp, struct mount *mp)
1.29      cgd       483: {
                    484:
1.103     sommerfe  485: #ifdef DIAGNOSTIC
                                /*
                                 * Adding a vnode to a mount that is being unmounted is a bug,
                                 * except for MNT_SOFTDEP mounts and VT_VFS vnodes.
                                 */
                    486:        if ((mp != NULL) &&
1.207     dbj       487:            (mp->mnt_iflag & IMNT_UNMOUNT) &&
1.113     fvdl      488:            !(mp->mnt_flag & MNT_SOFTDEP) &&
                    489:            vp->v_tag != VT_VFS) {
1.103     sommerfe  490:                panic("insmntque into dying filesystem");
                    491:        }
                    492: #endif
1.217     junyoung  493:
1.80      fvdl      494:        simple_lock(&mntvnode_slock);
1.29      cgd       495:        /*
                    496:         * Delete from old mount point vnode list, if on one.
                    497:         */
                    498:        if (vp->v_mount != NULL)
1.272     reinoud   499:                TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
1.29      cgd       500:        /*
                    501:         * Insert into list of vnodes for the new mount point, if available.
                    502:         */
                                /* the assignment inside the test also records the new owner in vp */
1.279     pooka     503:        if ((vp->v_mount = mp) != NULL)
                    504:                TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
1.80      fvdl      505:        simple_unlock(&mntvnode_slock);
1.29      cgd       506: }
                    507:
                    508: /*
                    509:  * Create a vnode for a block device.
1.59      thorpej   510:  * Used for root filesystem and swap areas.
1.29      cgd       511:  * Also used for memory file system special devices.
                          *
                          * Thin wrapper: calls getdevvp() with type VBLK and returns its result,
                          * with the vnode (or NULLVP) stored in *vpp.
                    512:  */
1.50      christos  513: int
1.247     thorpej   514: bdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd       515: {
1.30      mycroft   516:
                    517:        return (getdevvp(dev, vpp, VBLK));
1.29      cgd       518: }
                    519:
                    520: /*
                    521:  * Create a vnode for a character device.
                    522:  * Used for kernfs and some console handling.
                          *
                          * Thin wrapper: calls getdevvp() with type VCHR and returns its result,
                          * with the vnode (or NULLVP) stored in *vpp.
                    523:  */
1.50      christos  524: int
1.247     thorpej   525: cdevvp(dev_t dev, struct vnode **vpp)
1.29      cgd       526: {
1.30      mycroft   527:
                    528:        return (getdevvp(dev, vpp, VCHR));
1.29      cgd       529: }
                    530:
                    531: /*
                    532:  * Create a vnode for a device.
                    533:  * Used by bdevvp (block device) for root file system etc.,
                    534:  * and by cdevvp (character device) for console and kernfs.
                          *
                          * Returns 0 with the vnode in *vpp.  For dev == NODEV it returns 0
                          * with *vpp set to NULLVP.  If getnewvnode() fails, its error is
                          * returned and *vpp is set to NULLVP.
                    535:  */
1.260     yamt      536: static int
1.247     thorpej   537: getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
1.29      cgd       538: {
1.123     augustss  539:        struct vnode *vp;
1.29      cgd       540:        struct vnode *nvp;
                    541:        int error;
                    542:
1.80      fvdl      543:        if (dev == NODEV) {
                    544:                *vpp = NULLVP;
1.29      cgd       545:                return (0);
1.80      fvdl      546:        }
                                /* the new vnode belongs to no mount and uses the specfs operations */
1.50      christos  547:        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1.29      cgd       548:        if (error) {
                    549:                *vpp = NULLVP;
                    550:                return (error);
                    551:        }
                    552:        vp = nvp;
                    553:        vp->v_type = type;
1.297     pooka     554:        uvm_vnp_setsize(vp, 0);
                                /*
                                 * If checkalias() hands back a vnode for this device, drop the
                                 * one just created and return that vnode instead.
                                 */
1.50      christos  555:        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
1.29      cgd       556:                vput(vp);
                    557:                vp = nvp;
                    558:        }
                    559:        *vpp = vp;
                    560:        return (0);
                    561: }
                    562:
                    563: /*
                    564:  * Check to see if the new vnode represents a special device
                    565:  * for which we already have a vnode (either because of
                    566:  * bdevvp() or because of a different vnode representing
                    567:  * the same block device). If such an alias exists, deallocate
                    568:  * the existing contents and return the aliased vnode. The
                    569:  * caller is responsible for filling it with its new contents.
                    570:  */
                    571: struct vnode *
1.247     thorpej   572: checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
1.29      cgd       573: {
1.256     christos  574:        struct lwp *l = curlwp;         /* XXX */
1.123     augustss  575:        struct vnode *vp;
1.29      cgd       576:        struct vnode **vpp;
                    577:
                    578:        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                    579:                return (NULLVP);
                    580:
                    581:        vpp = &speclisth[SPECHASH(nvp_rdev)];
                    582: loop:
1.80      fvdl      583:        simple_lock(&spechash_slock);
1.29      cgd       584:        for (vp = *vpp; vp; vp = vp->v_specnext) {
                    585:                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                    586:                        continue;
                    587:                /*
                    588:                 * Alias, but not in use, so flush it out.
                    589:                 */
1.80      fvdl      590:                simple_lock(&vp->v_interlock);
1.231     mycroft   591:                simple_unlock(&spechash_slock);
1.29      cgd       592:                if (vp->v_usecount == 0) {
1.256     christos  593:                        vgonel(vp, l);
1.29      cgd       594:                        goto loop;
                    595:                }
1.231     mycroft   596:                /*
                    597:                 * What we're interested to know here is if someone else has
                    598:                 * removed this vnode from the device hash list while we were
                    599:                 * waiting.  This can only happen if vclean() did it, and
1.259     yamt      600:                 * this requires the vnode to be locked.
1.231     mycroft   601:                 */
1.259     yamt      602:                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
1.29      cgd       603:                        goto loop;
1.259     yamt      604:                if (vp->v_specinfo == NULL) {
                    605:                        vput(vp);
                    606:                        goto loop;
                    607:                }
1.231     mycroft   608:                simple_lock(&spechash_slock);
1.29      cgd       609:                break;
                    610:        }
1.34      cgd       611:        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
1.29      cgd       612:                MALLOC(nvp->v_specinfo, struct specinfo *,
1.150     thorpej   613:                        sizeof(struct specinfo), M_VNODE, M_NOWAIT);
                    614:                /* XXX Erg. */
                    615:                if (nvp->v_specinfo == NULL) {
                    616:                        simple_unlock(&spechash_slock);
                    617:                        uvm_wait("checkalias");
                    618:                        goto loop;
                    619:                }
                    620:
1.29      cgd       621:                nvp->v_rdev = nvp_rdev;
                    622:                nvp->v_hashchain = vpp;
                    623:                nvp->v_specnext = *vpp;
1.113     fvdl      624:                nvp->v_specmountpoint = NULL;
1.80      fvdl      625:                simple_unlock(&spechash_slock);
1.62      kleink    626:                nvp->v_speclockf = NULL;
1.216     hannken   627:
1.29      cgd       628:                *vpp = nvp;
1.80      fvdl      629:                if (vp != NULLVP) {
1.29      cgd       630:                        nvp->v_flag |= VALIASED;
                    631:                        vp->v_flag |= VALIASED;
                    632:                        vput(vp);
                    633:                }
                    634:                return (NULLVP);
                    635:        }
1.80      fvdl      636:        simple_unlock(&spechash_slock);
                    637:        VOP_UNLOCK(vp, 0);
                    638:        simple_lock(&vp->v_interlock);
1.256     christos  639:        vclean(vp, 0, l);
1.29      cgd       640:        vp->v_op = nvp->v_op;
                    641:        vp->v_tag = nvp->v_tag;
1.104     wrstuden  642:        vp->v_vnlock = &vp->v_lock;
                    643:        lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1.29      cgd       644:        nvp->v_type = VNON;
                    645:        insmntque(vp, mp);
                    646:        return (vp);
                    647: }
                    648:
                    649: /*
                    650:  * Grab a particular vnode from the free list, increment its
1.83      fvdl      651:  * reference count and lock it. If the vnode lock bit is set the
                    652:  * vnode is being eliminated in vgone. In that case, we can not
                    653:  * grab the vnode, so the process is awakened when the transition is
                    654:  * completed, and an error returned to indicate that the vnode is no
                    655:  * longer usable (possibly having been changed to a new file system type).
1.29      cgd       656:  */
1.30      mycroft   657: int
1.247     thorpej   658: vget(struct vnode *vp, int flags)
1.29      cgd       659: {
1.175     perseant  660:        int error;
1.29      cgd       661:
1.30      mycroft   662:        /*
                    663:         * If the vnode is in the process of being cleaned out for
                    664:         * another use, we wait for the cleaning to finish and then
1.80      fvdl      665:         * return failure. Cleaning is determined by checking that
                    666:         * the VXLOCK flag is set.
                    667:         */
1.142     chs       668:
1.80      fvdl      669:        if ((flags & LK_INTERLOCK) == 0)
                    670:                simple_lock(&vp->v_interlock);
1.257     yamt      671:        if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
1.142     chs       672:                if (flags & LK_NOWAIT) {
1.143     sommerfe  673:                        simple_unlock(&vp->v_interlock);
1.142     chs       674:                        return EBUSY;
                    675:                }
1.29      cgd       676:                vp->v_flag |= VXWANT;
1.158     chs       677:                ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1.80      fvdl      678:                return (ENOENT);
1.29      cgd       679:        }
1.80      fvdl      680:        if (vp->v_usecount == 0) {
                    681:                simple_lock(&vnode_free_list_slock);
1.113     fvdl      682:                if (vp->v_holdcnt > 0)
                    683:                        TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                    684:                else
                    685:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.80      fvdl      686:                simple_unlock(&vnode_free_list_slock);
                    687:        }
1.29      cgd       688:        vp->v_usecount++;
1.112     mycroft   689: #ifdef DIAGNOSTIC
                    690:        if (vp->v_usecount == 0) {
                    691:                vprint("vget", vp);
1.125     chs       692:                panic("vget: usecount overflow, vp %p", vp);
1.112     mycroft   693:        }
                    694: #endif
1.80      fvdl      695:        if (flags & LK_TYPE_MASK) {
1.113     fvdl      696:                if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1.257     yamt      697:                        vrele(vp);
1.113     fvdl      698:                }
1.80      fvdl      699:                return (error);
                    700:        }
                    701:        simple_unlock(&vp->v_interlock);
1.29      cgd       702:        return (0);
                    703: }
                    704:
                    705: /*
                    706:  * vput(), just unlock and vrele()
                    707:  */
                    708: void
1.247     thorpej   709: vput(struct vnode *vp)
1.29      cgd       710: {
1.256     christos  711:        struct lwp *l = curlwp;         /* XXX */
1.30      mycroft   712:
1.111     mycroft   713: #ifdef DIAGNOSTIC
1.80      fvdl      714:        if (vp == NULL)
                    715:                panic("vput: null vp");
                    716: #endif
                    717:        simple_lock(&vp->v_interlock);
                    718:        vp->v_usecount--;
                    719:        if (vp->v_usecount > 0) {
                    720:                simple_unlock(&vp->v_interlock);
                    721:                VOP_UNLOCK(vp, 0);
                    722:                return;
                    723:        }
                    724: #ifdef DIAGNOSTIC
                    725:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                    726:                vprint("vput: bad ref count", vp);
                    727:                panic("vput: ref cnt");
                    728:        }
                    729: #endif
                    730:        /*
1.87      pk        731:         * Insert at tail of LRU list.
1.80      fvdl      732:         */
                    733:        simple_lock(&vnode_free_list_slock);
1.113     fvdl      734:        if (vp->v_holdcnt > 0)
                    735:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                    736:        else
                    737:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1.80      fvdl      738:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej   739:        if (vp->v_flag & VEXECMAP) {
1.167     chs       740:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                    741:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs       742:        }
1.270     chs       743:        vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
1.80      fvdl      744:        simple_unlock(&vp->v_interlock);
1.256     christos  745:        VOP_INACTIVE(vp, l);
1.29      cgd       746: }
                    747:
                    748: /*
                    749:  * Vnode release.
                    750:  * If count drops to zero, call inactive routine and return to freelist.
                    751:  */
1.298     pooka     752: static void
                    753: do_vrele(struct vnode *vp, int doinactive, int onhead)
1.29      cgd       754: {
1.256     christos  755:        struct lwp *l = curlwp;         /* XXX */
1.29      cgd       756:
                    757: #ifdef DIAGNOSTIC
                    758:        if (vp == NULL)
                    759:                panic("vrele: null vp");
                    760: #endif
1.80      fvdl      761:        simple_lock(&vp->v_interlock);
1.29      cgd       762:        vp->v_usecount--;
1.80      fvdl      763:        if (vp->v_usecount > 0) {
                    764:                simple_unlock(&vp->v_interlock);
1.29      cgd       765:                return;
1.80      fvdl      766:        }
1.29      cgd       767: #ifdef DIAGNOSTIC
1.80      fvdl      768:        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1.29      cgd       769:                vprint("vrele: bad ref count", vp);
1.142     chs       770:                panic("vrele: ref cnt vp %p", vp);
1.29      cgd       771:        }
                    772: #endif
1.30      mycroft   773:        /*
1.87      pk        774:         * Insert at tail of LRU list.
1.30      mycroft   775:         */
1.80      fvdl      776:        simple_lock(&vnode_free_list_slock);
1.298     pooka     777:        if (vp->v_holdcnt > 0) {
1.113     fvdl      778:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1.298     pooka     779:        } else {
                    780:                if (onhead)
                    781:                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                    782:                else
                    783:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                    784:        }
1.80      fvdl      785:        simple_unlock(&vnode_free_list_slock);
1.161     thorpej   786:        if (vp->v_flag & VEXECMAP) {
1.167     chs       787:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                    788:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs       789:        }
1.270     chs       790:        vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
1.298     pooka     791:
                    792:        if (doinactive) {
                    793:                if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
                    794:                        VOP_INACTIVE(vp, l);
                    795:        } else {
                    796:                simple_unlock(&vp->v_interlock);
                    797:        }
                    798: }
                    799:
                    /*
                     * vrele: drop a use reference on vp.  When the last reference goes
                     * away the vnode is queued at the tail of its list and made inactive.
                     */
                    void
                    vrele(struct vnode *vp)
                    {
                            const int doinactive = 1;
                            const int onhead = 0;

                            do_vrele(vp, doinactive, onhead);
                    }
                    806:
                    /*
                     * vrele2: drop a use reference on vp without calling VOP_INACTIVE().
                     * `onhead' selects whether a vnode going to the free list is queued
                     * at its head (non-zero) or at its tail (zero).
                     */
                    void
                    vrele2(struct vnode *vp, int onhead)
                    {
                            const int doinactive = 0;

                            do_vrele(vp, doinactive, onhead);
                    }
                    813:
                    814: /*
                    815:  * Page or buffer structure gets a reference.
1.258     chs       816:  * Called with v_interlock held.
1.29      cgd       817:  */
1.30      mycroft   818: void
1.247     thorpej   819: vholdl(struct vnode *vp)
1.29      cgd       820: {
                    821:
1.113     fvdl      822:        /*
                    823:         * If it is on the freelist and the hold count is currently
                    824:         * zero, move it to the hold list. The test of the back
                    825:         * pointer and the use reference count of zero is because
                    826:         * it will be removed from a free list by getnewvnode,
                    827:         * but will not have its reference count incremented until
                    828:         * after calling vgone. If the reference count were
                    829:         * incremented first, vgone would (incorrectly) try to
                    830:         * close the previous instance of the underlying object.
                    831:         * So, the back pointer is explicitly set to `0xdeadb' in
                    832:         * getnewvnode after removing it from a freelist to ensure
                    833:         * that we do not try to move it here.
                    834:         */
                    835:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                    836:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                    837:                simple_lock(&vnode_free_list_slock);
                    838:                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                    839:                TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
                    840:                simple_unlock(&vnode_free_list_slock);
                    841:        }
1.29      cgd       842:        vp->v_holdcnt++;
                    843: }
                    844:
                    845: /*
                    846:  * Page or buffer structure frees a reference.
1.258     chs       847:  * Called with v_interlock held.
1.29      cgd       848:  */
1.30      mycroft   849: void
1.247     thorpej   850: holdrelel(struct vnode *vp)
1.29      cgd       851: {
                    852:
                    853:        if (vp->v_holdcnt <= 0)
1.215     yamt      854:                panic("holdrelel: holdcnt vp %p", vp);
1.29      cgd       855:        vp->v_holdcnt--;
1.142     chs       856:
1.113     fvdl      857:        /*
                    858:         * If it is on the holdlist and the hold count drops to
                    859:         * zero, move it to the free list. The test of the back
                    860:         * pointer and the use reference count of zero is because
                    861:         * it will be removed from a free list by getnewvnode,
                    862:         * but will not have its reference count incremented until
                    863:         * after calling vgone. If the reference count were
                    864:         * incremented first, vgone would (incorrectly) try to
                    865:         * close the previous instance of the underlying object.
                    866:         * So, the back pointer is explicitly set to `0xdeadb' in
                    867:         * getnewvnode after removing it from a freelist to ensure
                    868:         * that we do not try to move it here.
                    869:         */
1.142     chs       870:
1.113     fvdl      871:        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
                    872:            vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                    873:                simple_lock(&vnode_free_list_slock);
                    874:                TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
                    875:                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                    876:                simple_unlock(&vnode_free_list_slock);
                    877:        }
1.81      ross      878: }
                    879:
                    880: /*
                    881:  * Vnode reference.
                    882:  */
                    883: void
1.247     thorpej   884: vref(struct vnode *vp)
1.81      ross      885: {
                    886:
                    887:        simple_lock(&vp->v_interlock);
                    888:        if (vp->v_usecount <= 0)
1.125     chs       889:                panic("vref used where vget required, vp %p", vp);
1.81      ross      890:        vp->v_usecount++;
1.112     mycroft   891: #ifdef DIAGNOSTIC
                    892:        if (vp->v_usecount == 0) {
                    893:                vprint("vref", vp);
1.125     chs       894:                panic("vref: usecount overflow, vp %p", vp);
1.112     mycroft   895:        }
                    896: #endif
1.80      fvdl      897:        simple_unlock(&vp->v_interlock);
1.29      cgd       898: }
                    899:
                    900: /*
                    901:  * Remove any vnodes in the vnode table belonging to mount point mp.
                    902:  *
1.183     yamt      903:  * If FORCECLOSE is not specified, there should not be any active ones,
1.29      cgd       904:  * return error if any are found (nb: this is a user error, not a
1.183     yamt      905:  * system error). If FORCECLOSE is specified, detach any active vnodes
1.29      cgd       906:  * that are found.
1.183     yamt      907:  *
                    908:  * If WRITECLOSE is set, only flush out regular file vnodes open for
                    909:  * writing.
                    910:  *
                    911:  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1.29      cgd       912:  */
1.30      mycroft   913: #ifdef DEBUG
                    914: int busyprt = 0;       /* print out busy vnodes */
                    915: struct ctldebug debug1 = { "busyprt", &busyprt };
                    916: #endif
1.29      cgd       917:
1.50      christos  918: int
1.247     thorpej   919: vflush(struct mount *mp, struct vnode *skipvp, int flags)
1.29      cgd       920: {
1.256     christos  921:        struct lwp *l = curlwp;         /* XXX */
1.273     reinoud   922:        struct vnode *vp, *nvp;
1.29      cgd       923:        int busy = 0;
                    924:
1.80      fvdl      925:        simple_lock(&mntvnode_slock);
1.29      cgd       926: loop:
1.273     reinoud   927:        /*
                    928:         * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
                    929:         * and vclean() are called
                    930:         */
                    931:        for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1.29      cgd       932:                if (vp->v_mount != mp)
                    933:                        goto loop;
1.273     reinoud   934:                nvp = TAILQ_NEXT(vp, v_mntvnodes);
1.29      cgd       935:                /*
                    936:                 * Skip over a selected vnode.
                    937:                 */
                    938:                if (vp == skipvp)
                    939:                        continue;
1.80      fvdl      940:                simple_lock(&vp->v_interlock);
1.29      cgd       941:                /*
                    942:                 * Skip over a vnodes marked VSYSTEM.
                    943:                 */
1.80      fvdl      944:                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
                    945:                        simple_unlock(&vp->v_interlock);
1.29      cgd       946:                        continue;
1.80      fvdl      947:                }
1.29      cgd       948:                /*
1.30      mycroft   949:                 * If WRITECLOSE is set, only flush out regular file
                    950:                 * vnodes open for writing.
                    951:                 */
                    952:                if ((flags & WRITECLOSE) &&
1.92      thorpej   953:                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                    954:                        simple_unlock(&vp->v_interlock);
1.30      mycroft   955:                        continue;
1.92      thorpej   956:                }
1.30      mycroft   957:                /*
1.29      cgd       958:                 * With v_usecount == 0, all we need to do is clear
                    959:                 * out the vnode data structures and we are done.
                    960:                 */
                    961:                if (vp->v_usecount == 0) {
1.80      fvdl      962:                        simple_unlock(&mntvnode_slock);
1.256     christos  963:                        vgonel(vp, l);
1.80      fvdl      964:                        simple_lock(&mntvnode_slock);
1.29      cgd       965:                        continue;
                    966:                }
                    967:                /*
1.30      mycroft   968:                 * If FORCECLOSE is set, forcibly close the vnode.
1.29      cgd       969:                 * For block or character devices, revert to an
                    970:                 * anonymous device. For all other files, just kill them.
                    971:                 */
                    972:                if (flags & FORCECLOSE) {
1.80      fvdl      973:                        simple_unlock(&mntvnode_slock);
1.29      cgd       974:                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1.256     christos  975:                                vgonel(vp, l);
1.29      cgd       976:                        } else {
1.256     christos  977:                                vclean(vp, 0, l);
1.30      mycroft   978:                                vp->v_op = spec_vnodeop_p;
1.29      cgd       979:                                insmntque(vp, (struct mount *)0);
                    980:                        }
1.80      fvdl      981:                        simple_lock(&mntvnode_slock);
1.29      cgd       982:                        continue;
                    983:                }
1.30      mycroft   984: #ifdef DEBUG
1.29      cgd       985:                if (busyprt)
                    986:                        vprint("vflush: busy vnode", vp);
1.30      mycroft   987: #endif
1.80      fvdl      988:                simple_unlock(&vp->v_interlock);
1.29      cgd       989:                busy++;
                    990:        }
1.80      fvdl      991:        simple_unlock(&mntvnode_slock);
1.29      cgd       992:        if (busy)
                    993:                return (EBUSY);
                    994:        return (0);
                    995: }
                    996:
                    997: /*
                    998:  * Disassociate the underlying file system from a vnode.
                    999:  */
1.260     yamt     1000: static void
1.256     christos 1001: vclean(struct vnode *vp, int flags, struct lwp *l)
1.29      cgd      1002: {
1.175     perseant 1003:        int active;
1.29      cgd      1004:
1.166     chs      1005:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1006:
1.29      cgd      1007:        /*
                   1008:         * Check to see if the vnode is in use.
                   1009:         * If so we have to reference it before we clean it out
                   1010:         * so that its count cannot fall to zero and generate a
                   1011:         * race against ourselves to recycle it.
                   1012:         */
1.166     chs      1013:
1.112     mycroft  1014:        if ((active = vp->v_usecount) != 0) {
1.87      pk       1015:                vp->v_usecount++;
1.112     mycroft  1016: #ifdef DIAGNOSTIC
                   1017:                if (vp->v_usecount == 0) {
                   1018:                        vprint("vclean", vp);
                   1019:                        panic("vclean: usecount overflow");
                   1020:                }
                   1021: #endif
                   1022:        }
1.87      pk       1023:
1.29      cgd      1024:        /*
                   1025:         * Prevent the vnode from being recycled or
                   1026:         * brought into use while we clean it out.
                   1027:         */
                   1028:        if (vp->v_flag & VXLOCK)
1.125     chs      1029:                panic("vclean: deadlock, vp %p", vp);
1.29      cgd      1030:        vp->v_flag |= VXLOCK;
1.161     thorpej  1031:        if (vp->v_flag & VEXECMAP) {
1.167     chs      1032:                uvmexp.execpages -= vp->v_uobj.uo_npages;
                   1033:                uvmexp.filepages += vp->v_uobj.uo_npages;
1.147     chs      1034:        }
1.161     thorpej  1035:        vp->v_flag &= ~(VTEXT|VEXECMAP);
1.142     chs      1036:
1.29      cgd      1037:        /*
1.80      fvdl     1038:         * Even if the count is zero, the VOP_INACTIVE routine may still
1.289     pooka    1039:         * have the object locked while it cleans it out.  For
                   1040:         * active vnodes, it ensures that no other activity can
1.80      fvdl     1041:         * occur while the underlying object is being cleaned out.
1.289     pooka    1042:         *
1.295     pooka    1043:         * We drain the lock to make sure we are the last one trying to
                   1044:         * get it and immediately resurrect the lock.  Future accesses
                   1045:         * for locking this _vnode_ will be protected by VXLOCK.  However,
                   1046:         * upper layers might be using the _lock_ in case the file system
                   1047:         * exported it and might access it while the vnode lingers in
                   1048:         * deadfs.
1.80      fvdl     1049:         */
1.295     pooka    1050:        VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK);
1.80      fvdl     1051:
1.98      wrstuden 1052:        /*
1.142     chs      1053:         * Clean out any cached data associated with the vnode.
1.231     mycroft  1054:         * If special device, remove it from special device alias list.
                   1055:         * if it is on one.
1.29      cgd      1056:         */
1.166     chs      1057:        if (flags & DOCLOSE) {
1.211     dbj      1058:                int error;
1.231     mycroft  1059:                struct vnode *vq, *vx;
                   1060:
1.256     christos 1061:                error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1.211     dbj      1062:                if (error)
1.256     christos 1063:                        error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1.211     dbj      1064:                KASSERT(error == 0);
1.166     chs      1065:                KASSERT((vp->v_flag & VONWORKLST) == 0);
1.231     mycroft  1066:
                   1067:                if (active)
                   1068:                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
                   1069:
                   1070:                if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
                   1071:                    vp->v_specinfo != 0) {
                   1072:                        simple_lock(&spechash_slock);
                   1073:                        if (vp->v_hashchain != NULL) {
                   1074:                                if (*vp->v_hashchain == vp) {
                   1075:                                        *vp->v_hashchain = vp->v_specnext;
                   1076:                                } else {
                   1077:                                        for (vq = *vp->v_hashchain; vq;
                   1078:                                             vq = vq->v_specnext) {
                   1079:                                                if (vq->v_specnext != vp)
                   1080:                                                        continue;
                   1081:                                                vq->v_specnext = vp->v_specnext;
                   1082:                                                break;
                   1083:                                        }
                   1084:                                        if (vq == NULL)
                   1085:                                                panic("missing bdev");
                   1086:                                }
                   1087:                                if (vp->v_flag & VALIASED) {
                   1088:                                        vx = NULL;
                   1089:                                                for (vq = *vp->v_hashchain; vq;
                   1090:                                                     vq = vq->v_specnext) {
                   1091:                                                if (vq->v_rdev != vp->v_rdev ||
                   1092:                                                    vq->v_type != vp->v_type)
                   1093:                                                        continue;
                   1094:                                                if (vx)
                   1095:                                                        break;
                   1096:                                                vx = vq;
                   1097:                                        }
                   1098:                                        if (vx == NULL)
                   1099:                                                panic("missing alias");
                   1100:                                        if (vq == NULL)
                   1101:                                                vx->v_flag &= ~VALIASED;
                   1102:                                        vp->v_flag &= ~VALIASED;
                   1103:                                }
                   1104:                        }
                   1105:                        simple_unlock(&spechash_slock);
                   1106:                        FREE(vp->v_specinfo, M_VNODE);
                   1107:                        vp->v_specinfo = NULL;
                   1108:                }
1.166     chs      1109:        }
                   1110:        LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1.80      fvdl     1111:
1.29      cgd      1112:        /*
1.30      mycroft  1113:         * If purging an active vnode, it must be closed and
1.80      fvdl     1114:         * deactivated before being reclaimed. Note that the
                   1115:         * VOP_INACTIVE will unlock the vnode.
1.29      cgd      1116:         */
                   1117:        if (active) {
1.256     christos 1118:                VOP_INACTIVE(vp, l);
1.80      fvdl     1119:        } else {
                   1120:                /*
                   1121:                 * Any other processes trying to obtain this lock must first
                   1122:                 * wait for VXLOCK to clear, then call the new lock operation.
                   1123:                 */
                   1124:                VOP_UNLOCK(vp, 0);
1.29      cgd      1125:        }
                   1126:        /*
                   1127:         * Reclaim the vnode.
                   1128:         */
1.256     christos 1129:        if (VOP_RECLAIM(vp, l))
1.125     chs      1130:                panic("vclean: cannot reclaim, vp %p", vp);
1.87      pk       1131:        if (active) {
                   1132:                /*
                   1133:                 * Inline copy of vrele() since VOP_INACTIVE
                   1134:                 * has already been called.
                   1135:                 */
                   1136:                simple_lock(&vp->v_interlock);
                   1137:                if (--vp->v_usecount <= 0) {
                   1138: #ifdef DIAGNOSTIC
                   1139:                        if (vp->v_usecount < 0 || vp->v_writecount != 0) {
                   1140:                                vprint("vclean: bad ref count", vp);
                   1141:                                panic("vclean: ref cnt");
                   1142:                        }
                   1143: #endif
                   1144:                        /*
                   1145:                         * Insert at tail of LRU list.
                   1146:                         */
1.142     chs      1147:
1.113     fvdl     1148:                        simple_unlock(&vp->v_interlock);
1.87      pk       1149:                        simple_lock(&vnode_free_list_slock);
1.104     wrstuden 1150: #ifdef DIAGNOSTIC
1.113     fvdl     1151:                        if (vp->v_holdcnt > 0)
1.125     chs      1152:                                panic("vclean: not clean, vp %p", vp);
1.104     wrstuden 1153: #endif
1.87      pk       1154:                        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
                   1155:                        simple_unlock(&vnode_free_list_slock);
1.113     fvdl     1156:                } else
                   1157:                        simple_unlock(&vp->v_interlock);
1.87      pk       1158:        }
1.30      mycroft  1159:
1.169     chs      1160:        KASSERT(vp->v_uobj.uo_npages == 0);
1.255     yamt     1161:        if (vp->v_type == VREG && vp->v_ractx != NULL) {
                   1162:                uvm_ra_freectx(vp->v_ractx);
                   1163:                vp->v_ractx = NULL;
                   1164:        }
1.80      fvdl     1165:        cache_purge(vp);
                   1166:
1.29      cgd      1167:        /*
1.30      mycroft  1168:         * Done with purge, notify sleepers of the grim news.
1.29      cgd      1169:         */
1.30      mycroft  1170:        vp->v_op = dead_vnodeop_p;
                   1171:        vp->v_tag = VT_NON;
1.289     pooka    1172:        vp->v_vnlock = NULL;
1.139     enami    1173:        simple_lock(&vp->v_interlock);
1.181     jdolecek 1174:        VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
1.234     thorpej  1175:        vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
1.29      cgd      1176:        if (vp->v_flag & VXWANT) {
                   1177:                vp->v_flag &= ~VXWANT;
1.139     enami    1178:                simple_unlock(&vp->v_interlock);
1.283     christos 1179:                wakeup((void *)vp);
1.139     enami    1180:        } else
                   1181:                simple_unlock(&vp->v_interlock);
1.29      cgd      1182: }
                   1183:
                   1184: /*
1.80      fvdl     1185:  * Recycle an unused vnode to the front of the free list.
                   1186:  * Release the passed interlock if the vnode will be recycled.
1.29      cgd      1187:  */
1.80      fvdl     1188: int
1.256     christos 1189: vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
1.217     junyoung 1190: {
                   1191:
1.80      fvdl     1192:        simple_lock(&vp->v_interlock);
                   1193:        if (vp->v_usecount == 0) {
                   1194:                if (inter_lkp)
                   1195:                        simple_unlock(inter_lkp);
1.256     christos 1196:                vgonel(vp, l);
1.80      fvdl     1197:                return (1);
1.29      cgd      1198:        }
1.80      fvdl     1199:        simple_unlock(&vp->v_interlock);
                   1200:        return (0);
1.29      cgd      1201: }
                   1202:
                   1203: /*
                   1204:  * Eliminate all activity associated with a vnode
                   1205:  * in preparation for reuse.
                   1206:  */
                   1207: void
1.247     thorpej  1208: vgone(struct vnode *vp)
1.80      fvdl     1209: {
1.256     christos 1210:        struct lwp *l = curlwp;         /* XXX */
1.80      fvdl     1211:
                   1212:        simple_lock(&vp->v_interlock);
1.256     christos 1213:        vgonel(vp, l);
1.80      fvdl     1214: }
                   1215:
                   1216: /*
                   1217:  * vgone, with the vp interlock held.
                   1218:  */
                   1219: void
1.256     christos 1220: vgonel(struct vnode *vp, struct lwp *l)
1.29      cgd      1221: {
                   1222:
1.166     chs      1223:        LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
                   1224:
1.29      cgd      1225:        /*
                   1226:         * If a vgone (or vclean) is already in progress,
                   1227:         * wait until it is done and return.
                   1228:         */
1.166     chs      1229:
1.29      cgd      1230:        if (vp->v_flag & VXLOCK) {
                   1231:                vp->v_flag |= VXWANT;
1.166     chs      1232:                ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1.29      cgd      1233:                return;
                   1234:        }
1.166     chs      1235:
1.29      cgd      1236:        /*
                   1237:         * Clean out the filesystem specific data.
                   1238:         */
1.166     chs      1239:
1.256     christos 1240:        vclean(vp, DOCLOSE, l);
1.166     chs      1241:        KASSERT((vp->v_flag & VONWORKLST) == 0);
                   1242:
1.29      cgd      1243:        /*
                   1244:         * Delete from old mount point vnode list, if on one.
                   1245:         */
1.166     chs      1246:
1.80      fvdl     1247:        if (vp->v_mount != NULL)
                   1248:                insmntque(vp, (struct mount *)0);
1.166     chs      1249:
1.29      cgd      1250:        /*
1.202     yamt     1251:         * The test of the back pointer and the reference count of
                   1252:         * zero is because it will be removed from the free list by
                   1253:         * getcleanvnode, but will not have its reference count
                   1254:         * incremented until after calling vgone. If the reference
                   1255:         * count were incremented first, vgone would (incorrectly)
                   1256:         * try to close the previous instance of the underlying object.
1.30      mycroft  1257:         * So, the back pointer is explicitly set to `0xdeadb' in
                   1258:         * getnewvnode after removing it from the freelist to ensure
                   1259:         * that we do not try to move it here.
1.29      cgd      1260:         */
1.166     chs      1261:
1.202     yamt     1262:        vp->v_type = VBAD;
1.80      fvdl     1263:        if (vp->v_usecount == 0) {
1.282     thorpej  1264:                bool dofree;
1.202     yamt     1265:
1.80      fvdl     1266:                simple_lock(&vnode_free_list_slock);
1.113     fvdl     1267:                if (vp->v_holdcnt > 0)
1.125     chs      1268:                        panic("vgonel: not clean, vp %p", vp);
1.202     yamt     1269:                /*
                   1270:                 * if it isn't on the freelist, we're called by getcleanvnode
                   1271:                 * and vnode is being re-used.  otherwise, we'll free it.
                   1272:                 */
                   1273:                dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
                   1274:                if (dofree) {
1.80      fvdl     1275:                        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1.202     yamt     1276:                        numvnodes--;
1.80      fvdl     1277:                }
                   1278:                simple_unlock(&vnode_free_list_slock);
1.202     yamt     1279:                if (dofree)
                   1280:                        pool_put(&vnode_pool, vp);
1.29      cgd      1281:        }
                   1282: }
                   1283:
                   1284: /*
                   1285:  * Lookup a vnode by device number.
                   1286:  */
1.50      christos 1287: int
1.247     thorpej  1288: vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1.29      cgd      1289: {
1.80      fvdl     1290:        struct vnode *vp;
                   1291:        int rc = 0;
1.29      cgd      1292:
1.80      fvdl     1293:        simple_lock(&spechash_slock);
1.29      cgd      1294:        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                   1295:                if (dev != vp->v_rdev || type != vp->v_type)
                   1296:                        continue;
                   1297:                *vpp = vp;
1.80      fvdl     1298:                rc = 1;
                   1299:                break;
1.29      cgd      1300:        }
1.80      fvdl     1301:        simple_unlock(&spechash_slock);
                   1302:        return (rc);
1.96      thorpej  1303: }
                   1304:
                   1305: /*
                   1306:  * Revoke all the vnodes corresponding to the specified minor number
                   1307:  * range (endpoints inclusive) of the specified major.
                   1308:  */
                   1309: void
1.247     thorpej  1310: vdevgone(int maj, int minl, int minh, enum vtype type)
1.96      thorpej  1311: {
                   1312:        struct vnode *vp;
                   1313:        int mn;
                   1314:
1.274     mrg      1315:        vp = NULL;      /* XXX gcc */
                   1316:
1.96      thorpej  1317:        for (mn = minl; mn <= minh; mn++)
                   1318:                if (vfinddev(makedev(maj, mn), type, &vp))
                   1319:                        VOP_REVOKE(vp, REVOKEALL);
1.29      cgd      1320: }
                   1321:
                   1322: /*
                   1323:  * Calculate the total number of references to a special device.
                   1324:  */
1.30      mycroft  1325: int
1.247     thorpej  1326: vcount(struct vnode *vp)
1.29      cgd      1327: {
1.123     augustss 1328:        struct vnode *vq, *vnext;
1.29      cgd      1329:        int count;
                   1330:
                   1331: loop:
                   1332:        if ((vp->v_flag & VALIASED) == 0)
                   1333:                return (vp->v_usecount);
1.80      fvdl     1334:        simple_lock(&spechash_slock);
1.30      mycroft  1335:        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                   1336:                vnext = vq->v_specnext;
1.29      cgd      1337:                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                   1338:                        continue;
                   1339:                /*
                   1340:                 * Alias, but not in use, so flush it out.
                   1341:                 */
1.151     wrstuden 1342:                if (vq->v_usecount == 0 && vq != vp &&
                   1343:                    (vq->v_flag & VXLOCK) == 0) {
1.80      fvdl     1344:                        simple_unlock(&spechash_slock);
1.29      cgd      1345:                        vgone(vq);
                   1346:                        goto loop;
                   1347:                }
                   1348:                count += vq->v_usecount;
                   1349:        }
1.80      fvdl     1350:        simple_unlock(&spechash_slock);
1.29      cgd      1351:        return (count);
                   1352: }
                   1353:
                   1354:
1.101     mrg      1355: /*
1.220     lukem    1356:  * sysctl helper routine to return list of supported fstypes
                   1357:  */
                   1358: static int
                   1359: sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
                   1360: {
1.291     christos 1361:        char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
1.220     lukem    1362:        char *where = oldp;
                   1363:        struct vfsops *v;
                   1364:        size_t needed, left, slen;
                   1365:        int error, first;
                   1366:
                   1367:        if (newp != NULL)
                   1368:                return (EPERM);
                   1369:        if (namelen != 0)
                   1370:                return (EINVAL);
                   1371:
                   1372:        first = 1;
                   1373:        error = 0;
                   1374:        needed = 0;
                   1375:        left = *oldlenp;
                   1376:
                   1377:        LIST_FOREACH(v, &vfs_list, vfs_list) {
                   1378:                if (where == NULL)
                   1379:                        needed += strlen(v->vfs_name) + 1;
                   1380:                else {
1.245     christos 1381:                        memset(bf, 0, sizeof(bf));
1.220     lukem    1382:                        if (first) {
1.245     christos 1383:                                strncpy(bf, v->vfs_name, sizeof(bf));
1.220     lukem    1384:                                first = 0;
                   1385:                        } else {
1.245     christos 1386:                                bf[0] = ' ';
                   1387:                                strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1.220     lukem    1388:                        }
1.245     christos 1389:                        bf[sizeof(bf)-1] = '\0';
                   1390:                        slen = strlen(bf);
1.220     lukem    1391:                        if (left < slen + 1)
                   1392:                                break;
                   1393:                        /* +1 to copy out the trailing NUL byte */
1.245     christos 1394:                        error = copyout(bf, where, slen + 1);
1.220     lukem    1395:                        if (error)
                   1396:                                break;
                   1397:                        where += slen;
                   1398:                        needed += slen;
                   1399:                        left -= slen;
                   1400:                }
                   1401:        }
                   1402:        *oldlenp = needed;
                   1403:        return (error);
                   1404: }
                   1405:
                   1406: /*
1.80      fvdl     1407:  * Top level filesystem related information gathering.
                   1408:  */
1.212     atatat   1409: SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1.80      fvdl     1410: {
1.218     atatat   1411:        sysctl_createv(clog, 0, NULL, NULL,
                   1412:                       CTLFLAG_PERMANENT,
1.212     atatat   1413:                       CTLTYPE_NODE, "vfs", NULL,
                   1414:                       NULL, 0, NULL, 0,
                   1415:                       CTL_VFS, CTL_EOL);
1.218     atatat   1416:        sysctl_createv(clog, 0, NULL, NULL,
                   1417:                       CTLFLAG_PERMANENT,
1.226     atatat   1418:                       CTLTYPE_NODE, "generic",
                   1419:                       SYSCTL_DESCR("Non-specific vfs related information"),
1.212     atatat   1420:                       NULL, 0, NULL, 0,
                   1421:                       CTL_VFS, VFS_GENERIC, CTL_EOL);
1.218     atatat   1422:        sysctl_createv(clog, 0, NULL, NULL,
                   1423:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1.226     atatat   1424:                       CTLTYPE_INT, "usermount",
                   1425:                       SYSCTL_DESCR("Whether unprivileged users may mount "
                   1426:                                    "filesystems"),
1.212     atatat   1427:                       NULL, 0, &dovfsusermount, 0,
                   1428:                       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1.220     lukem    1429:        sysctl_createv(clog, 0, NULL, NULL,
                   1430:                       CTLFLAG_PERMANENT,
                   1431:                       CTLTYPE_STRING, "fstypes",
                   1432:                       SYSCTL_DESCR("List of file systems present"),
                   1433:                       sysctl_vfs_generic_fstypes, 0, NULL, 0,
                   1434:                       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1.263     chs      1435:        sysctl_createv(clog, 0, NULL, NULL,
                   1436:                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
                   1437:                       CTLTYPE_INT, "magiclinks",
                   1438:                       SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
                   1439:                       NULL, 0, &vfs_magiclinks, 0,
                   1440:                       CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
1.80      fvdl     1441: }
                   1442:
1.212     atatat   1443:
1.29      cgd      1444: int kinfo_vdebug = 1;
                   1445: int kinfo_vgetfailed;
                   1446: #define KINFO_VNODESLOP        10
                   1447: /*
                   1448:  * Dump vnode list (via sysctl).
                   1449:  * Copyout address of vnode followed by vnode.
                   1450:  */
                   1451: /* ARGSUSED */
1.50      christos 1452: int
1.212     atatat   1453: sysctl_kern_vnode(SYSCTLFN_ARGS)
1.29      cgd      1454: {
1.212     atatat   1455:        char *where = oldp;
                   1456:        size_t *sizep = oldlenp;
1.80      fvdl     1457:        struct mount *mp, *nmp;
1.272     reinoud  1458:        struct vnode *vp;
1.80      fvdl     1459:        char *bp = where, *savebp;
1.29      cgd      1460:        char *ewhere;
                   1461:        int error;
1.212     atatat   1462:
                   1463:        if (namelen != 0)
                   1464:                return (EOPNOTSUPP);
                   1465:        if (newp != NULL)
                   1466:                return (EPERM);
1.29      cgd      1467:
1.90      perry    1468: #define VPTRSZ sizeof(struct vnode *)
                   1469: #define VNODESZ        sizeof(struct vnode)
1.29      cgd      1470:        if (where == NULL) {
                   1471:                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                   1472:                return (0);
                   1473:        }
                   1474:        ewhere = where + *sizep;
1.80      fvdl     1475:
                   1476:        simple_lock(&mountlist_slock);
1.177     matt     1477:        for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
                   1478:             mp = nmp) {
1.80      fvdl     1479:                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1.177     matt     1480:                        nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      1481:                        continue;
1.80      fvdl     1482:                }
1.29      cgd      1483:                savebp = bp;
                   1484: again:
1.80      fvdl     1485:                simple_lock(&mntvnode_slock);
1.272     reinoud  1486:                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1.29      cgd      1487:                        /*
                   1488:                         * Check that the vp is still associated with
                   1489:                         * this filesystem.  RACE: could have been
                   1490:                         * recycled onto the same filesystem.
                   1491:                         */
                   1492:                        if (vp->v_mount != mp) {
1.80      fvdl     1493:                                simple_unlock(&mntvnode_slock);
1.29      cgd      1494:                                if (kinfo_vdebug)
1.57      christos 1495:                                        printf("kinfo: vp changed\n");
1.29      cgd      1496:                                bp = savebp;
                   1497:                                goto again;
                   1498:                        }
                   1499:                        if (bp + VPTRSZ + VNODESZ > ewhere) {
1.80      fvdl     1500:                                simple_unlock(&mntvnode_slock);
1.29      cgd      1501:                                *sizep = bp - where;
                   1502:                                return (ENOMEM);
                   1503:                        }
1.80      fvdl     1504:                        simple_unlock(&mntvnode_slock);
1.283     christos 1505:                        if ((error = copyout((void *)&vp, bp, VPTRSZ)) ||
                   1506:                           (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ)))
1.29      cgd      1507:                                return (error);
                   1508:                        bp += VPTRSZ + VNODESZ;
1.80      fvdl     1509:                        simple_lock(&mntvnode_slock);
1.29      cgd      1510:                }
1.80      fvdl     1511:                simple_unlock(&mntvnode_slock);
                   1512:                simple_lock(&mountlist_slock);
1.177     matt     1513:                nmp = CIRCLEQ_NEXT(mp, mnt_list);
1.29      cgd      1514:                vfs_unbusy(mp);
                   1515:        }
1.80      fvdl     1516:        simple_unlock(&mountlist_slock);
1.29      cgd      1517:
                   1518:        *sizep = bp - where;
                   1519:        return (0);
1.30      mycroft  1520: }
                   1521:
                   1522: /*
                   1523:  * Check to see if a filesystem is mounted on a block device.
                   1524:  */
                   1525: int
1.247     thorpej  1526: vfs_mountedon(struct vnode *vp)
1.30      mycroft  1527: {
1.80      fvdl     1528:        struct vnode *vq;
                   1529:        int error = 0;
1.30      mycroft  1530:
1.261     reinoud  1531:        if (vp->v_type != VBLK)
                   1532:                return ENOTBLK;
1.113     fvdl     1533:        if (vp->v_specmountpoint != NULL)
1.30      mycroft  1534:                return (EBUSY);
                   1535:        if (vp->v_flag & VALIASED) {
1.80      fvdl     1536:                simple_lock(&spechash_slock);
1.30      mycroft  1537:                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                   1538:                        if (vq->v_rdev != vp->v_rdev ||
                   1539:                            vq->v_type != vp->v_type)
                   1540:                                continue;
1.113     fvdl     1541:                        if (vq->v_specmountpoint != NULL) {
1.80      fvdl     1542:                                error = EBUSY;
                   1543:                                break;
                   1544:                        }
1.30      mycroft  1545:                }
1.80      fvdl     1546:                simple_unlock(&spechash_slock);
1.30      mycroft  1547:        }
1.80      fvdl     1548:        return (error);
1.30      mycroft  1549: }
                   1550:
1.35      ws       1551: /*
1.39      mycroft  1552:  * Unmount all file systems.
                   1553:  * We traverse the list in reverse order under the assumption that doing so
                   1554:  * will avoid needing to worry about dependencies.
                   1555:  */
                   1556: void
1.256     christos 1557: vfs_unmountall(struct lwp *l)
1.39      mycroft  1558: {
1.123     augustss 1559:        struct mount *mp, *nmp;
1.40      mycroft  1560:        int allerror, error;
1.39      mycroft  1561:
1.235     lukem    1562:        printf("unmounting file systems...");
1.39      mycroft  1563:        for (allerror = 0,
                   1564:             mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                   1565:                nmp = mp->mnt_list.cqe_prev;
1.54      jtk      1566: #ifdef DEBUG
1.235     lukem    1567:                printf("\nunmounting %s (%s)...",
1.56      christos 1568:                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1.54      jtk      1569: #endif
1.149     thorpej  1570:                /*
                   1571:                 * XXX Freeze syncer.  Must do this before locking the
                   1572:                 * mount point.  See dounmount() for details.
                   1573:                 */
1.281     ad       1574:                mutex_enter(&syncer_mutex);
1.149     thorpej  1575:                if (vfs_busy(mp, 0, 0)) {
1.281     ad       1576:                        mutex_exit(&syncer_mutex);
1.60      fvdl     1577:                        continue;
1.149     thorpej  1578:                }
1.256     christos 1579:                if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
1.57      christos 1580:                        printf("unmount of %s failed with error %d\n",
1.40      mycroft  1581:                            mp->mnt_stat.f_mntonname, error);
                   1582:                        allerror = 1;
                   1583:                }
1.39      mycroft  1584:        }
1.235     lukem    1585:        printf(" done\n");
1.39      mycroft  1586:        if (allerror)
1.57      christos 1587:                printf("WARNING: some file systems would not unmount\n");
1.40      mycroft  1588: }
                   1589:
1.205     yamt     1590: extern struct simplelock bqueue_slock; /* XXX */
                   1591:
1.40      mycroft  1592: /*
                   1593:  * Sync and unmount file systems before shutting down.
                   1594:  */
                   1595: void
1.247     thorpej  1596: vfs_shutdown(void)
1.40      mycroft  1597: {
1.265     skrll    1598:        struct lwp *l;
1.40      mycroft  1599:
1.265     skrll    1600:        /* XXX we're certainly not running in lwp0's context! */
                   1601:        l = curlwp;
                   1602:        if (l == NULL)
                   1603:                l = &lwp0;
1.185     christos 1604:
1.70      cgd      1605:        printf("syncing disks... ");
                   1606:
1.138     bouyer   1607:        /* remove user process from run queue */
                   1608:        suspendsched();
1.40      mycroft  1609:        (void) spl0();
                   1610:
1.128     sommerfe 1611:        /* avoid coming back this way again if we panic. */
                   1612:        doing_shutdown = 1;
                   1613:
1.184     thorpej  1614:        sys_sync(l, NULL, NULL);
1.40      mycroft  1615:
                   1616:        /* Wait for sync to finish. */
1.213     pk       1617:        if (buf_syncwait() != 0) {
1.124     augustss 1618: #if defined(DDB) && defined(DEBUG_HALT_BUSY)
                   1619:                Debugger();
                   1620: #endif
1.57      christos 1621:                printf("giving up\n");
1.84      thorpej  1622:                return;
1.73      thorpej  1623:        } else
1.57      christos 1624:                printf("done\n");
1.73      thorpej  1625:
1.84      thorpej  1626:        /*
                   1627:         * If we've panic'd, don't make the situation potentially
                   1628:         * worse by unmounting the file systems.
                   1629:         */
                   1630:        if (panicstr != NULL)
                   1631:                return;
                   1632:
                   1633:        /* Release inodes held by texts before update. */
1.73      thorpej  1634: #ifdef notdef
1.84      thorpej  1635:        vnshutdown();
1.73      thorpej  1636: #endif
1.84      thorpej  1637:        /* Unmount file systems. */
1.256     christos 1638:        vfs_unmountall(l);
1.58      thorpej  1639: }
                   1640:
                   1641: /*
                   1642:  * Mount the root file system.  If the operator didn't specify a
                   1643:  * file system to use, try all possible file systems until one
                   1644:  * succeeds.
                   1645:  */
                   1646: int
1.247     thorpej  1647: vfs_mountroot(void)
1.58      thorpej  1648: {
1.79      thorpej  1649:        struct vfsops *v;
1.239     mycroft  1650:        int error = ENODEV;
1.58      thorpej  1651:
                   1652:        if (root_device == NULL)
                   1653:                panic("vfs_mountroot: root device unknown");
                   1654:
1.264     thorpej  1655:        switch (device_class(root_device)) {
1.58      thorpej  1656:        case DV_IFNET:
                   1657:                if (rootdev != NODEV)
1.173     thorpej  1658:                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                   1659:                            "(0x%08x -> %d,%d)", rootdev,
                   1660:                            major(rootdev), minor(rootdev));
1.58      thorpej  1661:                break;
                   1662:
                   1663:        case DV_DISK:
                   1664:                if (rootdev == NODEV)
                   1665:                        panic("vfs_mountroot: rootdev not set for DV_DISK");
1.239     mycroft  1666:                if (bdevvp(rootdev, &rootvp))
                   1667:                        panic("vfs_mountroot: can't get vnode for rootdev");
1.256     christos 1668:                error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
1.239     mycroft  1669:                if (error) {
                   1670:                        printf("vfs_mountroot: can't open root device\n");
                   1671:                        return (error);
                   1672:                }
1.58      thorpej  1673:                break;
                   1674:
                   1675:        default:
                   1676:                printf("%s: inappropriate for root file system\n",
                   1677:                    root_device->dv_xname);
                   1678:                return (ENODEV);
                   1679:        }
                   1680:
                   1681:        /*
                   1682:         * If user specified a file system, use it.
                   1683:         */
1.239     mycroft  1684:        if (mountroot != NULL) {
                   1685:                error = (*mountroot)();
                   1686:                goto done;
                   1687:        }
1.58      thorpej  1688:
                   1689:        /*
                   1690:         * Try each file system currently configured into the kernel.
                   1691:         */
1.220     lukem    1692:        LIST_FOREACH(v, &vfs_list, vfs_list) {
1.79      thorpej  1693:                if (v->vfs_mountroot == NULL)
1.58      thorpej  1694:                        continue;
                   1695: #ifdef DEBUG
1.197     thorpej  1696:                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1.58      thorpej  1697: #endif
1.239     mycroft  1698:                error = (*v->vfs_mountroot)();
                   1699:                if (!error) {
1.197     thorpej  1700:                        aprint_normal("root file system type: %s\n",
                   1701:                            v->vfs_name);
1.79      thorpej  1702:                        break;
1.58      thorpej  1703:                }
                   1704:        }
                   1705:
1.79      thorpej  1706:        if (v == NULL) {
                   1707:                printf("no file system for %s", root_device->dv_xname);
1.264     thorpej  1708:                if (device_class(root_device) == DV_DISK)
1.79      thorpej  1709:                        printf(" (dev 0x%x)", rootdev);
                   1710:                printf("\n");
1.239     mycroft  1711:                error = EFTYPE;
1.79      thorpej  1712:        }
1.239     mycroft  1713:
                   1714: done:
1.264     thorpej  1715:        if (error && device_class(root_device) == DV_DISK) {
1.256     christos 1716:                VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
1.239     mycroft  1717:                vrele(rootvp);
                   1718:        }
                   1719:        return (error);
1.58      thorpej  1720: }

CVSweb <webmaster@jp.NetBSD.org>