
Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.151

1.151   ! perseant    1: /*     $NetBSD: genfs_vnops.c,v 1.150 2007/03/04 06:03:14 christos Exp $       */
1.6       fvdl        2:
                      3: /*
                      4:  * Copyright (c) 1982, 1986, 1989, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
1.81      agc        15:  * 3. Neither the name of the University nor the names of its contributors
1.6       fvdl       16:  *    may be used to endorse or promote products derived from this software
                     17:  *    without specific prior written permission.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     20:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     22:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     23:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     24:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     25:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     26:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     27:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     29:  * SUCH DAMAGE.
                     30:  *
                     31:  */
1.40      lukem      32:
                     33: #include <sys/cdefs.h>
1.151   ! perseant   34: __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.150 2007/03/04 06:03:14 christos Exp $");
1.8       thorpej    35:
1.1       mycroft    36: #include <sys/param.h>
                     37: #include <sys/systm.h>
1.6       fvdl       38: #include <sys/proc.h>
1.1       mycroft    39: #include <sys/kernel.h>
                     40: #include <sys/mount.h>
                     41: #include <sys/namei.h>
                     42: #include <sys/vnode.h>
1.13      wrstuden   43: #include <sys/fcntl.h>
1.135     yamt       44: #include <sys/kmem.h>
1.3       mycroft    45: #include <sys/poll.h>
1.37      chs        46: #include <sys/mman.h>
1.66      jdolecek   47: #include <sys/file.h>
1.125     elad       48: #include <sys/kauth.h>
1.143     hannken    49: #include <sys/fstrans.h>
1.1       mycroft    50:
                     51: #include <miscfs/genfs/genfs.h>
1.37      chs        52: #include <miscfs/genfs/genfs_node.h>
1.6       fvdl       53: #include <miscfs/specfs/specdev.h>
1.1       mycroft    54:
1.21      chs        55: #include <uvm/uvm.h>
                     56: #include <uvm/uvm_pager.h>
                     57:
1.130     chs        58: static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *,
                     59:     off_t, enum uio_rw);
                     60: static void genfs_dio_iodone(struct buf *);
                     61:
                     62: static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw,
                     63:     void (*)(struct buf *));
1.118     perry      64: static inline void genfs_rel_pages(struct vm_page **, int);
1.70      christos   65: static void filt_genfsdetach(struct knote *);
                     66: static int filt_genfsread(struct knote *, long);
                     67: static int filt_genfsvnode(struct knote *, long);
                     68:
1.110     yamt       69: #define MAX_READ_PAGES 16      /* XXXUBC 16 */
1.41      christos   70:
1.130     chs        71: int genfs_maxdio = MAXPHYS;
                     72:
1.1       mycroft    73: int
1.53      enami      74: genfs_poll(void *v)
1.1       mycroft    75: {
1.3       mycroft    76:        struct vop_poll_args /* {
1.1       mycroft    77:                struct vnode *a_vp;
1.3       mycroft    78:                int a_events;
1.116     christos   79:                struct lwp *a_l;
1.1       mycroft    80:        } */ *ap = v;
                     81:
1.3       mycroft    82:        return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1       mycroft    83: }
                     84:
                     85: int
1.53      enami      86: genfs_seek(void *v)
1.4       kleink     87: {
                     88:        struct vop_seek_args /* {
                     89:                struct vnode *a_vp;
                     90:                off_t a_oldoff;
                     91:                off_t a_newoff;
1.125     elad       92:                kauth_cred_t a_cred;
1.4       kleink     93:        } */ *ap = v;
                     94:
                     95:        if (ap->a_newoff < 0)
                     96:                return (EINVAL);
                     97:
                     98:        return (0);
                     99: }
                    100:
                    101: int
1.53      enami     102: genfs_abortop(void *v)
1.1       mycroft   103: {
                    104:        struct vop_abortop_args /* {
                    105:                struct vnode *a_dvp;
                    106:                struct componentname *a_cnp;
                    107:        } */ *ap = v;
1.53      enami     108:
1.1       mycroft   109:        if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19      thorpej   110:                PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1       mycroft   111:        return (0);
1.13      wrstuden  112: }
                    113:
                    114: int
1.53      enami     115: genfs_fcntl(void *v)
1.13      wrstuden  116: {
                    117:        struct vop_fcntl_args /* {
                    118:                struct vnode *a_vp;
                    119:                u_int a_command;
1.150     christos  120:                void *a_data;
1.13      wrstuden  121:                int a_fflag;
1.125     elad      122:                kauth_cred_t a_cred;
1.116     christos  123:                struct lwp *a_l;
1.13      wrstuden  124:        } */ *ap = v;
                    125:
                    126:        if (ap->a_command == F_SETFL)
                    127:                return (0);
                    128:        else
                    129:                return (EOPNOTSUPP);
1.1       mycroft   130: }
                    131:
                    132: /*ARGSUSED*/
                    133: int
1.138     christos  134: genfs_badop(void *v)
1.1       mycroft   135: {
                    136:
                    137:        panic("genfs: bad op");
                    138: }
                    139:
                    140: /*ARGSUSED*/
                    141: int
1.138     christos  142: genfs_nullop(void *v)
1.1       mycroft   143: {
                    144:
                    145:        return (0);
1.10      kleink    146: }
                    147:
                    148: /*ARGSUSED*/
                    149: int
1.138     christos  150: genfs_einval(void *v)
1.10      kleink    151: {
                    152:
                    153:        return (EINVAL);
1.1       mycroft   154: }
                    155:
1.12      wrstuden  156: /*
1.74      jdolecek  157:  * Called when an fs doesn't support a particular vop.
                    158:  * This takes care to vrele, vput, or vunlock any passed-in vnodes.
1.12      wrstuden  159:  */
                    160: int
1.75      jdolecek  161: genfs_eopnotsupp(void *v)
1.12      wrstuden  162: {
                    163:        struct vop_generic_args /*
                    164:                struct vnodeop_desc *a_desc;
1.53      enami     165:                / * other random data follows, presumably * /
1.12      wrstuden  166:        } */ *ap = v;
                    167:        struct vnodeop_desc *desc = ap->a_desc;
1.74      jdolecek  168:        struct vnode *vp, *vp_last = NULL;
1.12      wrstuden  169:        int flags, i, j, offset;
                    170:
                    171:        flags = desc->vdesc_flags;
                    172:        for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
                    173:                if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
                    174:                        break;  /* stop at end of list */
                    175:                if ((j = flags & VDESC_VP0_WILLPUT)) {
1.53      enami     176:                        vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
1.74      jdolecek  177:
                    178:                        /* Skip if NULL */
                    179:                        if (!vp)
                    180:                                continue;
                    181:
1.12      wrstuden  182:                        switch (j) {
                    183:                        case VDESC_VP0_WILLPUT:
1.74      jdolecek  184:                                /* Check for dvp == vp cases */
                    185:                                if (vp == vp_last)
                    186:                                        vrele(vp);
                    187:                                else {
                    188:                                        vput(vp);
                    189:                                        vp_last = vp;
                    190:                                }
1.12      wrstuden  191:                                break;
                    192:                        case VDESC_VP0_WILLUNLOCK:
                    193:                                VOP_UNLOCK(vp, 0);
                    194:                                break;
                    195:                        case VDESC_VP0_WILLRELE:
                    196:                                vrele(vp);
                    197:                                break;
                    198:                        }
                    199:                }
                    200:        }
                    201:
                    202:        return (EOPNOTSUPP);
                    203: }
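
/*
 * For reference, the VOPARG_OFFSETTO() fetch above is plain pointer
 * arithmetic: vdesc_vp_offsets[] records the byte offset of each vnode
 * argument within the args structure, so the lookup is roughly
 *
 *	vp = *(struct vnode **)((char *)ap + desc->vdesc_vp_offsets[i]);
 */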
                    204:
1.1       mycroft   205: /*ARGSUSED*/
                    206: int
1.138     christos  207: genfs_ebadf(void *v)
1.1       mycroft   208: {
                    209:
                    210:        return (EBADF);
1.9       matthias  211: }
                    212:
                    213: /* ARGSUSED */
                    214: int
1.138     christos  215: genfs_enoioctl(void *v)
1.9       matthias  216: {
                    217:
1.51      atatat    218:        return (EPASSTHROUGH);
1.6       fvdl      219: }
                    220:
                    221:
                    222: /*
1.15      fvdl      223:  * Eliminate all activity associated with the requested vnode
1.6       fvdl      224:  * and with all vnodes aliased to the requested vnode.
                    225:  */
                    226: int
1.53      enami     227: genfs_revoke(void *v)
1.6       fvdl      228: {
                    229:        struct vop_revoke_args /* {
                    230:                struct vnode *a_vp;
                    231:                int a_flags;
                    232:        } */ *ap = v;
                    233:        struct vnode *vp, *vq;
1.116     christos  234:        struct lwp *l = curlwp;         /* XXX */
1.6       fvdl      235:
                    236: #ifdef DIAGNOSTIC
                    237:        if ((ap->a_flags & REVOKEALL) == 0)
                    238:                panic("genfs_revoke: not revokeall");
                    239: #endif
                    240:
                    241:        vp = ap->a_vp;
                    242:        simple_lock(&vp->v_interlock);
                    243:
                    244:        if (vp->v_flag & VALIASED) {
                    245:                /*
                    246:                 * If a vgone (or vclean) is already in progress,
                    247:                 * wait until it is done and return.
                    248:                 */
                    249:                if (vp->v_flag & VXLOCK) {
                    250:                        vp->v_flag |= VXWANT;
1.83      pk        251:                        ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0,
                    252:                                &vp->v_interlock);
1.6       fvdl      253:                        return (0);
                    254:                }
                    255:                /*
                    256:                 * Ensure that vp will not be vgone'd while we
                    257:                 * are eliminating its aliases.
                    258:                 */
                    259:                vp->v_flag |= VXLOCK;
                    260:                simple_unlock(&vp->v_interlock);
                    261:                while (vp->v_flag & VALIASED) {
                    262:                        simple_lock(&spechash_slock);
                    263:                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                    264:                                if (vq->v_rdev != vp->v_rdev ||
                    265:                                    vq->v_type != vp->v_type || vp == vq)
                    266:                                        continue;
                    267:                                simple_unlock(&spechash_slock);
                    268:                                vgone(vq);
                    269:                                break;
                    270:                        }
                    271:                        if (vq == NULLVP)
                    272:                                simple_unlock(&spechash_slock);
                    273:                }
                    274:                /*
                    275:                 * Remove the lock so that vgone below will
                    276:                 * really eliminate the vnode after which time
                    277:                 * vgone will awaken any sleepers.
                    278:                 */
                    279:                simple_lock(&vp->v_interlock);
                    280:                vp->v_flag &= ~VXLOCK;
                    281:        }
1.116     christos  282:        vgonel(vp, l);
1.6       fvdl      283:        return (0);
                    284: }
                    285:
                    286: /*
1.12      wrstuden  287:  * Lock the node.
1.6       fvdl      288:  */
                    289: int
1.53      enami     290: genfs_lock(void *v)
1.6       fvdl      291: {
                    292:        struct vop_lock_args /* {
                    293:                struct vnode *a_vp;
                    294:                int a_flags;
                    295:        } */ *ap = v;
                    296:        struct vnode *vp = ap->a_vp;
                    297:
1.86      hannken   298:        return (lockmgr(vp->v_vnlock, ap->a_flags, &vp->v_interlock));
1.6       fvdl      299: }
                    300:
                    301: /*
1.12      wrstuden  302:  * Unlock the node.
1.6       fvdl      303:  */
                    304: int
1.53      enami     305: genfs_unlock(void *v)
1.6       fvdl      306: {
                    307:        struct vop_unlock_args /* {
                    308:                struct vnode *a_vp;
                    309:                int a_flags;
                    310:        } */ *ap = v;
                    311:        struct vnode *vp = ap->a_vp;
                    312:
1.86      hannken   313:        return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE,
1.53      enami     314:            &vp->v_interlock));
1.6       fvdl      315: }
                    316:
                    317: /*
1.12      wrstuden  318:  * Return whether or not the node is locked.
1.6       fvdl      319:  */
                    320: int
1.53      enami     321: genfs_islocked(void *v)
1.6       fvdl      322: {
                    323:        struct vop_islocked_args /* {
                    324:                struct vnode *a_vp;
                    325:        } */ *ap = v;
                    326:        struct vnode *vp = ap->a_vp;
                    327:
1.86      hannken   328:        return (lockstatus(vp->v_vnlock));
1.12      wrstuden  329: }
                    330:
                    331: /*
                    332:  * Stubs to use when there is no locking to be done on the underlying object.
                    333:  */
                    334: int
1.53      enami     335: genfs_nolock(void *v)
1.12      wrstuden  336: {
                    337:        struct vop_lock_args /* {
                    338:                struct vnode *a_vp;
                    339:                int a_flags;
1.116     christos  340:                struct lwp *a_l;
1.12      wrstuden  341:        } */ *ap = v;
                    342:
                    343:        /*
                    344:         * Since we are not using the lock manager, we must clear
                    345:         * the interlock here.
                    346:         */
                    347:        if (ap->a_flags & LK_INTERLOCK)
                    348:                simple_unlock(&ap->a_vp->v_interlock);
                    349:        return (0);
                    350: }
                    351:
                    352: int
1.138     christos  353: genfs_nounlock(void *v)
1.12      wrstuden  354: {
1.53      enami     355:
1.12      wrstuden  356:        return (0);
                    357: }
                    358:
                    359: int
1.138     christos  360: genfs_noislocked(void *v)
1.12      wrstuden  361: {
1.53      enami     362:
1.12      wrstuden  363:        return (0);
1.8       thorpej   364: }
                    365:
                    366: /*
1.142     yamt      367:  * Local lease check.
1.8       thorpej   368:  */
                    369: int
1.53      enami     370: genfs_lease_check(void *v)
1.8       thorpej   371: {
                    372:
                    373:        return (0);
1.34      chs       374: }
                    375:
                    376: int
1.138     christos  377: genfs_mmap(void *v)
1.34      chs       378: {
1.53      enami     379:
                    380:        return (0);
1.21      chs       381: }
                    382:
1.118     perry     383: static inline void
1.63      enami     384: genfs_rel_pages(struct vm_page **pgs, int npages)
                    385: {
                    386:        int i;
                    387:
                    388:        for (i = 0; i < npages; i++) {
                    389:                struct vm_page *pg = pgs[i];
                    390:
1.127     yamt      391:                if (pg == NULL || pg == PGO_DONTCARE)
1.63      enami     392:                        continue;
                    393:                if (pg->flags & PG_FAKE) {
                    394:                        pg->flags |= PG_RELEASED;
                    395:                }
                    396:        }
1.64      enami     397:        uvm_lock_pageq();
1.63      enami     398:        uvm_page_unbusy(pgs, npages);
1.64      enami     399:        uvm_unlock_pageq();
1.63      enami     400: }
                    401:
1.21      chs       402: /*
                    403:  * generic VM getpages routine.
                    404:  * Return PG_BUSY pages for the given range,
                    405:  * reading from backing store if necessary.
                    406:  */
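
/*
 * A minimal caller sketch (illustrative only, not part of this file):
 * lock the object, request one busy page synchronously, and unbusy it
 * when done.  On entry the array slots must be NULL or PGO_DONTCARE;
 * the object lock is released by the call.
 *
 *	struct vm_page *pg = NULL;
 *	int npages = 1, error;
 *
 *	simple_lock(&vp->v_uobj.vmobjlock);
 *	error = VOP_GETPAGES(vp, trunc_page(off), &pg, &npages, 0,
 *	    VM_PROT_READ, 0, PGO_SYNCIO);
 *	if (error == 0) {
 *		(use the page, then:)
 *		simple_lock(&vp->v_uobj.vmobjlock);
 *		uvm_lock_pageq();
 *		uvm_page_unbusy(&pg, 1);
 *		uvm_unlock_pageq();
 *		simple_unlock(&vp->v_uobj.vmobjlock);
 *	}
 */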
                    407:
                    408: int
1.53      enami     409: genfs_getpages(void *v)
1.21      chs       410: {
                    411:        struct vop_getpages_args /* {
                    412:                struct vnode *a_vp;
                    413:                voff_t a_offset;
1.33      chs       414:                struct vm_page **a_m;
1.21      chs       415:                int *a_count;
                    416:                int a_centeridx;
                    417:                vm_prot_t a_access_type;
                    418:                int a_advice;
                    419:                int a_flags;
                    420:        } */ *ap = v;
                    421:
1.30      chs       422:        off_t newsize, diskeof, memeof;
1.124     yamt      423:        off_t offset, origoffset, startoffset, endoffset;
1.21      chs       424:        daddr_t lbn, blkno;
1.120     yamt      425:        int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
1.37      chs       426:        int fs_bshift, fs_bsize, dev_bshift;
1.21      chs       427:        int flags = ap->a_flags;
                    428:        size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
                    429:        vaddr_t kva;
                    430:        struct buf *bp, *mbp;
                    431:        struct vnode *vp = ap->a_vp;
1.36      chs       432:        struct vnode *devvp;
1.37      chs       433:        struct genfs_node *gp = VTOG(vp);
                    434:        struct uvm_object *uobj = &vp->v_uobj;
1.110     yamt      435:        struct vm_page *pg, **pgs, *pgs_onstack[MAX_READ_PAGES];
1.77      yamt      436:        int pgs_size;
1.128     ad        437:        kauth_cred_t cred = curlwp->l_cred;             /* XXXUBC curlwp */
1.148     thorpej   438:        bool async = (flags & PGO_SYNCIO) == 0;
                    439:        bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.149     thorpej   440:        bool sawhole = false;
                    441:        bool has_trans = false;
1.148     thorpej   442:        bool overwrite = (flags & PGO_OVERWRITE) != 0;
                    443:        bool blockalloc = write && (flags & PGO_NOBLOCKALLOC) == 0;
1.126     yamt      444:        voff_t origvsize;
1.21      chs       445:        UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
                    446:
1.30      chs       447:        UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
1.53      enami     448:            vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
1.30      chs       449:
1.121     reinoud   450:        KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
                    451:            vp->v_type == VLNK || vp->v_type == VBLK);
1.109     yamt      452:
1.21      chs       453:        /* XXXUBC temp limit */
1.110     yamt      454:        if (*ap->a_count > MAX_READ_PAGES) {
1.37      chs       455:                panic("genfs_getpages: too many pages");
1.21      chs       456:        }
                    457:
1.143     hannken   458:        pgs = pgs_onstack;
                    459:        pgs_size = sizeof(pgs_onstack);
                    460:
1.126     yamt      461: startover:
1.26      chs       462:        error = 0;
1.126     yamt      463:        origvsize = vp->v_size;
1.26      chs       464:        origoffset = ap->a_offset;
                    465:        orignpages = *ap->a_count;
1.123     yamt      466:        GOP_SIZE(vp, vp->v_size, &diskeof, 0);
1.26      chs       467:        if (flags & PGO_PASTEOF) {
1.37      chs       468:                newsize = MAX(vp->v_size,
1.53      enami     469:                    origoffset + (orignpages << PAGE_SHIFT));
1.123     yamt      470:                GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
1.26      chs       471:        } else {
1.123     yamt      472:                GOP_SIZE(vp, vp->v_size, &memeof, GOP_SIZE_MEM);
1.21      chs       473:        }
1.30      chs       474:        KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
                    475:        KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
                    476:        KASSERT(orignpages > 0);
1.95      chs       477:
                    478:        /*
                    479:         * Bounds-check the request.
                    480:         */
                    481:
                    482:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
                    483:                if ((flags & PGO_LOCKED) == 0) {
                    484:                        simple_unlock(&uobj->vmobjlock);
                    485:                }
                    486:                UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
                    487:                    origoffset, *ap->a_count, memeof,0);
1.143     hannken   488:                error = EINVAL;
                    489:                goto out_err;
1.95      chs       490:        }
1.21      chs       491:
1.99      yamt      492:        /* uobj is locked */
                    493:
1.103     yamt      494:        if ((flags & PGO_NOTIMESTAMP) == 0 &&
1.121     reinoud   495:            (vp->v_type != VBLK ||
1.103     yamt      496:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                    497:                int updflags = 0;
                    498:
                    499:                if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
                    500:                        updflags = GOP_UPDATE_ACCESSED;
                    501:                }
                    502:                if (write) {
                    503:                        updflags |= GOP_UPDATE_MODIFIED;
                    504:                }
                    505:                if (updflags != 0) {
                    506:                        GOP_MARKUPDATE(vp, updflags);
                    507:                }
                    508:        }
                    509:
1.101     yamt      510:        if (write) {
                    511:                gp->g_dirtygen++;
                    512:                if ((vp->v_flag & VONWORKLST) == 0) {
                    513:                        vn_syncer_add_to_worklist(vp, filedelay);
                    514:                }
1.103     yamt      515:                if ((vp->v_flag & (VWRITEMAP|VWRITEMAPDIRTY)) == VWRITEMAP) {
                    516:                        vp->v_flag |= VWRITEMAPDIRTY;
                    517:                }
1.99      yamt      518:        }
                    519:
1.21      chs       520:        /*
                    521:         * For PGO_LOCKED requests, just return whatever's in memory.
                    522:         */
                    523:
                    524:        if (flags & PGO_LOCKED) {
1.127     yamt      525:                int nfound;
                    526:
                    527:                npages = *ap->a_count;
                    528: #if defined(DEBUG)
                    529:                for (i = 0; i < npages; i++) {
                    530:                        pg = ap->a_m[i];
                    531:                        KASSERT(pg == NULL || pg == PGO_DONTCARE);
                    532:                }
                    533: #endif /* defined(DEBUG) */
                    534:                nfound = uvn_findpages(uobj, origoffset, &npages,
                    535:                    ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(write ? UFP_NORDONLY : 0));
                    536:                KASSERT(npages == *ap->a_count);
                    537:                if (nfound == 0) {
1.143     hannken   538:                        error = EBUSY;
                    539:                        goto out_err;
1.127     yamt      540:                }
1.146     ad        541:                if (!rw_tryenter(&gp->g_glock, RW_READER)) {
1.127     yamt      542:                        genfs_rel_pages(ap->a_m, npages);
                    543:
                    544:                        /*
                    545:                         * restore the array.
                    546:                         */
                    547:
                    548:                        for (i = 0; i < npages; i++) {
                    549:                                pg = ap->a_m[i];
1.21      chs       550:
1.127     yamt      551:                                if (pg != NULL && pg != PGO_DONTCARE) {
                    552:                                        ap->a_m[i] = NULL;
                    553:                                }
                    554:                        }
                    555:                } else {
1.146     ad        556:                        rw_exit(&gp->g_glock);
1.127     yamt      557:                }
1.143     hannken   558:                error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
                    559:                goto out_err;
1.21      chs       560:        }
1.126     yamt      561:        simple_unlock(&uobj->vmobjlock);
1.21      chs       562:
                    563:        /*
                    564:         * find the requested pages and make some simple checks.
                    565:         * leave space in the page array for a whole block.
                    566:         */
                    567:
1.121     reinoud   568:        if (vp->v_type != VBLK) {
1.36      chs       569:                fs_bshift = vp->v_mount->mnt_fs_bshift;
                    570:                dev_bshift = vp->v_mount->mnt_dev_bshift;
                    571:        } else {
                    572:                fs_bshift = DEV_BSHIFT;
                    573:                dev_bshift = DEV_BSHIFT;
                    574:        }
1.21      chs       575:        fs_bsize = 1 << fs_bshift;
                    576:
1.30      chs       577:        orignpages = MIN(orignpages,
                    578:            round_page(memeof - origoffset) >> PAGE_SHIFT);
1.21      chs       579:        npages = orignpages;
                    580:        startoffset = origoffset & ~(fs_bsize - 1);
1.53      enami     581:        endoffset = round_page((origoffset + (npages << PAGE_SHIFT) +
                    582:            fs_bsize - 1) & ~(fs_bsize - 1));
1.30      chs       583:        endoffset = MIN(endoffset, round_page(memeof));
1.21      chs       584:        ridx = (origoffset - startoffset) >> PAGE_SHIFT;
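
        /*
         * e.g. with 4 KB pages and 8 KB filesystem blocks, a one-page
         * request at origoffset 0x3000 widens to startoffset 0x2000 and
         * endoffset 0x4000; ridx is 1, i.e. the requested page is the
         * second of the two pages covering the enclosing block.
         */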
                    585:
1.77      yamt      586:        pgs_size = sizeof(struct vm_page *) *
                    587:            ((endoffset - startoffset) >> PAGE_SHIFT);
                    588:        if (pgs_size > sizeof(pgs_onstack)) {
1.135     yamt      589:                pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP);
1.78      simonb    590:                if (pgs == NULL) {
1.143     hannken   591:                        pgs = pgs_onstack;
                    592:                        error = ENOMEM;
                    593:                        goto out_err;
1.78      simonb    594:                }
1.77      yamt      595:        } else {
1.143     hannken   596:                /* pgs == pgs_onstack */
1.77      yamt      597:                memset(pgs, 0, pgs_size);
                    598:        }
1.63      enami     599:        UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
                    600:            ridx, npages, startoffset, endoffset);
1.126     yamt      601:
1.143     hannken   602:        if (!has_trans &&
1.144     hannken   603:            (error = fstrans_start(vp->v_mount, FSTRANS_SHARED)) != 0) {
1.143     hannken   604:                goto out_err;
                    605:        }
1.149     thorpej   606:        has_trans = true;
1.143     hannken   607:
1.126     yamt      608:        /*
                    609:         * hold g_glock to prevent a race with truncate.
                    610:         *
                    611:         * check if our idea of v_size is still valid.
                    612:         */
                    613:
                    614:        if (blockalloc) {
1.146     ad        615:                rw_enter(&gp->g_glock, RW_WRITER);
1.126     yamt      616:        } else {
1.146     ad        617:                rw_enter(&gp->g_glock, RW_READER);
1.126     yamt      618:        }
                    619:        simple_lock(&uobj->vmobjlock);
                    620:        if (vp->v_size < origvsize) {
1.146     ad        621:                rw_exit(&gp->g_glock);
1.126     yamt      622:                if (pgs != pgs_onstack)
1.135     yamt      623:                        kmem_free(pgs, pgs_size);
1.126     yamt      624:                goto startover;
                    625:        }
                    626:
1.63      enami     627:        if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
                    628:            async ? UFP_NOWAIT : UFP_ALL) != orignpages) {
1.146     ad        629:                rw_exit(&gp->g_glock);
1.63      enami     630:                KASSERT(async != 0);
                    631:                genfs_rel_pages(&pgs[ridx], orignpages);
                    632:                simple_unlock(&uobj->vmobjlock);
1.143     hannken   633:                error = EBUSY;
                    634:                goto out_err;
1.63      enami     635:        }
1.21      chs       636:
                    637:        /*
                    638:         * if the pages are already resident, just return them.
                    639:         */
                    640:
                    641:        for (i = 0; i < npages; i++) {
1.97      christos  642:                struct vm_page *pg1 = pgs[ridx + i];
1.21      chs       643:
1.97      christos  644:                if ((pg1->flags & PG_FAKE) ||
1.100     yamt      645:                    (blockalloc && (pg1->flags & PG_RDONLY))) {
1.21      chs       646:                        break;
                    647:                }
                    648:        }
                    649:        if (i == npages) {
1.146     ad        650:                rw_exit(&gp->g_glock);
1.21      chs       651:                UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
1.26      chs       652:                npages += ridx;
1.110     yamt      653:                goto out;
1.21      chs       654:        }
                    655:
                    656:        /*
1.37      chs       657:         * if PGO_OVERWRITE is set, don't bother reading the pages.
                    658:         */
                    659:
1.124     yamt      660:        if (overwrite) {
1.146     ad        661:                rw_exit(&gp->g_glock);
1.37      chs       662:                UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
                    663:
                    664:                for (i = 0; i < npages; i++) {
1.97      christos  665:                        struct vm_page *pg1 = pgs[ridx + i];
1.37      chs       666:
1.97      christos  667:                        pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
1.37      chs       668:                }
                    669:                npages += ridx;
                    670:                goto out;
                    671:        }
                    672:
                    673:        /*
1.21      chs       674:         * the page wasn't resident and we're not overwriting,
                    675:         * so we're going to have to do some i/o.
                    676:         * find any additional pages needed to cover the expanded range.
                    677:         */
                    678:
1.35      chs       679:        npages = (endoffset - startoffset) >> PAGE_SHIFT;
                    680:        if (startoffset != origoffset || npages != orignpages) {
1.21      chs       681:
                    682:                /*
1.37      chs       683:                 * we need to avoid deadlocks caused by locking
1.21      chs       684:                 * additional pages at lower offsets than pages we
1.37      chs       685:                 * already have locked.  unlock them all and start over.
1.21      chs       686:                 */
                    687:
1.63      enami     688:                genfs_rel_pages(&pgs[ridx], orignpages);
1.77      yamt      689:                memset(pgs, 0, pgs_size);
1.21      chs       690:
                    691:                UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
1.53      enami     692:                    startoffset, endoffset, 0,0);
1.21      chs       693:                npgs = npages;
1.63      enami     694:                if (uvn_findpages(uobj, startoffset, &npgs, pgs,
                    695:                    async ? UFP_NOWAIT : UFP_ALL) != npages) {
1.146     ad        696:                        rw_exit(&gp->g_glock);
1.63      enami     697:                        KASSERT(async != 0);
                    698:                        genfs_rel_pages(pgs, npages);
                    699:                        simple_unlock(&uobj->vmobjlock);
1.143     hannken   700:                        error = EBUSY;
                    701:                        goto out_err;
1.63      enami     702:                }
1.21      chs       703:        }
                    704:        simple_unlock(&uobj->vmobjlock);
                    705:
                    706:        /*
                    707:         * read the desired page(s).
                    708:         */
                    709:
                    710:        totalbytes = npages << PAGE_SHIFT;
1.30      chs       711:        bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
1.21      chs       712:        tailbytes = totalbytes - bytes;
                    713:        skipbytes = 0;
                    714:
1.53      enami     715:        kva = uvm_pagermapin(pgs, npages,
                    716:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.21      chs       717:
1.119     yamt      718:        mbp = getiobuf();
1.21      chs       719:        mbp->b_bufsize = totalbytes;
                    720:        mbp->b_data = (void *)kva;
                    721:        mbp->b_resid = mbp->b_bcount = bytes;
1.65      fvdl      722:        mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
1.37      chs       723:        mbp->b_iodone = (async ? uvm_aio_biodone : NULL);
1.21      chs       724:        mbp->b_vp = vp;
1.120     yamt      725:        if (async)
                    726:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
                    727:        else
                    728:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
1.21      chs       729:
                    730:        /*
1.31      chs       731:         * if EOF is in the middle of the range, zero the part past EOF.
1.38      chs       732:         * if the page including EOF is not PG_FAKE, skip over it since
                    733:         * in that case it has valid data that we need to preserve.
1.21      chs       734:         */
                    735:
1.31      chs       736:        if (tailbytes > 0) {
1.38      chs       737:                size_t tailstart = bytes;
                    738:
                    739:                if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
                    740:                        tailstart = round_page(tailstart);
                    741:                        tailbytes -= tailstart - bytes;
                    742:                }
1.37      chs       743:                UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
1.53      enami     744:                    kva, tailstart, tailbytes,0);
1.38      chs       745:                memset((void *)(kva + tailstart), 0, tailbytes);
1.21      chs       746:        }
                    747:
                    748:        /*
                    749:         * now loop over the pages, reading as needed.
                    750:         */
                    751:
                    752:        bp = NULL;
                    753:        for (offset = startoffset;
1.53      enami     754:            bytes > 0;
                    755:            offset += iobytes, bytes -= iobytes) {
1.21      chs       756:
                    757:                /*
                    758:                 * skip pages which don't need to be read.
                    759:                 */
                    760:
                    761:                pidx = (offset - startoffset) >> PAGE_SHIFT;
1.100     yamt      762:                while ((pgs[pidx]->flags & PG_FAKE) == 0) {
1.21      chs       763:                        size_t b;
                    764:
1.24      chs       765:                        KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.100     yamt      766:                        if ((pgs[pidx]->flags & PG_RDONLY)) {
1.149     thorpej   767:                                sawhole = true;
1.100     yamt      768:                        }
1.26      chs       769:                        b = MIN(PAGE_SIZE, bytes);
1.21      chs       770:                        offset += b;
                    771:                        bytes -= b;
                    772:                        skipbytes += b;
                    773:                        pidx++;
                    774:                        UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
1.53      enami     775:                            offset, 0,0,0);
1.21      chs       776:                        if (bytes == 0) {
                    777:                                goto loopdone;
                    778:                        }
                    779:                }
                    780:
                    781:                /*
                    782:                 * bmap the file to find out the blkno to read from and
                    783:                 * how much we can read in one i/o.  if bmap returns an error,
                    784:                 * skip the rest of the top-level i/o.
                    785:                 */
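                /*
                 * VOP_BMAP() maps the logical block "lbn" to a device
                 * vnode "devvp" and device block "blkno"; "run" counts
                 * the additional logical blocks laid out contiguously
                 * after "lbn".  A negative blkno denotes a hole (no
                 * backing store), which the blkno < 0 case below
                 * zero-fills instead of reading.
                 */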
                    786:
                    787:                lbn = offset >> fs_bshift;
1.36      chs       788:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21      chs       789:                if (error) {
                    790:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
1.53      enami     791:                            lbn, error,0,0);
1.21      chs       792:                        skipbytes += bytes;
                    793:                        goto loopdone;
                    794:                }
                    795:
                    796:                /*
                    797:                 * see how many pages can be read with this i/o.
                    798:                 * reduce the i/o size if necessary to avoid
                    799:                 * overwriting pages with valid data.
                    800:                 */
                    801:
1.26      chs       802:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                    803:                    bytes);
1.21      chs       804:                if (offset + iobytes > round_page(offset)) {
                    805:                        pcount = 1;
                    806:                        while (pidx + pcount < npages &&
1.53      enami     807:                            pgs[pidx + pcount]->flags & PG_FAKE) {
1.21      chs       808:                                pcount++;
                    809:                        }
1.26      chs       810:                        iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.53      enami     811:                            (offset - trunc_page(offset)));
1.21      chs       812:                }
                    813:
                    814:                /*
1.53      enami     815:                 * if this block isn't allocated, zero it instead of
1.100     yamt      816:                 * reading it.  unless we are going to allocate blocks,
                    817:                 * mark the pages we zeroed PG_RDONLY.
1.21      chs       818:                 */
                    819:
                    820:                if (blkno < 0) {
1.53      enami     821:                        int holepages = (round_page(offset + iobytes) -
                    822:                            trunc_page(offset)) >> PAGE_SHIFT;
1.21      chs       823:                        UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
                    824:
1.149     thorpej   825:                        sawhole = true;
1.21      chs       826:                        memset((char *)kva + (offset - startoffset), 0,
1.53      enami     827:                            iobytes);
1.21      chs       828:                        skipbytes += iobytes;
                    829:
1.35      chs       830:                        for (i = 0; i < holepages; i++) {
                    831:                                if (write) {
                    832:                                        pgs[pidx + i]->flags &= ~PG_CLEAN;
1.100     yamt      833:                                }
                    834:                                if (!blockalloc) {
1.21      chs       835:                                        pgs[pidx + i]->flags |= PG_RDONLY;
                    836:                                }
                    837:                        }
                    838:                        continue;
                    839:                }
                    840:
                    841:                /*
                    842:                 * allocate a sub-buf for this piece of the i/o
                    843:                 * (or just use mbp if there's only 1 piece),
                    844:                 * and start it going.
                    845:                 */
                    846:
                    847:                if (offset == startoffset && iobytes == bytes) {
                    848:                        bp = mbp;
                    849:                } else {
1.119     yamt      850:                        bp = getiobuf();
1.120     yamt      851:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
1.21      chs       852:                }
1.112     yamt      853:                bp->b_lblkno = 0;
1.21      chs       854:
                    855:                /* adjust physical blkno for partial blocks */
1.25      fvdl      856:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.53      enami     857:                    dev_bshift);
1.21      chs       858:
1.53      enami     859:                UVMHIST_LOG(ubchist,
                    860:                    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
                    861:                    bp, offset, iobytes, bp->b_blkno);
1.21      chs       862:
1.109     yamt      863:                VOP_STRATEGY(devvp, bp);
1.21      chs       864:        }
                    865:
                    866: loopdone:
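        /*
         * nestiobuf_done() credits "skipbytes" (bytes zero-filled or
         * skipped above rather than read) against the master buffer's
         * residual count, so the biowait() below will not wait for
         * I/O that was never issued.
         */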
1.120     yamt      867:        nestiobuf_done(mbp, skipbytes, error);
1.21      chs       868:        if (async) {
1.32      chs       869:                UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.146     ad        870:                rw_exit(&gp->g_glock);
1.143     hannken   871:                error = 0;
                    872:                goto out_err;
1.21      chs       873:        }
                    874:        if (bp != NULL) {
                    875:                error = biowait(mbp);
                    876:        }
1.119     yamt      877:        putiobuf(mbp);
1.21      chs       878:        uvm_pagermapout(kva, npages);
                    879:
                    880:        /*
                    881:         * if we encountered a hole then we have to do a little more work.
                    882:         * for read faults, we marked the page PG_RDONLY so that future
                    883:         * write accesses to the page will fault again.
                    884:         * for write faults, we must make sure that the backing store for
                    885:         * the page is completely allocated while the pages are locked.
                    886:         */
                    887:
1.100     yamt      888:        if (!error && sawhole && blockalloc) {
1.37      chs       889:                error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
1.53      enami     890:                    cred);
1.37      chs       891:                UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
                    892:                    startoffset, npages << PAGE_SHIFT, error,0);
1.100     yamt      893:                if (!error) {
                    894:                        for (i = 0; i < npages; i++) {
                    895:                                if (pgs[i] == NULL) {
                    896:                                        continue;
                    897:                                }
                    898:                                pgs[i]->flags &= ~(PG_CLEAN|PG_RDONLY);
                    899:                                UVMHIST_LOG(ubchist, "mark dirty pg %p",
                    900:                                    pgs[i],0,0,0);
                    901:                        }
                    902:                }
1.21      chs       903:        }
1.146     ad        904:        rw_exit(&gp->g_glock);
1.21      chs       905:        simple_lock(&uobj->vmobjlock);
                    906:
                    907:        /*
                    908:         * we're almost done!  release the pages...
                    909:         * for errors, we free the pages.
                    910:         * otherwise we activate them and mark them as valid and clean.
                    911:         * also, unbusy pages that were not actually requested.
                    912:         */
                    913:
                    914:        if (error) {
                    915:                for (i = 0; i < npages; i++) {
                    916:                        if (pgs[i] == NULL) {
                    917:                                continue;
                    918:                        }
                    919:                        UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.53      enami     920:                            pgs[i], pgs[i]->flags, 0,0);
1.26      chs       921:                        if (pgs[i]->flags & PG_FAKE) {
1.37      chs       922:                                pgs[i]->flags |= PG_RELEASED;
1.21      chs       923:                        }
                    924:                }
1.37      chs       925:                uvm_lock_pageq();
                    926:                uvm_page_unbusy(pgs, npages);
1.21      chs       927:                uvm_unlock_pageq();
                    928:                simple_unlock(&uobj->vmobjlock);
                    929:                UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
1.143     hannken   930:                goto out_err;
1.21      chs       931:        }
                    932:
1.37      chs       933: out:
1.21      chs       934:        UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.143     hannken   935:        error = 0;
1.26      chs       936:        uvm_lock_pageq();
1.21      chs       937:        for (i = 0; i < npages; i++) {
1.37      chs       938:                pg = pgs[i];
                    939:                if (pg == NULL) {
1.21      chs       940:                        continue;
                    941:                }
                    942:                UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.53      enami     943:                    pg, pg->flags, 0,0);
1.37      chs       944:                if (pg->flags & PG_FAKE && !overwrite) {
                    945:                        pg->flags &= ~(PG_FAKE);
1.21      chs       946:                        pmap_clear_modify(pgs[i]);
                    947:                }
1.100     yamt      948:                KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
1.21      chs       949:                if (i < ridx || i >= ridx + orignpages || async) {
                    950:                        UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
1.53      enami     951:                            pg, pg->offset,0,0);
1.37      chs       952:                        if (pg->flags & PG_WANTED) {
                    953:                                wakeup(pg);
                    954:                        }
                    955:                        if (pg->flags & PG_FAKE) {
                    956:                                KASSERT(overwrite);
                    957:                                uvm_pagezero(pg);
                    958:                        }
                    959:                        if (pg->flags & PG_RELEASED) {
                    960:                                uvm_pagefree(pg);
1.26      chs       961:                                continue;
1.21      chs       962:                        }
1.129     yamt      963:                        uvm_pageenqueue(pg);
1.37      chs       964:                        pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
                    965:                        UVM_PAGE_OWN(pg, NULL);
1.21      chs       966:                }
                    967:        }
1.26      chs       968:        uvm_unlock_pageq();
1.21      chs       969:        simple_unlock(&uobj->vmobjlock);
                    970:        if (ap->a_m != NULL) {
                    971:                memcpy(ap->a_m, &pgs[ridx],
1.53      enami     972:                    orignpages * sizeof(struct vm_page *));
1.21      chs       973:        }
1.143     hannken   974:
                    975: out_err:
1.77      yamt      976:        if (pgs != pgs_onstack)
1.135     yamt      977:                kmem_free(pgs, pgs_size);
1.143     hannken   978:        if (has_trans)
                    979:                fstrans_done(vp->v_mount);
                    980:        return (error);
1.21      chs       981: }
                    982:
                    983: /*
                    984:  * generic VM putpages routine.
                    985:  * Write the given range of pages to backing store.
1.37      chs       986:  *
                    987:  * => "offhi == 0" means flush all pages at or after "offlo".
1.140     pooka     988:  * => object should be locked by caller.  we return with the
                    989:  *      object unlocked.
1.37      chs       990:  * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
                    991:  *     thus, a caller might want to unlock higher level resources
                    992:  *     (e.g. vm_map) before calling flush.
1.140     pooka     993:  * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, we will not block
1.37      chs       994:  * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
                    995:  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
                    996:  *     that new pages are inserted on the tail end of the list.   thus,
                    997:  *     we can make a complete pass through the object in one go by starting
                    998:  *     at the head and working towards the tail (new pages are put in
                    999:  *     front of us).
                   1000:  * => NOTE: we are allowed to lock the page queues, so the caller
                   1001:  *     must not be holding the page queue lock.
                   1002:  *
                   1003:  * note on "cleaning" object and PG_BUSY pages:
                   1004:  *     this routine is holding the lock on the object.   the only time
                   1005:  *     that it can run into a PG_BUSY page that it does not own is if
                   1006:  *     some other process has started I/O on the page (e.g. either
                   1007:  *     a pagein, or a pageout).    if the PG_BUSY page is being paged
                    1008:  *     in, then it cannot be dirty (!PG_CLEAN) because no one has
                   1009:  *     had a chance to modify it yet.    if the PG_BUSY page is being
                   1010:  *     paged out then it means that someone else has already started
1.53      enami    1011:  *     cleaning the page for us (how nice!).    in this case, if we
1.37      chs      1012:  *     have syncio specified, then after we make our pass through the
1.53      enami    1013:  *     object we need to wait for the other PG_BUSY pages to clear
1.37      chs      1014:  *     off (i.e. we need to do an iosync).   also note that once a
                   1015:  *     page is PG_BUSY it must stay in its object until it is un-busyed.
                   1016:  *
                   1017:  * note on page traversal:
                   1018:  *     we can traverse the pages in an object either by going down the
                   1019:  *     linked list in "uobj->memq", or we can go over the address range
                    1020:  *     page by page, doing a hash table lookup for each address.  depending
1.53      enami    1021:  *     on how many pages are in the object it may be cheaper to do one
1.37      chs      1022:  *     or the other.   we set "by_list" to true if we are using memq.
                    1023:  *     if the cost of a hash lookup were equal to the cost of the list
                   1024:  *     traversal we could compare the number of pages in the start->stop
                   1025:  *     range to the total number of pages in the object.   however, it
                   1026:  *     seems that a hash table lookup is more expensive than the linked
1.53      enami    1027:  *     list traversal, so we multiply the number of pages in the
1.37      chs      1028:  *     range by an estimate of the relatively higher cost of the hash lookup.
1.21      chs      1029:  */
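
/*
 * Illustrative sketch (annotation only, not part of this revision):
 * a typical caller takes the vnode interlock and issues VOP_PUTPAGES(),
 * which resolves to genfs_putpages() for filesystems using the genfs
 * routines; the lock is dropped before we return.  Compare the call
 * made from genfs_do_directio() below:
 *
 *	simple_lock(&vp->v_interlock);
 *	error = VOP_PUTPAGES(vp, trunc_page(off), round_page(off + len),
 *	    PGO_CLEANIT | PGO_SYNCIO);
 */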
                   1030:
                   1031: int
1.53      enami    1032: genfs_putpages(void *v)
1.21      chs      1033: {
                   1034:        struct vop_putpages_args /* {
                   1035:                struct vnode *a_vp;
1.37      chs      1036:                voff_t a_offlo;
                   1037:                voff_t a_offhi;
1.21      chs      1038:                int a_flags;
                   1039:        } */ *ap = v;
1.151   ! perseant 1040:
        !          1041:        return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
        !          1042:            ap->a_flags, NULL);
        !          1043: }
        !          1044:
        !          1045: int
        !          1046: genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
        !          1047:        struct vm_page **busypg)
        !          1048: {
1.37      chs      1049:        struct uvm_object *uobj = &vp->v_uobj;
1.46      chs      1050:        struct simplelock *slock = &uobj->vmobjlock;
1.37      chs      1051:        off_t off;
1.76      tls      1052:        /* Even for strange MAXPHYS, the shift rounds down to a page */
1.139     christos 1053: #define maxpages (MAXPHYS >> PAGE_SHIFT)
1.37      chs      1054:        int i, s, error, npages, nback;
                   1055:        int freeflag;
1.60      enami    1056:        struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
1.148     thorpej  1057:        bool wasclean, by_list, needs_clean, yld;
                   1058:        bool async = (flags & PGO_SYNCIO) == 0;
                   1059:        bool pagedaemon = curproc == uvm.pagedaemon_proc;
1.70      christos 1060:        struct lwp *l = curlwp ? curlwp : &lwp0;
1.101     yamt     1061:        struct genfs_node *gp = VTOG(vp);
                   1062:        int dirtygen;
1.149     thorpej  1063:        bool modified = false;
                   1064:        bool has_trans = false;
1.148     thorpej  1065:        bool cleanall;
1.70      christos 1066:
1.37      chs      1067:        UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
                   1068:
                   1069:        KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
                   1070:        KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
                   1071:        KASSERT(startoff < endoff || endoff == 0);
                   1072:
                   1073:        UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
                   1074:            vp, uobj->uo_npages, startoff, endoff - startoff);
1.103     yamt     1075:
                   1076:        KASSERT((vp->v_flag & VONWORKLST) != 0 ||
                   1077:            (vp->v_flag & VWRITEMAPDIRTY) == 0);
1.37      chs      1078:        if (uobj->uo_npages == 0) {
1.62      perseant 1079:                s = splbio();
1.103     yamt     1080:                if (vp->v_flag & VONWORKLST) {
                   1081:                        vp->v_flag &= ~VWRITEMAPDIRTY;
1.137     reinoud  1082:                        if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                   1083:                                vn_syncer_remove_from_worklist(vp);
1.37      chs      1084:                }
1.62      perseant 1085:                splx(s);
1.46      chs      1086:                simple_unlock(slock);
1.53      enami    1087:                return (0);
1.37      chs      1088:        }
                   1089:
                   1090:        /*
                   1091:         * the vnode has pages, set up to process the request.
                   1092:         */
                   1093:
1.143     hannken  1094:        if ((flags & PGO_CLEANIT) != 0) {
                   1095:                simple_unlock(slock);
                   1096:                if (pagedaemon)
1.144     hannken  1097:                        error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY);
1.143     hannken  1098:                else
1.144     hannken  1099:                        error = fstrans_start(vp->v_mount, FSTRANS_LAZY);
1.143     hannken  1100:                if (error)
                   1101:                        return error;
1.149     thorpej  1102:                has_trans = true;
1.143     hannken  1103:                simple_lock(slock);
                   1104:        }
                   1105:
1.37      chs      1106:        error = 0;
1.44      chs      1107:        s = splbio();
1.71      pk       1108:        simple_lock(&global_v_numoutput_slock);
1.44      chs      1109:        wasclean = (vp->v_numoutput == 0);
1.71      pk       1110:        simple_unlock(&global_v_numoutput_slock);
1.44      chs      1111:        splx(s);
1.37      chs      1112:        off = startoff;
                   1113:        if (endoff == 0 || flags & PGO_ALLPAGES) {
                   1114:                endoff = trunc_page(LLONG_MAX);
                   1115:        }
                   1116:        by_list = (uobj->uo_npages <=
                   1117:            ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
                   1118:
1.102     yamt     1119: #if !defined(DEBUG)
                   1120:        /*
                   1121:         * if this vnode is known not to have dirty pages,
                   1122:         * don't bother to clean it out.
                   1123:         */
                   1124:
                   1125:        if ((vp->v_flag & VONWORKLST) == 0) {
                   1126:                if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
                   1127:                        goto skip_scan;
                   1128:                }
                   1129:                flags &= ~PGO_CLEANIT;
                   1130:        }
                   1131: #endif /* !defined(DEBUG) */
                   1132:
1.37      chs      1133:        /*
                   1134:         * start the loop.  when scanning by list, hold the last page
                   1135:         * in the list before we start.  pages allocated after we start
                   1136:         * will be added to the end of the list, so we can stop at the
                   1137:         * current last page.
                   1138:         */
                   1139:
1.104     yamt     1140:        cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
                   1141:            startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
                   1142:            (vp->v_flag & VONWORKLST) != 0;
1.101     yamt     1143:        dirtygen = gp->g_dirtygen;
1.56      enami    1144:        freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
1.37      chs      1145:        if (by_list) {
1.113     yamt     1146:                curmp.uobject = uobj;
                   1147:                curmp.offset = (voff_t)-1;
                   1148:                curmp.flags = PG_BUSY;
                   1149:                endmp.uobject = uobj;
                   1150:                endmp.offset = (voff_t)-1;
                   1151:                endmp.flags = PG_BUSY;
1.37      chs      1152:                pg = TAILQ_FIRST(&uobj->memq);
                   1153:                TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1.70      christos 1154:                PHOLD(l);
1.37      chs      1155:        } else {
                   1156:                pg = uvm_pagelookup(uobj, off);
                   1157:        }
                   1158:        nextpg = NULL;
                   1159:        while (by_list || off < endoff) {
                   1160:
                   1161:                /*
                   1162:                 * if the current page is not interesting, move on to the next.
                   1163:                 */
                   1164:
                   1165:                KASSERT(pg == NULL || pg->uobject == uobj);
                   1166:                KASSERT(pg == NULL ||
1.53      enami    1167:                    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
                   1168:                    (pg->flags & PG_BUSY) != 0);
1.37      chs      1169:                if (by_list) {
                   1170:                        if (pg == &endmp) {
                   1171:                                break;
                   1172:                        }
                   1173:                        if (pg->offset < startoff || pg->offset >= endoff ||
                   1174:                            pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1.101     yamt     1175:                                if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1.149     thorpej  1176:                                        wasclean = false;
1.101     yamt     1177:                                }
1.37      chs      1178:                                pg = TAILQ_NEXT(pg, listq);
                   1179:                                continue;
                   1180:                        }
                   1181:                        off = pg->offset;
1.101     yamt     1182:                } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                   1183:                        if (pg != NULL) {
1.149     thorpej  1184:                                wasclean = false;
1.101     yamt     1185:                        }
1.37      chs      1186:                        off += PAGE_SIZE;
                   1187:                        if (off < endoff) {
                   1188:                                pg = uvm_pagelookup(uobj, off);
                   1189:                        }
                   1190:                        continue;
                   1191:                }
1.21      chs      1192:
1.37      chs      1193:                /*
                   1194:                 * if the current page needs to be cleaned and it's busy,
                   1195:                 * wait for it to become unbusy.
                   1196:                 */
                   1197:
1.97      christos 1198:                yld = (l->l_cpu->ci_schedstate.spc_flags &
1.56      enami    1199:                    SPCF_SHOULDYIELD) && !pagedaemon;
1.97      christos 1200:                if (pg->flags & PG_BUSY || yld) {
1.72      perseant 1201:                        UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
                   1202:                        if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
                   1203:                                UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
                   1204:                                error = EDEADLK;
1.151   ! perseant 1205:                                if (busypg != NULL)
        !          1206:                                        *busypg = pg;
1.72      perseant 1207:                                break;
                   1208:                        }
1.56      enami    1209:                        KASSERT(!pagedaemon);
1.37      chs      1210:                        if (by_list) {
                   1211:                                TAILQ_INSERT_BEFORE(pg, &curmp, listq);
                   1212:                                UVMHIST_LOG(ubchist, "curmp next %p",
1.53      enami    1213:                                    TAILQ_NEXT(&curmp, listq), 0,0,0);
1.37      chs      1214:                        }
1.97      christos 1215:                        if (yld) {
1.49      chs      1216:                                simple_unlock(slock);
1.145     ad       1217:                                preempt();
1.49      chs      1218:                                simple_lock(slock);
                   1219:                        } else {
                   1220:                                pg->flags |= PG_WANTED;
                   1221:                                UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
                   1222:                                simple_lock(slock);
                   1223:                        }
1.37      chs      1224:                        if (by_list) {
                   1225:                                UVMHIST_LOG(ubchist, "after next %p",
1.53      enami    1226:                                    TAILQ_NEXT(&curmp, listq), 0,0,0);
1.37      chs      1227:                                pg = TAILQ_NEXT(&curmp, listq);
                   1228:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq);
                   1229:                        } else {
                   1230:                                pg = uvm_pagelookup(uobj, off);
                   1231:                        }
                   1232:                        continue;
1.49      chs      1233:                }
                   1234:
                   1235:                /*
                   1236:                 * if we're freeing, remove all mappings of the page now.
                    1237:                 * if we're cleaning, check if the page needs to be cleaned.
                   1238:                 */
                   1239:
                   1240:                if (flags & PGO_FREE) {
                   1241:                        pmap_page_protect(pg, VM_PROT_NONE);
1.101     yamt     1242:                } else if (flags & PGO_CLEANIT) {
                   1243:
                   1244:                        /*
                   1245:                         * if we still have some hope to pull this vnode off
                    1246:                         * the syncer queue, write-protect the page.
                   1247:                         */
                   1248:
1.104     yamt     1249:                        if (cleanall && wasclean &&
                   1250:                            gp->g_dirtygen == dirtygen) {
                   1251:
                   1252:                                /*
                   1253:                                 * uobj pages get wired only by uvm_fault
                   1254:                                 * where uobj is locked.
                   1255:                                 */
                   1256:
                   1257:                                if (pg->wire_count == 0) {
                   1258:                                        pmap_page_protect(pg,
                   1259:                                            VM_PROT_READ|VM_PROT_EXECUTE);
                   1260:                                } else {
1.149     thorpej  1261:                                        cleanall = false;
1.104     yamt     1262:                                }
1.101     yamt     1263:                        }
1.49      chs      1264:                }
1.101     yamt     1265:
1.49      chs      1266:                if (flags & PGO_CLEANIT) {
                   1267:                        needs_clean = pmap_clear_modify(pg) ||
1.53      enami    1268:                            (pg->flags & PG_CLEAN) == 0;
1.49      chs      1269:                        pg->flags |= PG_CLEAN;
                   1270:                } else {
1.149     thorpej  1271:                        needs_clean = false;
1.37      chs      1272:                }
                   1273:
                   1274:                /*
                   1275:                 * if we're cleaning, build a cluster.
                   1276:                 * the cluster will consist of pages which are currently dirty,
                   1277:                 * but they will be returned to us marked clean.
                   1278:                 * if not cleaning, just operate on the one page.
                   1279:                 */
                   1280:
                   1281:                if (needs_clean) {
1.101     yamt     1282:                        KDASSERT((vp->v_flag & VONWORKLST));
1.149     thorpej  1283:                        wasclean = false;
1.37      chs      1284:                        memset(pgs, 0, sizeof(pgs));
                   1285:                        pg->flags |= PG_BUSY;
                   1286:                        UVM_PAGE_OWN(pg, "genfs_putpages");
                   1287:
                   1288:                        /*
                   1289:                         * first look backward.
                   1290:                         */
                   1291:
1.60      enami    1292:                        npages = MIN(maxpages >> 1, off >> PAGE_SHIFT);
1.37      chs      1293:                        nback = npages;
                   1294:                        uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
                   1295:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
                   1296:                        if (nback) {
                   1297:                                memmove(&pgs[0], &pgs[npages - nback],
                   1298:                                    nback * sizeof(pgs[0]));
1.47      enami    1299:                                if (npages - nback < nback)
                   1300:                                        memset(&pgs[nback], 0,
                   1301:                                            (npages - nback) * sizeof(pgs[0]));
                   1302:                                else
                   1303:                                        memset(&pgs[npages - nback], 0,
                   1304:                                            nback * sizeof(pgs[0]));
1.37      chs      1305:                        }
                   1306:
                   1307:                        /*
                   1308:                         * then plug in our page of interest.
                   1309:                         */
                   1310:
                   1311:                        pgs[nback] = pg;
                   1312:
                   1313:                        /*
                   1314:                         * then look forward to fill in the remaining space in
                   1315:                         * the array of pages.
                   1316:                         */
                   1317:
1.60      enami    1318:                        npages = maxpages - nback - 1;
1.37      chs      1319:                        uvn_findpages(uobj, off + PAGE_SIZE, &npages,
                   1320:                            &pgs[nback + 1],
                   1321:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
                   1322:                        npages += nback + 1;
                   1323:                } else {
                   1324:                        pgs[0] = pg;
                   1325:                        npages = 1;
1.61      enami    1326:                        nback = 0;
1.37      chs      1327:                }
                   1328:
                   1329:                /*
                   1330:                 * apply FREE or DEACTIVATE options if requested.
                   1331:                 */
                   1332:
                   1333:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
                   1334:                        uvm_lock_pageq();
                   1335:                }
                   1336:                for (i = 0; i < npages; i++) {
                   1337:                        tpg = pgs[i];
                   1338:                        KASSERT(tpg->uobject == uobj);
1.59      enami    1339:                        if (by_list && tpg == TAILQ_NEXT(pg, listq))
                   1340:                                pg = tpg;
1.91      enami    1341:                        if (tpg->offset < startoff || tpg->offset >= endoff)
                   1342:                                continue;
1.141     yamt     1343:                        if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
1.37      chs      1344:                                (void) pmap_clear_reference(tpg);
                   1345:                                uvm_pagedeactivate(tpg);
                   1346:                        } else if (flags & PGO_FREE) {
                   1347:                                pmap_page_protect(tpg, VM_PROT_NONE);
                   1348:                                if (tpg->flags & PG_BUSY) {
                   1349:                                        tpg->flags |= freeflag;
1.56      enami    1350:                                        if (pagedaemon) {
1.37      chs      1351:                                                uvmexp.paging++;
                   1352:                                                uvm_pagedequeue(tpg);
                   1353:                                        }
                   1354:                                } else {
1.59      enami    1355:
                   1356:                                        /*
                   1357:                                         * ``page is not busy''
                   1358:                                         * implies that npages is 1
                   1359:                                         * and needs_clean is false.
                   1360:                                         */
                   1361:
1.37      chs      1362:                                        nextpg = TAILQ_NEXT(tpg, listq);
                   1363:                                        uvm_pagefree(tpg);
1.89      enami    1364:                                        if (pagedaemon)
                   1365:                                                uvmexp.pdfreed++;
1.37      chs      1366:                                }
                   1367:                        }
                   1368:                }
                   1369:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
                   1370:                        uvm_unlock_pageq();
                   1371:                }
                   1372:                if (needs_clean) {
1.149     thorpej  1373:                        modified = true;
1.37      chs      1374:
                   1375:                        /*
                   1376:                         * start the i/o.  if we're traversing by list,
                   1377:                         * keep our place in the list with a marker page.
                   1378:                         */
                   1379:
                   1380:                        if (by_list) {
                   1381:                                TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
                   1382:                                    listq);
                   1383:                        }
1.46      chs      1384:                        simple_unlock(slock);
1.37      chs      1385:                        error = GOP_WRITE(vp, pgs, npages, flags);
1.46      chs      1386:                        simple_lock(slock);
1.37      chs      1387:                        if (by_list) {
                   1388:                                pg = TAILQ_NEXT(&curmp, listq);
                   1389:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq);
                   1390:                        }
                   1391:                        if (error) {
                   1392:                                break;
                   1393:                        }
                   1394:                        if (by_list) {
                   1395:                                continue;
                   1396:                        }
                   1397:                }
                   1398:
                   1399:                /*
                   1400:                 * find the next page and continue if there was no error.
                   1401:                 */
                   1402:
                   1403:                if (by_list) {
                   1404:                        if (nextpg) {
                   1405:                                pg = nextpg;
                   1406:                                nextpg = NULL;
                   1407:                        } else {
                   1408:                                pg = TAILQ_NEXT(pg, listq);
                   1409:                        }
                   1410:                } else {
1.61      enami    1411:                        off += (npages - nback) << PAGE_SHIFT;
1.37      chs      1412:                        if (off < endoff) {
                   1413:                                pg = uvm_pagelookup(uobj, off);
                   1414:                        }
                   1415:                }
                   1416:        }
                   1417:        if (by_list) {
                   1418:                TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1.70      christos 1419:                PRELE(l);
1.37      chs      1420:        }
                   1421:
1.103     yamt     1422:        if (modified && (vp->v_flag & VWRITEMAPDIRTY) != 0 &&
1.121     reinoud  1423:            (vp->v_type != VBLK ||
1.103     yamt     1424:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                   1425:                GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
                   1426:        }
                   1427:
1.37      chs      1428:        /*
                   1429:         * if we're cleaning and there was nothing to clean,
                   1430:         * take us off the syncer list.  if we started any i/o
                   1431:         * and we're doing sync i/o, wait for all writes to finish.
                   1432:         */
                   1433:
1.62      perseant 1434:        s = splbio();
1.104     yamt     1435:        if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
                   1436:            (vp->v_flag & VONWORKLST) != 0) {
1.103     yamt     1437:                vp->v_flag &= ~VWRITEMAPDIRTY;
1.137     reinoud  1438:                if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                   1439:                        vn_syncer_remove_from_worklist(vp);
1.37      chs      1440:        }
1.62      perseant 1441:        splx(s);
1.102     yamt     1442:
                   1443: #if !defined(DEBUG)
                   1444: skip_scan:
                   1445: #endif /* !defined(DEBUG) */
1.37      chs      1446:        if (!wasclean && !async) {
                   1447:                s = splbio();
1.71      pk       1448:                /*
                   1449:                 * XXX - we want simple_unlock(&global_v_numoutput_slock);
                   1450:                 *       but the slot in ltsleep() is taken!
                   1451:                 * XXX - try to recover from missed wakeups with a timeout..
                   1452:                 *       must think of something better.
                   1453:                 */
1.37      chs      1454:                while (vp->v_numoutput != 0) {
                   1455:                        vp->v_flag |= VBWAIT;
1.149     thorpej  1456:                        UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, false,
1.71      pk       1457:                            "genput2", hz);
1.46      chs      1458:                        simple_lock(slock);
1.37      chs      1459:                }
                   1460:                splx(s);
                   1461:        }
1.140     pooka    1462:        simple_unlock(slock);
1.143     hannken  1463:
                   1464:        if (has_trans)
                   1465:                fstrans_done(vp->v_mount);
                   1466:
1.53      enami    1467:        return (error);
1.37      chs      1468: }
                   1469:
                   1470: int
                   1471: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
                   1472: {
1.130     chs      1473:        off_t off;
                   1474:        vaddr_t kva;
                   1475:        size_t len;
                   1476:        int error;
                   1477:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1478:
                   1479:        UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
                   1480:            vp, pgs, npages, flags);
                   1481:
                   1482:        off = pgs[0]->offset;
                   1483:        kva = uvm_pagermapin(pgs, npages,
                   1484:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
                   1485:        len = npages << PAGE_SHIFT;
                   1486:
                   1487:        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                   1488:                            uvm_aio_biodone);
                   1489:
                   1490:        return error;
                   1491: }
                   1492:
                   1493: /*
                   1494:  * Backend routine for doing I/O to vnode pages.  Pages are already locked
                   1495:  * and mapped into kernel memory.  Here we just look up the underlying
                   1496:  * device block addresses and call the strategy routine.
                   1497:  */
                   1498:
                   1499: static int
                   1500: genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags,
                   1501:     enum uio_rw rw, void (*iodone)(struct buf *))
                   1502: {
1.37      chs      1503:        int s, error, run;
                   1504:        int fs_bshift, dev_bshift;
1.21      chs      1505:        off_t eof, offset, startoffset;
                   1506:        size_t bytes, iobytes, skipbytes;
                   1507:        daddr_t lbn, blkno;
                   1508:        struct buf *mbp, *bp;
1.36      chs      1509:        struct vnode *devvp;
1.148     thorpej  1510:        bool async = (flags & PGO_SYNCIO) == 0;
                   1511:        bool write = rw == UIO_WRITE;
1.130     chs      1512:        int brw = write ? B_WRITE : B_READ;
                   1513:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1.21      chs      1514:
1.130     chs      1515:        UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
                   1516:            vp, kva, len, flags);
1.21      chs      1517:
1.123     yamt     1518:        GOP_SIZE(vp, vp->v_size, &eof, 0);
1.121     reinoud  1519:        if (vp->v_type != VBLK) {
1.36      chs      1520:                fs_bshift = vp->v_mount->mnt_fs_bshift;
                   1521:                dev_bshift = vp->v_mount->mnt_dev_bshift;
                   1522:        } else {
                   1523:                fs_bshift = DEV_BSHIFT;
                   1524:                dev_bshift = DEV_BSHIFT;
                   1525:        }
1.37      chs      1526:        error = 0;
1.130     chs      1527:        startoffset = off;
                   1528:        bytes = MIN(len, eof - startoffset);
1.21      chs      1529:        skipbytes = 0;
                   1530:        KASSERT(bytes != 0);
                   1531:
1.130     chs      1532:        if (write) {
                   1533:                s = splbio();
                   1534:                simple_lock(&global_v_numoutput_slock);
                   1535:                vp->v_numoutput += 2;
                   1536:                simple_unlock(&global_v_numoutput_slock);
                   1537:                splx(s);
                   1538:        }
1.119     yamt     1539:        mbp = getiobuf();
1.21      chs      1540:        UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1.53      enami    1541:            vp, mbp, vp->v_numoutput, bytes);
1.130     chs      1542:        mbp->b_bufsize = len;
1.21      chs      1543:        mbp->b_data = (void *)kva;
                   1544:        mbp->b_resid = mbp->b_bcount = bytes;
1.130     chs      1545:        mbp->b_flags = B_BUSY | brw | B_AGE | (async ? (B_CALL | B_ASYNC) : 0);
                   1546:        mbp->b_iodone = iodone;
1.21      chs      1547:        mbp->b_vp = vp;
1.120     yamt     1548:        if (curproc == uvm.pagedaemon_proc)
                   1549:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
                   1550:        else if (async)
                   1551:                BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
                   1552:        else
                   1553:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
1.21      chs      1554:
                   1555:        bp = NULL;
                   1556:        for (offset = startoffset;
1.53      enami    1557:            bytes > 0;
                   1558:            offset += iobytes, bytes -= iobytes) {
1.21      chs      1559:                lbn = offset >> fs_bshift;
1.36      chs      1560:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21      chs      1561:                if (error) {
                   1562:                        UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
                   1563:                        skipbytes += bytes;
                   1564:                        bytes = 0;
                   1565:                        break;
                   1566:                }
                   1567:
1.26      chs      1568:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                   1569:                    bytes);
1.21      chs      1570:                if (blkno == (daddr_t)-1) {
1.130     chs      1571:                        if (!write) {
                   1572:                                memset((char *)kva + (offset - startoffset), 0,
                   1573:                                   iobytes);
                   1574:                        }
1.21      chs      1575:                        skipbytes += iobytes;
                   1576:                        continue;
                   1577:                }
                   1578:
                   1579:                /* if it's really one i/o, don't make a second buf */
                   1580:                if (offset == startoffset && iobytes == bytes) {
                   1581:                        bp = mbp;
                   1582:                } else {
                   1583:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1.53      enami    1584:                            vp, bp, vp->v_numoutput, 0);
1.120     yamt     1585:                        bp = getiobuf();
1.130     chs      1586:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
1.21      chs      1587:                }
                   1588:                bp->b_lblkno = 0;
                   1589:
                   1590:                /* adjust physical blkno for partial blocks */
1.25      fvdl     1591:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.53      enami    1592:                    dev_bshift);
                   1593:                UVMHIST_LOG(ubchist,
                   1594:                    "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
                   1595:                    vp, offset, bp->b_bcount, bp->b_blkno);
1.114     yamt     1596:
                   1597:                VOP_STRATEGY(devvp, bp);
1.21      chs      1598:        }
                   1599:        if (skipbytes) {
1.29      chs      1600:                UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1.21      chs      1601:        }
1.120     yamt     1602:        nestiobuf_done(mbp, skipbytes, error);
1.21      chs      1603:        if (async) {
1.32      chs      1604:                UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1.53      enami    1605:                return (0);
1.21      chs      1606:        }
1.37      chs      1607:        UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
                   1608:        error = biowait(mbp);
1.134     yamt     1609:        s = splbio();
1.130     chs      1610:        (*iodone)(mbp);
1.134     yamt     1611:        splx(s);
1.21      chs      1612:        UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1.53      enami    1613:        return (error);
1.42      chs      1614: }
                   1615:
                   1616: /*
                   1617:  * VOP_PUTPAGES() for vnodes which never have pages.
                   1618:  */
                   1619:
                   1620: int
                   1621: genfs_null_putpages(void *v)
                   1622: {
                   1623:        struct vop_putpages_args /* {
                   1624:                struct vnode *a_vp;
                   1625:                voff_t a_offlo;
                   1626:                voff_t a_offhi;
                   1627:                int a_flags;
                   1628:        } */ *ap = v;
                   1629:        struct vnode *vp = ap->a_vp;
                   1630:
                   1631:        KASSERT(vp->v_uobj.uo_npages == 0);
                   1632:        simple_unlock(&vp->v_interlock);
                   1633:        return (0);
1.21      chs      1634: }
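
/*
 * Illustrative sketch (annotation only): a filesystem whose vnodes
 * never cache pages would typically enter this routine in its vnode
 * operations table, along the lines of:
 *
 *	{ &vop_putpages_desc, genfs_null_putpages },
 */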
                   1635:
1.37      chs      1636: void
1.98      yamt     1637: genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
1.37      chs      1638: {
                   1639:        struct genfs_node *gp = VTOG(vp);
                   1640:
1.146     ad       1641:        rw_init(&gp->g_glock);
1.37      chs      1642:        gp->g_op = ops;
                   1643: }
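
/*
 * Illustrative sketch (annotation only): a filesystem passes its
 * genfs_ops table here when it sets up a vnode.  "example_genfsops"
 * is a hypothetical name; the members shown are the generic
 * implementations from this file:
 *
 *	static const struct genfs_ops example_genfsops = {
 *		.gop_size = genfs_size,
 *		.gop_write = genfs_gop_write,
 *	};
 *
 *	genfs_node_init(vp, &example_genfsops);
 */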
                   1644:
                   1645: void
1.147     ad       1646: genfs_node_destroy(struct vnode *vp)
                   1647: {
                   1648:        struct genfs_node *gp = VTOG(vp);
                   1649:
                   1650:        rw_destroy(&gp->g_glock);
                   1651: }
                   1652:
                   1653: void
1.138     christos 1654: genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
1.21      chs      1655: {
                   1656:        int bsize;
                   1657:
1.37      chs      1658:        bsize = 1 << vp->v_mount->mnt_fs_bshift;
                   1659:        *eobp = (size + bsize - 1) & ~(bsize - 1);
1.43      chs      1660: }
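
/*
 * Worked example (annotation only): with mnt_fs_bshift == 13, bsize is
 * 8192, so a size of 12000 rounds up to an end-of-block offset of
 * 16384:  (12000 + 8191) & ~8191 == 16384.
 */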
                   1661:
                   1662: int
                   1663: genfs_compat_getpages(void *v)
                   1664: {
                   1665:        struct vop_getpages_args /* {
                   1666:                struct vnode *a_vp;
                   1667:                voff_t a_offset;
                   1668:                struct vm_page **a_m;
                   1669:                int *a_count;
                   1670:                int a_centeridx;
                   1671:                vm_prot_t a_access_type;
                   1672:                int a_advice;
                   1673:                int a_flags;
                   1674:        } */ *ap = v;
                   1675:
                   1676:        off_t origoffset;
                   1677:        struct vnode *vp = ap->a_vp;
                   1678:        struct uvm_object *uobj = &vp->v_uobj;
                   1679:        struct vm_page *pg, **pgs;
                   1680:        vaddr_t kva;
                   1681:        int i, error, orignpages, npages;
                   1682:        struct iovec iov;
                   1683:        struct uio uio;
1.128     ad       1684:        kauth_cred_t cred = curlwp->l_cred;
1.148     thorpej  1685:        bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.43      chs      1686:
                   1687:        error = 0;
                   1688:        origoffset = ap->a_offset;
                   1689:        orignpages = *ap->a_count;
                   1690:        pgs = ap->a_m;
                   1691:
                   1692:        if (write && (vp->v_flag & VONWORKLST) == 0) {
                   1693:                vn_syncer_add_to_worklist(vp, filedelay);
                   1694:        }
                   1695:        if (ap->a_flags & PGO_LOCKED) {
                   1696:                uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.54      enami    1697:                    UFP_NOWAIT|UFP_NOALLOC|(write ? UFP_NORDONLY : 0));
1.43      chs      1698:
1.53      enami    1699:                return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.43      chs      1700:        }
                   1701:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
                   1702:                simple_unlock(&uobj->vmobjlock);
1.53      enami    1703:                return (EINVAL);
1.43      chs      1704:        }
1.115     yamt     1705:        if ((ap->a_flags & PGO_SYNCIO) == 0) {
1.117     yamt     1706:                simple_unlock(&uobj->vmobjlock);
1.115     yamt     1707:                return 0;
                   1708:        }
1.43      chs      1709:        npages = orignpages;
                   1710:        uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
                   1711:        simple_unlock(&uobj->vmobjlock);
1.53      enami    1712:        kva = uvm_pagermapin(pgs, npages,
                   1713:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.43      chs      1714:        for (i = 0; i < npages; i++) {
                   1715:                pg = pgs[i];
                   1716:                if ((pg->flags & PG_FAKE) == 0) {
                   1717:                        continue;
                   1718:                }
                   1719:                iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
                   1720:                iov.iov_len = PAGE_SIZE;
                   1721:                uio.uio_iov = &iov;
                   1722:                uio.uio_iovcnt = 1;
                   1723:                uio.uio_offset = origoffset + (i << PAGE_SHIFT);
                   1724:                uio.uio_rw = UIO_READ;
                   1725:                uio.uio_resid = PAGE_SIZE;
1.122     yamt     1726:                UIO_SETUP_SYSSPACE(&uio);
1.87      yamt     1727:                /* XXX vn_lock */
1.43      chs      1728:                error = VOP_READ(vp, &uio, 0, cred);
                   1729:                if (error) {
                   1730:                        break;
1.52      chs      1731:                }
                   1732:                if (uio.uio_resid) {
                   1733:                        memset(iov.iov_base, 0, uio.uio_resid);
1.43      chs      1734:                }
                   1735:        }
                   1736:        uvm_pagermapout(kva, npages);
                   1737:        simple_lock(&uobj->vmobjlock);
                   1738:        uvm_lock_pageq();
                   1739:        for (i = 0; i < npages; i++) {
                   1740:                pg = pgs[i];
                   1741:                if (error && (pg->flags & PG_FAKE) != 0) {
                   1742:                        pg->flags |= PG_RELEASED;
                   1743:                } else {
                   1744:                        pmap_clear_modify(pg);
                   1745:                        uvm_pageactivate(pg);
                   1746:                }
                   1747:        }
                   1748:        if (error) {
                   1749:                uvm_page_unbusy(pgs, npages);
                   1750:        }
                   1751:        uvm_unlock_pageq();
                   1752:        simple_unlock(&uobj->vmobjlock);
1.53      enami    1753:        return (error);
1.43      chs      1754: }
                   1755:
                   1756: int
                   1757: genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
1.138     christos 1758:     int flags)
1.43      chs      1759: {
                   1760:        off_t offset;
                   1761:        struct iovec iov;
                   1762:        struct uio uio;
1.128     ad       1763:        kauth_cred_t cred = curlwp->l_cred;
1.43      chs      1764:        struct buf *bp;
                   1765:        vaddr_t kva;
                   1766:        int s, error;
                   1767:
                   1768:        offset = pgs[0]->offset;
1.53      enami    1769:        kva = uvm_pagermapin(pgs, npages,
                   1770:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1.43      chs      1771:
                   1772:        iov.iov_base = (void *)kva;
                   1773:        iov.iov_len = npages << PAGE_SHIFT;
                   1774:        uio.uio_iov = &iov;
1.68      yamt     1775:        uio.uio_iovcnt = 1;
1.43      chs      1776:        uio.uio_offset = offset;
                   1777:        uio.uio_rw = UIO_WRITE;
                   1778:        uio.uio_resid = npages << PAGE_SHIFT;
1.122     yamt     1779:        UIO_SETUP_SYSSPACE(&uio);
1.87      yamt     1780:        /* XXX vn_lock */
1.43      chs      1781:        error = VOP_WRITE(vp, &uio, 0, cred);
                   1782:
                   1783:        s = splbio();
1.71      pk       1784:        V_INCR_NUMOUTPUT(vp);
1.43      chs      1785:        splx(s);
                   1786:
1.119     yamt     1787:        bp = getiobuf();
1.43      chs      1788:        bp->b_flags = B_BUSY | B_WRITE | B_AGE;
                   1789:        bp->b_vp = vp;
                   1790:        bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
                   1791:        bp->b_data = (char *)kva;
                   1792:        bp->b_bcount = npages << PAGE_SHIFT;
                   1793:        bp->b_bufsize = npages << PAGE_SHIFT;
                   1794:        bp->b_resid = 0;
                   1795:        if (error) {
                   1796:                bp->b_flags |= B_ERROR;
                   1797:                bp->b_error = error;
                   1798:        }
                   1799:        uvm_aio_aiodone(bp);
1.53      enami    1800:        return (error);
1.66      jdolecek 1801: }
                   1802:
1.130     chs      1803: /*
                   1804:  * Process a uio using direct I/O.  If we reach a part of the request
                   1805:  * which cannot be processed in this fashion for some reason, just return.
                   1806:  * The caller must handle some additional part of the request using
                   1807:  * buffered I/O before trying direct I/O again.
                   1808:  */
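
/*
 * Illustrative sketch (annotation only): a filesystem's read/write
 * routine would attempt direct I/O first and then fall through to its
 * normal buffered path for whatever part of the uio remains, along the
 * lines of:
 *
 *	if ((ioflag & IO_DIRECT) != 0)
 *		genfs_directio(vp, uio, ioflag);
 *	while (uio->uio_resid > 0) {
 *		... buffered path (e.g. ubc_uiomove()) ...
 *	}
 */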
                   1809:
                   1810: void
1.138     christos 1811: genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
1.130     chs      1812: {
                   1813:        struct vmspace *vs;
                   1814:        struct iovec *iov;
                   1815:        vaddr_t va;
                   1816:        size_t len;
                   1817:        const int mask = DEV_BSIZE - 1;
                   1818:        int error;
                   1819:
                   1820:        /*
                   1821:         * We only support direct I/O to user space for now.
                   1822:         */
                   1823:
                   1824:        if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
                   1825:                return;
                   1826:        }
                   1827:
                   1828:        /*
                   1829:         * If the vnode is mapped, we would need to get the getpages lock
                    1830:         * to stabilize the bmap, but then we would get into trouble while
                   1831:         * locking the pages if the pages belong to this same vnode (or a
                   1832:         * multi-vnode cascade to the same effect).  Just fall back to
                   1833:         * buffered I/O if the vnode is mapped to avoid this mess.
                   1834:         */
                   1835:
                   1836:        if (vp->v_flag & VMAPPED) {
                   1837:                return;
                   1838:        }
                   1839:
                   1840:        /*
                   1841:         * Do as much of the uio as possible with direct I/O.
                   1842:         */
                   1843:
                   1844:        vs = uio->uio_vmspace;
                   1845:        while (uio->uio_resid) {
                   1846:                iov = uio->uio_iov;
                   1847:                if (iov->iov_len == 0) {
                   1848:                        uio->uio_iov++;
                   1849:                        uio->uio_iovcnt--;
                   1850:                        continue;
                   1851:                }
                   1852:                va = (vaddr_t)iov->iov_base;
                   1853:                len = MIN(iov->iov_len, genfs_maxdio);
                   1854:                len &= ~mask;
                   1855:
                   1856:                /*
                   1857:                 * If the next chunk is smaller than DEV_BSIZE or extends past
                   1858:                 * the current EOF, then fall back to buffered I/O.
                   1859:                 */
                   1860:
                   1861:                if (len == 0 || uio->uio_offset + len > vp->v_size) {
                   1862:                        return;
                   1863:                }
                   1864:
                   1865:                /*
                   1866:                 * Check alignment.  The file offset must be at least
                   1867:                 * sector-aligned.  The exact constraint on memory alignment
                   1868:                 * is very hardware-dependent, but requiring sector-aligned
                   1869:                 * addresses there too is safe.
                   1870:                 */
                   1871:
                   1872:                if (uio->uio_offset & mask || va & mask) {
                   1873:                        return;
                   1874:                }
                   1875:                error = genfs_do_directio(vs, va, len, vp, uio->uio_offset,
                   1876:                                          uio->uio_rw);
                   1877:                if (error) {
                   1878:                        break;
                   1879:                }
1.150     christos 1880:                iov->iov_base = (char *)iov->iov_base + len;
1.130     chs      1881:                iov->iov_len -= len;
                   1882:                uio->uio_offset += len;
                   1883:                uio->uio_resid -= len;
                   1884:        }
                   1885: }
                   1886:
                   1887: /*
                   1888:  * Iodone routine for direct I/O.  We don't do much here since the request is
                   1889:  * always synchronous, so the caller will do most of the work after biowait().
                   1890:  */
                   1891:
                   1892: static void
                   1893: genfs_dio_iodone(struct buf *bp)
                   1894: {
                   1895:        int s;
                   1896:
                   1897:        KASSERT((bp->b_flags & B_ASYNC) == 0);
                   1898:        s = splbio();
                   1899:        if ((bp->b_flags & (B_READ | B_AGE)) == B_AGE) {
                   1900:                vwakeup(bp);
                   1901:        }
                   1902:        putiobuf(bp);
                   1903:        splx(s);
                   1904: }
                   1905:
                   1906: /*
                   1907:  * Process one chunk of a direct I/O request.
                   1908:  */
                   1909:
                   1910: static int
                   1911: genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp,
                   1912:     off_t off, enum uio_rw rw)
                   1913: {
                   1914:        struct vm_map *map;
                   1915:        struct pmap *upm, *kpm;
                   1916:        size_t klen = round_page(uva + len) - trunc_page(uva);
                   1917:        off_t spoff, epoff;
                   1918:        vaddr_t kva, puva;
                   1919:        paddr_t pa;
                   1920:        vm_prot_t prot;
                   1921:        int error, rv, poff, koff;
                   1922:        const int pgoflags = PGO_CLEANIT | PGO_SYNCIO |
                   1923:                (rw == UIO_WRITE ? PGO_FREE : 0);
                   1924:
                   1925:        /*
                   1926:         * For writes, verify that this range of the file already has fully
                   1927:         * allocated backing store.  If there are any holes, just punt and
                   1928:         * make the caller take the buffered write path.
                   1929:         */
                   1930:
                   1931:        if (rw == UIO_WRITE) {
                   1932:                daddr_t lbn, elbn, blkno;
                   1933:                int bsize, bshift, run;
                   1934:
                   1935:                bshift = vp->v_mount->mnt_fs_bshift;
                   1936:                bsize = 1 << bshift;
                   1937:                lbn = off >> bshift;
                   1938:                elbn = (off + len + bsize - 1) >> bshift;
                   1939:                while (lbn < elbn) {
                   1940:                        error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
                   1941:                        if (error) {
                   1942:                                return error;
                   1943:                        }
                   1944:                        if (blkno == (daddr_t)-1) {
                   1945:                                return ENOSPC;
                   1946:                        }
                   1947:                        lbn += 1 + run;
                   1948:                }
                   1949:        }
                   1950:
                   1951:        /*
                   1952:         * Flush any cached pages for parts of the file that we're about to
                   1953:         * access.  If we're writing, invalidate pages as well.
                   1954:         */
                   1955:
                   1956:        spoff = trunc_page(off);
                   1957:        epoff = round_page(off + len);
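                         	/* VOP_PUTPAGES is entered with v_interlock held and releases it. */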
                   1958:        simple_lock(&vp->v_interlock);
                   1959:        error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags);
                   1960:        if (error) {
                   1961:                return error;
                   1962:        }
                   1963:
                   1964:        /*
                   1965:         * Wire the user pages and remap them into kernel memory.
                   1966:         */
                   1967:
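                         	/*
                         	 * Note the inversion: a file read stores into the user buffer,
                         	 * so its pages must be writable; a file write only reads them.
                         	 */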
                   1968:        prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
                   1969:        error = uvm_vslock(vs, (void *)uva, len, prot);
                   1970:        if (error) {
                   1971:                return error;
                   1972:        }
                   1973:
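                         	/*
                         	 * The driver sees only kernel virtual addresses, so alias each
                         	 * wired user page into a freshly allocated kernel VA range.
                         	 */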
                   1974:        map = &vs->vm_map;
                   1975:        upm = vm_map_pmap(map);
                   1976:        kpm = vm_map_pmap(kernel_map);
                   1977:        kva = uvm_km_alloc(kernel_map, klen, 0,
                   1978:                           UVM_KMF_VAONLY | UVM_KMF_WAITVA);
                   1979:        puva = trunc_page(uva);
                   1980:        for (poff = 0; poff < klen; poff += PAGE_SIZE) {
                   1981:                rv = pmap_extract(upm, puva + poff, &pa);
                   1982:                KASSERT(rv);
                   1983:                pmap_enter(kpm, kva + poff, pa, prot, prot | PMAP_WIRED);
                   1984:        }
                   1985:        pmap_update(kpm);
                   1986:
                   1987:        /*
                   1988:         * Do the I/O.
                   1989:         */
                   1990:
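                         	/* Carry the sub-page offset of "uva" over into the kernel alias. */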
                   1991:        koff = uva - trunc_page(uva);
                   1992:        error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw,
                   1993:                            genfs_dio_iodone);
                   1994:
                   1995:        /*
                   1996:         * Tear down the kernel mapping.
                   1997:         */
                   1998:
                   1999:        pmap_remove(kpm, kva, kva + klen);
                   2000:        pmap_update(kpm);
                   2001:        uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY);
                   2002:
                   2003:        /*
                   2004:         * Unwire the user pages.
                   2005:         */
                   2006:
                   2007:        uvm_vsunlock(vs, (void *)uva, len);
                   2008:        return error;
                   2009: }
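                         /*
                          * Illustrative sketch only (not code from this revision): a
                          * hypothetical wrapper, here called example_directio(), showing how
                          * a caller might feed a uio to genfs_do_directio() one iovec at a
                          * time.  Error handling such as falling back to buffered I/O on
                          * ENOSPC is omitted.
                          */
                         static int
                         example_directio(struct vnode *vp, struct uio *uio, enum uio_rw rw)
                         {
                         	int error = 0;
                         
                         	while (uio->uio_resid > 0 && error == 0) {
                         		struct iovec *iov = uio->uio_iov;
                         
                         		if (iov->iov_len == 0) {
                         			/* step past exhausted iovecs */
                         			uio->uio_iov++;
                         			uio->uio_iovcnt--;
                         			continue;
                         		}
                         		error = genfs_do_directio(uio->uio_vmspace,
                         		    (vaddr_t)iov->iov_base, iov->iov_len, vp,
                         		    uio->uio_offset, rw);
                         		if (error == 0) {
                         			/* advance the uio past the completed chunk */
                         			iov->iov_base = (char *)iov->iov_base +
                         			    iov->iov_len;
                         			uio->uio_offset += iov->iov_len;
                         			uio->uio_resid -= iov->iov_len;
                         			iov->iov_len = 0;
                         		}
                         	}
                         	return error;
                         }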
                   2010:
                   2011:
1.66      jdolecek 2012: static void
                   2013: filt_genfsdetach(struct knote *kn)
                   2014: {
                   2015:        struct vnode *vp = (struct vnode *)kn->kn_hook;
                   2016:
                   2017:        /* XXXLUKEM lock the struct? */
                   2018:        SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
                   2019: }
                   2020:
                   2021: static int
                   2022: filt_genfsread(struct knote *kn, long hint)
                   2023: {
                   2024:        struct vnode *vp = (struct vnode *)kn->kn_hook;
                   2025:
                   2026:        /*
                    2027:         * The filesystem is gone, so set the EOF flag and
                    2028:         * schedule the knote for deletion.
                   2029:         */
                   2030:        if (hint == NOTE_REVOKE) {
                   2031:                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                   2032:                return (1);
                   2033:        }
                   2034:
                   2035:        /* XXXLUKEM lock the struct? */
                   2036:        kn->kn_data = vp->v_size - kn->kn_fp->f_offset;
                    2037:        return (kn->kn_data != 0);
                   2038: }
                   2039:
                   2040: static int
                   2041: filt_genfsvnode(struct knote *kn, long hint)
                   2042: {
                   2043:
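                         	/* Latch any event the watcher subscribed to; hint 0 is a plain poll. */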
                   2044:        if (kn->kn_sfflags & hint)
                   2045:                kn->kn_fflags |= hint;
                   2046:        if (hint == NOTE_REVOKE) {
                   2047:                kn->kn_flags |= EV_EOF;
                   2048:                return (1);
                   2049:        }
                   2050:        return (kn->kn_fflags != 0);
                   2051: }
                   2052:
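                         /*
                          * The filterops initializer fields are, in order: f_isfd (1 means
                          * the knote ident is a file descriptor), f_attach, f_detach and
                          * f_event.
                          */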
1.96      perry    2053: static const struct filterops genfsread_filtops =
1.66      jdolecek 2054:        { 1, NULL, filt_genfsdetach, filt_genfsread };
1.96      perry    2055: static const struct filterops genfsvnode_filtops =
1.66      jdolecek 2056:        { 1, NULL, filt_genfsdetach, filt_genfsvnode };
                   2057:
                   2058: int
                   2059: genfs_kqfilter(void *v)
                   2060: {
                   2061:        struct vop_kqfilter_args /* {
                   2062:                struct vnode    *a_vp;
                   2063:                struct knote    *a_kn;
                   2064:        } */ *ap = v;
                   2065:        struct vnode *vp;
                   2066:        struct knote *kn;
                   2067:
                   2068:        vp = ap->a_vp;
                   2069:        kn = ap->a_kn;
                   2070:        switch (kn->kn_filter) {
                   2071:        case EVFILT_READ:
                   2072:                kn->kn_fop = &genfsread_filtops;
                   2073:                break;
                   2074:        case EVFILT_VNODE:
                   2075:                kn->kn_fop = &genfsvnode_filtops;
                   2076:                break;
                   2077:        default:
                   2078:                return (1);
                   2079:        }
                   2080:
                   2081:        kn->kn_hook = vp;
                   2082:
                   2083:        /* XXXLUKEM lock the struct? */
                   2084:        SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
                   2085:
                   2086:        return (0);
1.1       mycroft  2087: }
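                         /*
                          * Illustrative sketch only: a minimal userland consumer of the
                          * EVFILT_VNODE filter above.  This is ordinary application code,
                          * not part of this file; watch_vnode() is a hypothetical name.
                          *
                          *	#include <sys/event.h>
                          *	#include <fcntl.h>
                          *
                          *	static int
                          *	watch_vnode(const char *path)
                          *	{
                          *		struct kevent ev;
                          *		int kq, fd;
                          *
                          *		if ((kq = kqueue()) == -1 ||
                          *		    (fd = open(path, O_RDONLY)) == -1)
                          *			return -1;
                          *		EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
                          *		    NOTE_WRITE | NOTE_DELETE | NOTE_RENAME, 0, 0);
                          *		if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
                          *			return -1;
                          *		// block until the vnode changes; ev.fflags says what
                          *		return kevent(kq, NULL, 0, &ev, 1, NULL);
                          *	}
                          */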
1.136     yamt     2088:
                   2089: void
                   2090: genfs_node_wrlock(struct vnode *vp)
                   2091: {
                   2092:        struct genfs_node *gp = VTOG(vp);
                   2093:
1.146     ad       2094:        rw_enter(&gp->g_glock, RW_WRITER);
1.136     yamt     2095: }
                   2096:
                   2097: void
                   2098: genfs_node_rdlock(struct vnode *vp)
                   2099: {
                   2100:        struct genfs_node *gp = VTOG(vp);
                   2101:
1.146     ad       2102:        rw_enter(&gp->g_glock, RW_READER);
1.136     yamt     2103: }
                   2104:
                   2105: void
                   2106: genfs_node_unlock(struct vnode *vp)
                   2107: {
                   2108:        struct genfs_node *gp = VTOG(vp);
                   2109:
1.146     ad       2110:        rw_exit(&gp->g_glock);
1.136     yamt     2111: }
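                         /*
                          * Illustrative sketch only: the pattern a file system typically
                          * follows around block allocation with the genfs node lock.
                          * example_extend() and its exact GOP_ALLOC usage are hypothetical,
                          * not code from this revision.
                          */
                         static int
                         example_extend(struct vnode *vp, off_t newsize, kauth_cred_t cred)
                         {
                         	int error;
                         
                         	genfs_node_wrlock(vp);	/* serialize against getpages/putpages */
                         	error = GOP_ALLOC(vp, vp->v_size, newsize - vp->v_size, 0, cred);
                         	genfs_node_unlock(vp);
                         	return error;
                         }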
