src/sys/miscfs/genfs/genfs_vnops.c - annotate

Return to genfs_vnops.c CVS log
Up to [cvs.NetBSD.org] / src / sys / miscfs / genfs
Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.28

1.27      fvdl        1: /*     $NetBSD: genfs_vnops.c,v 1.26 2001/02/05 12:26:08 chs Exp $     */
1.6       fvdl        2:
                      3: /*
                      4:  * Copyright (c) 1982, 1986, 1989, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  * 3. All advertising materials mentioning features or use of this software
                     16:  *    must display the following acknowledgement:
                     17:  *     This product includes software developed by the University of
                     18:  *     California, Berkeley and its contributors.
                     19:  * 4. Neither the name of the University nor the names of its contributors
                     20:  *    may be used to endorse or promote products derived from this software
                     21:  *    without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     33:  * SUCH DAMAGE.
                     34:  *
                     35:  */
1.5       perry      36:
1.8       thorpej    37: #include "opt_nfsserver.h"
                     38:
1.1       mycroft    39: #include <sys/param.h>
                     40: #include <sys/systm.h>
1.6       fvdl       41: #include <sys/proc.h>
1.1       mycroft    42: #include <sys/kernel.h>
                     43: #include <sys/mount.h>
                     44: #include <sys/namei.h>
                     45: #include <sys/vnode.h>
1.13      wrstuden   46: #include <sys/fcntl.h>
1.1       mycroft    47: #include <sys/malloc.h>
1.3       mycroft    48: #include <sys/poll.h>
1.1       mycroft    49:
                     50: #include <miscfs/genfs/genfs.h>
1.6       fvdl       51: #include <miscfs/specfs/specdev.h>
1.1       mycroft    52:
1.21      chs        53: #include <uvm/uvm.h>
                     54: #include <uvm/uvm_pager.h>
                     55:
1.8       thorpej    56: #ifdef NFSSERVER
                     57: #include <nfs/rpcv2.h>
                     58: #include <nfs/nfsproto.h>
                     59: #include <nfs/nfs.h>
                     60: #include <nfs/nqnfs.h>
                     61: #include <nfs/nfs_var.h>
                     62: #endif
                     63:
1.1       mycroft    64: int
1.3       mycroft    65: genfs_poll(v)
1.1       mycroft    66:        void *v;
                     67: {
1.3       mycroft    68:        struct vop_poll_args /* {
1.1       mycroft    69:                struct vnode *a_vp;
1.3       mycroft    70:                int a_events;
1.1       mycroft    71:                struct proc *a_p;
                     72:        } */ *ap = v;
                     73:
1.3       mycroft    74:        return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1       mycroft    75: }
                     76:
                     77: int
                     78: genfs_fsync(v)
                     79:        void *v;
                     80: {
                     81:        struct vop_fsync_args /* {
                     82:                struct vnode *a_vp;
                     83:                struct ucred *a_cred;
1.7       kleink     84:                int a_flags;
1.20      fvdl       85:                off_t offlo;
                     86:                off_t offhi;
1.1       mycroft    87:                struct proc *a_p;
                     88:        } */ *ap = v;
1.16      augustss   89:        struct vnode *vp = ap->a_vp;
1.11      mycroft    90:        int wait;
1.1       mycroft    91:
1.11      mycroft    92:        wait = (ap->a_flags & FSYNC_WAIT) != 0;
                     93:        vflushbuf(vp, wait);
                     94:        if ((ap->a_flags & FSYNC_DATAONLY) != 0)
1.7       kleink     95:                return (0);
1.11      mycroft    96:        else
1.18      mycroft    97:                return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
1.1       mycroft    98: }
                     99:
                    100: int
1.4       kleink    101: genfs_seek(v)
                    102:        void *v;
                    103: {
                    104:        struct vop_seek_args /* {
                    105:                struct vnode *a_vp;
                    106:                off_t a_oldoff;
                    107:                off_t a_newoff;
                    108:                struct ucred *a_ucred;
                    109:        } */ *ap = v;
                    110:
                    111:        if (ap->a_newoff < 0)
                    112:                return (EINVAL);
                    113:
                    114:        return (0);
                    115: }
                    116:
                    117: int
1.1       mycroft   118: genfs_abortop(v)
                    119:        void *v;
                    120: {
                    121:        struct vop_abortop_args /* {
                    122:                struct vnode *a_dvp;
                    123:                struct componentname *a_cnp;
                    124:        } */ *ap = v;
                    125:
                    126:        if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19      thorpej   127:                PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1       mycroft   128:        return (0);
1.13      wrstuden  129: }
                    130:
                    131: int
                    132: genfs_fcntl(v)
                    133:        void *v;
                    134: {
                    135:        struct vop_fcntl_args /* {
                    136:                struct vnode *a_vp;
                    137:                u_int a_command;
                    138:                caddr_t a_data;
                    139:                int a_fflag;
                    140:                struct ucred *a_cred;
                    141:                struct proc *a_p;
                    142:        } */ *ap = v;
                    143:
                    144:        if (ap->a_command == F_SETFL)
                    145:                return (0);
                    146:        else
                    147:                return (EOPNOTSUPP);
1.1       mycroft   148: }
                    149:
                    150: /*ARGSUSED*/
                    151: int
                    152: genfs_badop(v)
                    153:        void *v;
                    154: {
                    155:
                    156:        panic("genfs: bad op");
                    157: }
                    158:
                    159: /*ARGSUSED*/
                    160: int
                    161: genfs_nullop(v)
                    162:        void *v;
                    163: {
                    164:
                    165:        return (0);
1.10      kleink    166: }
                    167:
                    168: /*ARGSUSED*/
                    169: int
                    170: genfs_einval(v)
                    171:        void *v;
                    172: {
                    173:
                    174:        return (EINVAL);
1.1       mycroft   175: }
                    176:
                    177: /*ARGSUSED*/
                    178: int
                    179: genfs_eopnotsupp(v)
                    180:        void *v;
                    181: {
                    182:
                    183:        return (EOPNOTSUPP);
                    184: }
                    185:
1.12      wrstuden  186: /*
                    187:  * Called when an fs doesn't support a particular vop but the vop needs to
                    188:  * vrele, vput, or vunlock passed in vnodes.
                    189:  */
                    190: int
                    191: genfs_eopnotsupp_rele(v)
                    192:        void *v;
                    193: {
                    194:        struct vop_generic_args /*
                    195:                struct vnodeop_desc *a_desc;
                    196:                / * other random data follows, presumably * /
                    197:        } */ *ap = v;
                    198:        struct vnodeop_desc *desc = ap->a_desc;
                    199:        struct vnode *vp;
                    200:        int flags, i, j, offset;
                    201:
                    202:        flags = desc->vdesc_flags;
                    203:        for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
                    204:                if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
                    205:                        break;  /* stop at end of list */
                    206:                if ((j = flags & VDESC_VP0_WILLPUT)) {
                    207:                        vp = *VOPARG_OFFSETTO(struct vnode**,offset,ap);
                    208:                        switch (j) {
                    209:                        case VDESC_VP0_WILLPUT:
                    210:                                vput(vp);
                    211:                                break;
                    212:                        case VDESC_VP0_WILLUNLOCK:
                    213:                                VOP_UNLOCK(vp, 0);
                    214:                                break;
                    215:                        case VDESC_VP0_WILLRELE:
                    216:                                vrele(vp);
                    217:                                break;
                    218:                        }
                    219:                }
                    220:        }
                    221:
                    222:        return (EOPNOTSUPP);
                    223: }
                    224:
/*
 * Vnode operation that unconditionally fails with EBADF.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{
	const int error = EBADF;

	return (error);
}
                    233:
                    234: /* ARGSUSED */
                    235: int
                    236: genfs_enoioctl(v)
                    237:        void *v;
                    238: {
                    239:
                    240:        return (ENOTTY);
1.6       fvdl      241: }
                    242:
                    243:
                    244: /*
1.15      fvdl      245:  * Eliminate all activity associated with the requested vnode
1.6       fvdl      246:  * and with all vnodes aliased to the requested vnode.
                    247:  */
                    248: int
                    249: genfs_revoke(v)
                    250:        void *v;
                    251: {
                    252:        struct vop_revoke_args /* {
                    253:                struct vnode *a_vp;
                    254:                int a_flags;
                    255:        } */ *ap = v;
                    256:        struct vnode *vp, *vq;
                    257:        struct proc *p = curproc;       /* XXX */
                    258:
                    259: #ifdef DIAGNOSTIC
                    260:        if ((ap->a_flags & REVOKEALL) == 0)
                    261:                panic("genfs_revoke: not revokeall");
                    262: #endif
                    263:
                    264:        vp = ap->a_vp;
                    265:        simple_lock(&vp->v_interlock);
                    266:
                    267:        if (vp->v_flag & VALIASED) {
                    268:                /*
                    269:                 * If a vgone (or vclean) is already in progress,
                    270:                 * wait until it is done and return.
                    271:                 */
                    272:                if (vp->v_flag & VXLOCK) {
                    273:                        vp->v_flag |= VXWANT;
                    274:                        simple_unlock(&vp->v_interlock);
                    275:                        tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
                    276:                        return (0);
                    277:                }
                    278:                /*
                    279:                 * Ensure that vp will not be vgone'd while we
                    280:                 * are eliminating its aliases.
                    281:                 */
                    282:                vp->v_flag |= VXLOCK;
                    283:                simple_unlock(&vp->v_interlock);
                    284:                while (vp->v_flag & VALIASED) {
                    285:                        simple_lock(&spechash_slock);
                    286:                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                    287:                                if (vq->v_rdev != vp->v_rdev ||
                    288:                                    vq->v_type != vp->v_type || vp == vq)
                    289:                                        continue;
                    290:                                simple_unlock(&spechash_slock);
                    291:                                vgone(vq);
                    292:                                break;
                    293:                        }
                    294:                        if (vq == NULLVP)
                    295:                                simple_unlock(&spechash_slock);
                    296:                }
                    297:                /*
                    298:                 * Remove the lock so that vgone below will
                    299:                 * really eliminate the vnode after which time
                    300:                 * vgone will awaken any sleepers.
                    301:                 */
                    302:                simple_lock(&vp->v_interlock);
                    303:                vp->v_flag &= ~VXLOCK;
                    304:        }
                    305:        vgonel(vp, p);
                    306:        return (0);
                    307: }
                    308:
                    309: /*
1.12      wrstuden  310:  * Lock the node.
1.6       fvdl      311:  */
                    312: int
1.12      wrstuden  313: genfs_lock(v)
1.6       fvdl      314:        void *v;
                    315: {
                    316:        struct vop_lock_args /* {
                    317:                struct vnode *a_vp;
                    318:                int a_flags;
                    319:        } */ *ap = v;
                    320:        struct vnode *vp = ap->a_vp;
                    321:
1.12      wrstuden  322:        return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
1.6       fvdl      323: }
                    324:
                    325: /*
1.12      wrstuden  326:  * Unlock the node.
1.6       fvdl      327:  */
                    328: int
1.12      wrstuden  329: genfs_unlock(v)
1.6       fvdl      330:        void *v;
                    331: {
                    332:        struct vop_unlock_args /* {
                    333:                struct vnode *a_vp;
                    334:                int a_flags;
                    335:        } */ *ap = v;
                    336:        struct vnode *vp = ap->a_vp;
                    337:
1.12      wrstuden  338:        return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
                    339:                &vp->v_interlock));
1.6       fvdl      340: }
                    341:
                    342: /*
1.12      wrstuden  343:  * Return whether or not the node is locked.
1.6       fvdl      344:  */
                    345: int
1.12      wrstuden  346: genfs_islocked(v)
1.6       fvdl      347:        void *v;
                    348: {
                    349:        struct vop_islocked_args /* {
                    350:                struct vnode *a_vp;
                    351:        } */ *ap = v;
                    352:        struct vnode *vp = ap->a_vp;
                    353:
1.12      wrstuden  354:        return (lockstatus(&vp->v_lock));
                    355: }
                    356:
                    357: /*
                    358:  * Stubs to use when there is no locking to be done on the underlying object.
                    359:  */
                    360: int
                    361: genfs_nolock(v)
                    362:        void *v;
                    363: {
                    364:        struct vop_lock_args /* {
                    365:                struct vnode *a_vp;
                    366:                int a_flags;
                    367:                struct proc *a_p;
                    368:        } */ *ap = v;
                    369:
                    370:        /*
                    371:         * Since we are not using the lock manager, we must clear
                    372:         * the interlock here.
                    373:         */
                    374:        if (ap->a_flags & LK_INTERLOCK)
                    375:                simple_unlock(&ap->a_vp->v_interlock);
                    376:        return (0);
                    377: }
                    378:
                    379: int
                    380: genfs_nounlock(v)
                    381:        void *v;
                    382: {
                    383:        return (0);
                    384: }
                    385:
                    386: int
                    387: genfs_noislocked(v)
                    388:        void *v;
                    389: {
                    390:        return (0);
1.8       thorpej   391: }
                    392:
                    393: /*
                    394:  * Local lease check for NFS servers.  Just set up args and let
                    395:  * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
                    396:  * this is a null operation.
                    397:  */
                    398: int
                    399: genfs_lease_check(v)
                    400:        void *v;
                    401: {
                    402: #ifdef NFSSERVER
                    403:        struct vop_lease_args /* {
                    404:                struct vnode *a_vp;
                    405:                struct proc *a_p;
                    406:                struct ucred *a_cred;
                    407:                int a_flag;
                    408:        } */ *ap = v;
                    409:        u_int32_t duration = 0;
                    410:        int cache;
                    411:        u_quad_t frev;
                    412:
                    413:        (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
                    414:            NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
                    415:        return (0);
                    416: #else
                    417:        return (0);
                    418: #endif /* NFSSERVER */
1.21      chs       419: }
                    420:
                    421: /*
                    422:  * generic VM getpages routine.
                    423:  * Return PG_BUSY pages for the given range,
                    424:  * reading from backing store if necessary.
                    425:  */
                    426:
                    427: int
                    428: genfs_getpages(v)
                    429:        void *v;
                    430: {
                    431:        struct vop_getpages_args /* {
                    432:                struct vnode *a_vp;
                    433:                voff_t a_offset;
                    434:                vm_page_t *a_m;
                    435:                int *a_count;
                    436:                int a_centeridx;
                    437:                vm_prot_t a_access_type;
                    438:                int a_advice;
                    439:                int a_flags;
                    440:        } */ *ap = v;
                    441:
1.26      chs       442:        off_t newsize, eof;
                    443:        off_t offset, origoffset, startoffset, endoffset, raoffset;
1.21      chs       444:        daddr_t lbn, blkno;
                    445:        int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
                    446:        int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
                    447:        int flags = ap->a_flags;
                    448:        size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
                    449:        vaddr_t kva;
                    450:        struct buf *bp, *mbp;
                    451:        struct vnode *vp = ap->a_vp;
                    452:        struct uvm_object *uobj = &vp->v_uvm.u_obj;
                    453:        struct vm_page *pgs[16];                        /* XXXUBC 16 */
                    454:        struct ucred *cred = curproc->p_ucred;          /* XXXUBC curproc */
                    455:        boolean_t async = (flags & PGO_SYNCIO) == 0;
                    456:        boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
                    457:        boolean_t sawhole = FALSE;
                    458:        UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
                    459:
                    460:        /* XXXUBC temp limit */
                    461:        if (*ap->a_count > 16) {
                    462:                return EINVAL;
                    463:        }
                    464:
1.26      chs       465:        error = 0;
                    466:        origoffset = ap->a_offset;
                    467:        orignpages = *ap->a_count;
                    468:        if (flags & PGO_PASTEOF) {
                    469:                newsize = MAX(vp->v_uvm.u_size,
                    470:                              origoffset + (orignpages << PAGE_SHIFT));
                    471:        } else {
                    472:                newsize = vp->v_uvm.u_size;
                    473:        }
                    474:        error = VOP_SIZE(vp, newsize, &eof);
1.21      chs       475:        if (error) {
                    476:                return error;
                    477:        }
                    478:
                    479: #ifdef DIAGNOSTIC
                    480:        if (ap->a_centeridx < 0 || ap->a_centeridx > *ap->a_count) {
                    481:                panic("genfs_getpages: centeridx %d out of range",
                    482:                      ap->a_centeridx);
                    483:        }
1.26      chs       484:        if (origoffset & (PAGE_SIZE - 1) || origoffset < 0) {
1.21      chs       485:                panic("genfs_getpages: offset 0x%x", (int)ap->a_offset);
                    486:        }
                    487:        if (*ap->a_count < 0) {
                    488:                panic("genfs_getpages: count %d < 0", *ap->a_count);
                    489:        }
                    490: #endif
                    491:
                    492:        /*
                    493:         * Bounds-check the request.
                    494:         */
                    495:
                    496:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= eof &&
                    497:            (flags & PGO_PASTEOF) == 0) {
                    498:                if ((flags & PGO_LOCKED) == 0) {
                    499:                        simple_unlock(&uobj->vmobjlock);
                    500:                }
                    501:                UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
                    502:                            origoffset, *ap->a_count, eof,0);
                    503:                return EINVAL;
                    504:        }
                    505:
                    506:        /*
                    507:         * For PGO_LOCKED requests, just return whatever's in memory.
                    508:         */
                    509:
                    510:        if (flags & PGO_LOCKED) {
                    511:                uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
                    512:                              UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
                    513:
                    514:                return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
                    515:        }
                    516:
                    517:        /* vnode is VOP_LOCKed, uobj is locked */
                    518:
                    519:        if (write && (vp->v_flag & VONWORKLST) == 0) {
                    520:                vn_syncer_add_to_worklist(vp, filedelay);
                    521:        }
                    522:
                    523:        /*
                    524:         * find the requested pages and make some simple checks.
                    525:         * leave space in the page array for a whole block.
                    526:         */
                    527:
                    528:        fs_bshift = vp->v_mount->mnt_fs_bshift;
                    529:        fs_bsize = 1 << fs_bshift;
                    530:        dev_bshift = vp->v_mount->mnt_dev_bshift;
                    531:        dev_bsize = 1 << dev_bshift;
                    532:        KASSERT((eof & (dev_bsize - 1)) == 0);
                    533:
1.26      chs       534:        if ((flags & PGO_PASTEOF) == 0) {
                    535:                orignpages = MIN(orignpages,
                    536:                    round_page(eof - origoffset) >> PAGE_SHIFT);
1.21      chs       537:        }
                    538:        npages = orignpages;
                    539:        startoffset = origoffset & ~(fs_bsize - 1);
                    540:        endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
                    541:                                + fs_bsize - 1) & ~(fs_bsize - 1));
1.26      chs       542:        if ((flags & PGO_PASTEOF) == 0) {
                    543:                endoffset = MIN(endoffset, round_page(eof));
                    544:        }
1.21      chs       545:        ridx = (origoffset - startoffset) >> PAGE_SHIFT;
                    546:
                    547:        memset(pgs, 0, sizeof(pgs));
                    548:        uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
                    549:
                    550:        /*
                    551:         * if PGO_OVERWRITE is set, don't bother reading the pages.
                    552:         * PGO_OVERWRITE also means that the caller guarantees
                    553:         * that the pages already have backing store allocated.
                    554:         */
                    555:
                    556:        if (flags & PGO_OVERWRITE) {
                    557:                UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
                    558:
                    559:                for (i = 0; i < npages; i++) {
                    560:                        struct vm_page *pg = pgs[ridx + i];
                    561:
                    562:                        if (pg->flags & PG_FAKE) {
                    563:                                uvm_pagezero(pg);
                    564:                                pg->flags &= ~(PG_FAKE);
                    565:                        }
                    566:                        pg->flags &= ~(PG_RDONLY);
                    567:                }
1.26      chs       568:                npages += ridx;
1.21      chs       569:                goto out;
                    570:        }
                    571:
                    572:        /*
                    573:         * if the pages are already resident, just return them.
                    574:         */
                    575:
                    576:        for (i = 0; i < npages; i++) {
                    577:                struct vm_page *pg = pgs[ridx + i];
                    578:
                    579:                if ((pg->flags & PG_FAKE) ||
                    580:                    (write && (pg->flags & PG_RDONLY))) {
                    581:                        break;
                    582:                }
                    583:        }
                    584:        if (i == npages) {
                    585:                UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
                    586:                raoffset = origoffset + (orignpages << PAGE_SHIFT);
1.26      chs       587:                npages += ridx;
1.21      chs       588:                goto raout;
                    589:        }
                    590:
                    591:        /*
                    592:         * the page wasn't resident and we're not overwriting,
                    593:         * so we're going to have to do some i/o.
                    594:         * find any additional pages needed to cover the expanded range.
                    595:         */
                    596:
                    597:        if (startoffset != origoffset) {
                    598:
                    599:                /*
                    600:                 * XXXUBC we need to avoid deadlocks caused by locking
                    601:                 * additional pages at lower offsets than pages we
                    602:                 * already have locked.  for now, unlock them all and
                    603:                 * start over.
                    604:                 */
                    605:
                    606:                for (i = 0; i < npages; i++) {
                    607:                        struct vm_page *pg = pgs[ridx + i];
                    608:
                    609:                        if (pg->flags & PG_FAKE) {
                    610:                                pg->flags |= PG_RELEASED;
                    611:                        }
                    612:                }
                    613:                uvm_page_unbusy(&pgs[ridx], npages);
                    614:                memset(pgs, 0, sizeof(pgs));
                    615:
                    616:                UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
                    617:                            startoffset, endoffset, 0,0);
                    618:                npages = (endoffset - startoffset) >> PAGE_SHIFT;
                    619:                npgs = npages;
                    620:                uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
                    621:        }
                    622:        simple_unlock(&uobj->vmobjlock);
                    623:
                    624:        /*
                    625:         * read the desired page(s).
                    626:         */
                    627:
                    628:        totalbytes = npages << PAGE_SHIFT;
1.26      chs       629:        bytes = MIN(totalbytes, eof - startoffset);
1.21      chs       630:        tailbytes = totalbytes - bytes;
                    631:        skipbytes = 0;
                    632:
                    633:        kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
                    634:                             UVMPAGER_MAPIN_READ);
                    635:
                    636:        s = splbio();
                    637:        mbp = pool_get(&bufpool, PR_WAITOK);
                    638:        splx(s);
                    639:        mbp->b_bufsize = totalbytes;
                    640:        mbp->b_data = (void *)kva;
                    641:        mbp->b_resid = mbp->b_bcount = bytes;
                    642:        mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
                    643:        mbp->b_iodone = uvm_aio_biodone;
                    644:        mbp->b_vp = vp;
                    645:        LIST_INIT(&mbp->b_dep);
                    646:
                    647:        /*
                    648:         * if EOF is in the middle of the last page, zero the part past EOF.
                    649:         */
                    650:
1.23      chs       651:        if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
1.21      chs       652:                memset((void *)(kva + bytes), 0, tailbytes);
                    653:        }
                    654:
                    655:        /*
                    656:         * now loop over the pages, reading as needed.
                    657:         */
                    658:
                    659:        if (write) {
                    660:                lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
                    661:        } else {
                    662:                lockmgr(&vp->v_glock, LK_SHARED, NULL);
                    663:        }
                    664:
                    665:        bp = NULL;
                    666:        for (offset = startoffset;
                    667:             bytes > 0;
                    668:             offset += iobytes, bytes -= iobytes) {
                    669:
                    670:                /*
                    671:                 * skip pages which don't need to be read.
                    672:                 */
                    673:
                    674:                pidx = (offset - startoffset) >> PAGE_SHIFT;
                    675:                while ((pgs[pidx]->flags & PG_FAKE) == 0) {
                    676:                        size_t b;
                    677:
1.24      chs       678:                        KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.26      chs       679:                        b = MIN(PAGE_SIZE, bytes);
1.21      chs       680:                        offset += b;
                    681:                        bytes -= b;
                    682:                        skipbytes += b;
                    683:                        pidx++;
                    684:                        UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
                    685:                                    offset, 0,0,0);
                    686:                        if (bytes == 0) {
                    687:                                goto loopdone;
                    688:                        }
                    689:                }
                    690:
                    691:                /*
                    692:                 * bmap the file to find out the blkno to read from and
                    693:                 * how much we can read in one i/o.  if bmap returns an error,
                    694:                 * skip the rest of the top-level i/o.
                    695:                 */
                    696:
                    697:                lbn = offset >> fs_bshift;
                    698:                error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
                    699:                if (error) {
                    700:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
                    701:                                    lbn, error,0,0);
                    702:                        skipbytes += bytes;
                    703:                        goto loopdone;
                    704:                }
                    705:
                    706:                /*
                    707:                 * see how many pages can be read with this i/o.
                    708:                 * reduce the i/o size if necessary to avoid
                    709:                 * overwriting pages with valid data.
                    710:                 */
                    711:
1.26      chs       712:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                    713:                    bytes);
1.21      chs       714:                if (offset + iobytes > round_page(offset)) {
                    715:                        pcount = 1;
                    716:                        while (pidx + pcount < npages &&
                    717:                               pgs[pidx + pcount]->flags & PG_FAKE) {
                    718:                                pcount++;
                    719:                        }
1.26      chs       720:                        iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.21      chs       721:                                      (offset - trunc_page(offset)));
                    722:                }
                    723:
                    724:                /*
                    725:                 * if this block isn't allocated, zero it instead of reading it.
                    726:                 * if this is a read access, mark the pages we zeroed PG_RDONLY.
                    727:                 */
                    728:
                    729:                if (blkno < 0) {
                    730:                        UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
                    731:
                    732:                        sawhole = TRUE;
                    733:                        memset((char *)kva + (offset - startoffset), 0,
                    734:                               iobytes);
                    735:                        skipbytes += iobytes;
                    736:
                    737:                        if (!write) {
                    738:                                int holepages =
                    739:                                        (round_page(offset + iobytes) -
                    740:                                         trunc_page(offset)) >> PAGE_SHIFT;
                    741:                                for (i = 0; i < holepages; i++) {
                    742:                                        pgs[pidx + i]->flags |= PG_RDONLY;
                    743:                                }
                    744:                        }
                    745:                        continue;
                    746:                }
                    747:
                    748:                /*
                    749:                 * allocate a sub-buf for this piece of the i/o
                    750:                 * (or just use mbp if there's only 1 piece),
                    751:                 * and start it going.
                    752:                 */
                    753:
                    754:                if (offset == startoffset && iobytes == bytes) {
                    755:                        bp = mbp;
                    756:                } else {
                    757:                        s = splbio();
                    758:                        bp = pool_get(&bufpool, PR_WAITOK);
                    759:                        splx(s);
                    760:                        bp->b_data = (char *)kva + offset - startoffset;
                    761:                        bp->b_resid = bp->b_bcount = iobytes;
                    762:                        bp->b_flags = B_BUSY|B_READ|B_CALL;
                    763:                        bp->b_iodone = uvm_aio_biodone1;
                    764:                        bp->b_vp = vp;
                    765:                        LIST_INIT(&bp->b_dep);
                    766:                }
                    767:                bp->b_lblkno = 0;
                    768:                bp->b_private = mbp;
                    769:
                    770:                /* adjust physical blkno for partial blocks */
1.25      fvdl      771:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21      chs       772:                                       dev_bshift);
                    773:
                    774:                UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
                    775:                            bp, offset, iobytes, bp->b_blkno);
                    776:
                    777:                VOP_STRATEGY(bp);
                    778:        }
                    779:
                    780: loopdone:
                    781:        if (skipbytes) {
                    782:                s = splbio();
                    783:                if (error) {
                    784:                        mbp->b_flags |= B_ERROR;
                    785:                        mbp->b_error = error;
                    786:                }
                    787:                mbp->b_resid -= skipbytes;
                    788:                if (mbp->b_resid == 0) {
                    789:                        biodone(mbp);
                    790:                }
                    791:                splx(s);
                    792:        }
                    793:
                    794:        if (async) {
                    795:                UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
                    796:                lockmgr(&vp->v_glock, LK_RELEASE, NULL);
                    797:                return EINPROGRESS;
                    798:        }
                    799:        if (bp != NULL) {
                    800:                error = biowait(mbp);
                    801:        }
                    802:        s = splbio();
                    803:        pool_put(&bufpool, mbp);
                    804:        splx(s);
                    805:        uvm_pagermapout(kva, npages);
1.24      chs       806:        raoffset = startoffset + totalbytes;
1.21      chs       807:
                    808:        /*
                    809:         * if we encountered a hole then we have to do a little more work.
                    810:         * for read faults, we marked the page PG_RDONLY so that future
                    811:         * write accesses to the page will fault again.
                    812:         * for write faults, we must make sure that the backing store for
                    813:         * the page is completely allocated while the pages are locked.
                    814:         */
                    815:
                    816:        if (error == 0 && sawhole && write) {
                    817:                error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
                    818:                                   cred, 0);
                    819:                if (error) {
                    820:                        UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
                    821:                                    lbn, error,0,0);
                    822:                        lockmgr(&vp->v_glock, LK_RELEASE, NULL);
                    823:                        simple_lock(&uobj->vmobjlock);
                    824:                        goto out;
                    825:                }
                    826:        }
                    827:        lockmgr(&vp->v_glock, LK_RELEASE, NULL);
                    828:        simple_lock(&uobj->vmobjlock);
                    829:
                    830:        /*
                    831:         * see if we want to start any readahead.
                    832:         * XXXUBC for now, just read the next 128k on 64k boundaries.
                    833:         * this is pretty nonsensical, but it is 50% faster than reading
                    834:         * just the next 64k.
                    835:         */
                    836:
                    837: raout:
1.24      chs       838:        if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
1.21      chs       839:            PAGE_SHIFT <= 16) {
                    840:                int racount;
                    841:
                    842:                racount = 1 << (16 - PAGE_SHIFT);
                    843:                (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
                    844:                                    VM_PROT_READ, 0, 0);
                    845:                simple_lock(&uobj->vmobjlock);
                    846:
                    847:                racount = 1 << (16 - PAGE_SHIFT);
                    848:                (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
                    849:                                    VM_PROT_READ, 0, 0);
                    850:                simple_lock(&uobj->vmobjlock);
                    851:        }
                    852:
                    853:        /*
                    854:         * we're almost done!  release the pages...
                    855:         * for errors, we free the pages.
                    856:         * otherwise we activate them and mark them as valid and clean.
                    857:         * also, unbusy pages that were not actually requested.
                    858:         */
                    859:
                    860: out:
                    861:        if (error) {
                    862:                uvm_lock_pageq();
                    863:                for (i = 0; i < npages; i++) {
                    864:                        if (pgs[i] == NULL) {
                    865:                                continue;
                    866:                        }
                    867:                        UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
                    868:                                    pgs[i], pgs[i]->flags, 0,0);
1.26      chs       869:                        if (pgs[i]->flags & PG_WANTED) {
                    870:                                wakeup(pgs[i]);
                    871:                        }
                    872:                        if (pgs[i]->flags & PG_RELEASED) {
                    873:                                uvm_unlock_pageq();
                    874:                                (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
                    875:                                uvm_lock_pageq();
1.21      chs       876:                                continue;
                    877:                        }
1.26      chs       878:                        if (pgs[i]->flags & PG_FAKE) {
                    879:                                uvm_pagefree(pgs[i]);
1.21      chs       880:                        }
                    881:                }
                    882:                uvm_unlock_pageq();
                    883:                simple_unlock(&uobj->vmobjlock);
                    884:                UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
                    885:                return error;
                    886:        }
                    887:
                    888:        UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.26      chs       889:        uvm_lock_pageq();
1.21      chs       890:        for (i = 0; i < npages; i++) {
                    891:                if (pgs[i] == NULL) {
                    892:                        continue;
                    893:                }
                    894:                UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
                    895:                            pgs[i], pgs[i]->flags, 0,0);
                    896:                if (pgs[i]->flags & PG_FAKE) {
                    897:                        UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
                    898:                                    pgs[i], pgs[i]->offset,0,0);
                    899:                        pgs[i]->flags &= ~(PG_FAKE);
                    900:                        pmap_clear_modify(pgs[i]);
                    901:                        pmap_clear_reference(pgs[i]);
                    902:                }
                    903:                if (write) {
                    904:                        pgs[i]->flags &= ~(PG_RDONLY);
                    905:                }
                    906:                if (i < ridx || i >= ridx + orignpages || async) {
                    907:                        UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
                    908:                                    pgs[i], pgs[i]->offset,0,0);
                    909:                        if (pgs[i]->flags & PG_WANTED) {
                    910:                                wakeup(pgs[i]);
                    911:                        }
1.26      chs       912:                        if (pgs[i]->flags & PG_RELEASED) {
                    913:                                uvm_unlock_pageq();
                    914:                                (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
                    915:                                uvm_lock_pageq();
                    916:                                continue;
1.21      chs       917:                        }
1.26      chs       918:                        uvm_pageactivate(pgs[i]);
1.21      chs       919:                        pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
                    920:                        UVM_PAGE_OWN(pgs[i], NULL);
                    921:                }
                    922:        }
1.26      chs       923:        uvm_unlock_pageq();
1.21      chs       924:        simple_unlock(&uobj->vmobjlock);
                    925:        if (ap->a_m != NULL) {
                    926:                memcpy(ap->a_m, &pgs[ridx],
                    927:                       orignpages * sizeof(struct vm_page *));
                    928:        }
                    929:        return 0;
                    930: }
                    931:
                    932: /*
                    933:  * generic VM putpages routine.
                    934:  * Write the given range of pages to backing store.
                    935:  */
                    936:
                    937: int
                    938: genfs_putpages(v)
                    939:        void *v;
                    940: {
                    941:        struct vop_putpages_args /* {
                    942:                struct vnode *a_vp;
                    943:                struct vm_page **a_m;
                    944:                int a_count;
                    945:                int a_flags;
                    946:                int *a_rtvals;
                    947:        } */ *ap = v;
                    948:
                    949:        int s, error, error2, npages, run;
                    950:        int fs_bshift, dev_bshift, dev_bsize;
                    951:        vaddr_t kva;
                    952:        off_t eof, offset, startoffset;
                    953:        size_t bytes, iobytes, skipbytes;
                    954:        daddr_t lbn, blkno;
                    955:        struct vm_page *pg;
                    956:        struct buf *mbp, *bp;
                    957:        struct vnode *vp = ap->a_vp;
                    958:        boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
                    959:        UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
                    960:
                    961:        simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
                    962:
                    963:        error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
                    964:        if (error) {
                    965:                return error;
                    966:        }
                    967:
                    968:        error = error2 = 0;
                    969:        npages = ap->a_count;
                    970:        fs_bshift = vp->v_mount->mnt_fs_bshift;
                    971:        dev_bshift = vp->v_mount->mnt_dev_bshift;
                    972:        dev_bsize = 1 << dev_bshift;
                    973:        KASSERT((eof & (dev_bsize - 1)) == 0);
                    974:
                    975:        pg = ap->a_m[0];
                    976:        startoffset = pg->offset;
1.26      chs       977:        bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1.21      chs       978:        skipbytes = 0;
                    979:        KASSERT(bytes != 0);
                    980:
                    981:        kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);
                    982:
                    983:        s = splbio();
                    984:        vp->v_numoutput += 2;
                    985:        mbp = pool_get(&bufpool, PR_WAITOK);
                    986:        UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
                    987:                    vp, mbp, vp->v_numoutput, bytes);
                    988:        splx(s);
                    989:        mbp->b_bufsize = npages << PAGE_SHIFT;
                    990:        mbp->b_data = (void *)kva;
                    991:        mbp->b_resid = mbp->b_bcount = bytes;
                    992:        mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
                    993:                (async ? B_CALL : 0) |
                    994:                (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
                    995:        mbp->b_iodone = uvm_aio_biodone;
                    996:        mbp->b_vp = vp;
                    997:        LIST_INIT(&mbp->b_dep);
                    998:
                    999:        bp = NULL;
                   1000:        for (offset = startoffset;
                   1001:             bytes > 0;
                   1002:             offset += iobytes, bytes -= iobytes) {
                   1003:                lbn = offset >> fs_bshift;
                   1004:                error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
                   1005:                if (error) {
                   1006:                        UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
                   1007:                        skipbytes += bytes;
                   1008:                        bytes = 0;
                   1009:                        break;
                   1010:                }
                   1011:
1.26      chs      1012:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                   1013:                    bytes);
1.21      chs      1014:                if (blkno == (daddr_t)-1) {
                   1015:                        skipbytes += iobytes;
                   1016:                        continue;
                   1017:                }
                   1018:
                   1019:                /* if it's really one i/o, don't make a second buf */
                   1020:                if (offset == startoffset && iobytes == bytes) {
                   1021:                        bp = mbp;
                   1022:                } else {
                   1023:                        s = splbio();
                   1024:                        vp->v_numoutput++;
                   1025:                        bp = pool_get(&bufpool, PR_WAITOK);
                   1026:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
                   1027:                                    vp, bp, vp->v_numoutput, 0);
                   1028:                        splx(s);
                   1029:                        bp->b_data = (char *)kva +
                   1030:                                (vaddr_t)(offset - pg->offset);
                   1031:                        bp->b_resid = bp->b_bcount = iobytes;
                   1032:                        bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
                   1033:                        bp->b_iodone = uvm_aio_biodone1;
                   1034:                        bp->b_vp = vp;
                   1035:                        LIST_INIT(&bp->b_dep);
                   1036:                }
                   1037:                bp->b_lblkno = 0;
                   1038:                bp->b_private = mbp;
                   1039:
                   1040:                /* adjust physical blkno for partial blocks */
1.25      fvdl     1041:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21      chs      1042:                                       dev_bshift);
                   1043:                UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
                   1044:                            vp, offset, bp->b_bcount, bp->b_blkno);
                   1045:                VOP_STRATEGY(bp);
                   1046:        }
                   1047:        if (skipbytes) {
                   1048:                UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0);
                   1049:                s = splbio();
                   1050:                mbp->b_resid -= skipbytes;
                   1051:                if (mbp->b_resid == 0) {
                   1052:                        biodone(mbp);
                   1053:                }
                   1054:                splx(s);
                   1055:        }
                   1056:        if (async) {
                   1057:                UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
                   1058:                return EINPROGRESS;
                   1059:        }
                   1060:        if (bp != NULL) {
                   1061:                UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
                   1062:                error2 = biowait(mbp);
                   1063:        }
1.22      chs      1064:        if (bioops.io_pageiodone) {
                   1065:                (*bioops.io_pageiodone)(mbp);
1.21      chs      1066:        }
                   1067:        s = splbio();
                   1068:        vwakeup(mbp);
                   1069:        pool_put(&bufpool, mbp);
                   1070:        splx(s);
                   1071:        uvm_pagermapout(kva, npages);
                   1072:        UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
                   1073:        return error ? error : error2;
                   1074: }
                   1075:
                   1076: int
                   1077: genfs_size(v)
                   1078:        void *v;
                   1079: {
                   1080:        struct vop_size_args /* {
                   1081:                struct vnode *a_vp;
                   1082:                off_t a_size;
                   1083:                off_t *a_eobp;
                   1084:        } */ *ap = v;
                   1085:        int bsize;
                   1086:
                   1087:        bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
1.24      chs      1088:        *ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
1.21      chs      1089:        return 0;
1.1       mycroft  1090: }
CVSweb <webmaster@jp.NetBSD.org>