[BACK]Return to genfs_io.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / miscfs / genfs

Annotation of src/sys/miscfs/genfs/genfs_io.c, Revision 1.68.6.1

1.68.6.1! bouyer      1: /*     $NetBSD: genfs_io.c,v 1.69 2017/06/04 08:05:42 hannken Exp $    */
1.1       pooka       2:
                      3: /*
                      4:  * Copyright (c) 1982, 1986, 1989, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  * 3. Neither the name of the University nor the names of its contributors
                     16:  *    may be used to endorse or promote products derived from this software
                     17:  *    without specific prior written permission.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     20:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     22:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     23:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     24:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     25:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     26:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     27:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     29:  * SUCH DAMAGE.
                     30:  *
                     31:  */
                     32:
                     33: #include <sys/cdefs.h>
1.68.6.1! bouyer     34: __KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.69 2017/06/04 08:05:42 hannken Exp $");
1.1       pooka      35:
                     36: #include <sys/param.h>
                     37: #include <sys/systm.h>
                     38: #include <sys/proc.h>
                     39: #include <sys/kernel.h>
                     40: #include <sys/mount.h>
                     41: #include <sys/vnode.h>
                     42: #include <sys/kmem.h>
                     43: #include <sys/kauth.h>
                     44: #include <sys/fstrans.h>
1.15      pooka      45: #include <sys/buf.h>
1.1       pooka      46:
                     47: #include <miscfs/genfs/genfs.h>
                     48: #include <miscfs/genfs/genfs_node.h>
                     49: #include <miscfs/specfs/specdev.h>
                     50:
                     51: #include <uvm/uvm.h>
                     52: #include <uvm/uvm_pager.h>
                     53:
                     54: static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *,
                     55:     off_t, enum uio_rw);
                     56: static void genfs_dio_iodone(struct buf *);
                     57:
1.59      riastrad   58: static int genfs_getpages_read(struct vnode *, struct vm_page **, int, off_t,
                     59:     off_t, bool, bool, bool, bool);
1.1       pooka      60: static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw,
                     61:     void (*)(struct buf *));
1.55      yamt       62: static void genfs_rel_pages(struct vm_page **, unsigned int);
1.38      chs        63: static void genfs_markdirty(struct vnode *);
1.1       pooka      64:
                     65: int genfs_maxdio = MAXPHYS;
                     66:
1.38      chs        67: static void
1.55      yamt       68: genfs_rel_pages(struct vm_page **pgs, unsigned int npages)
1.1       pooka      69: {
1.55      yamt       70:        unsigned int i;
1.1       pooka      71:
                     72:        for (i = 0; i < npages; i++) {
                     73:                struct vm_page *pg = pgs[i];
                     74:
                     75:                if (pg == NULL || pg == PGO_DONTCARE)
                     76:                        continue;
1.55      yamt       77:                KASSERT(uvm_page_locked_p(pg));
1.1       pooka      78:                if (pg->flags & PG_FAKE) {
                     79:                        pg->flags |= PG_RELEASED;
                     80:                }
                     81:        }
1.2       ad         82:        mutex_enter(&uvm_pageqlock);
1.1       pooka      83:        uvm_page_unbusy(pgs, npages);
1.2       ad         84:        mutex_exit(&uvm_pageqlock);
1.1       pooka      85: }
                     86:
1.38      chs        87: static void
                     88: genfs_markdirty(struct vnode *vp)
                     89: {
                     90:        struct genfs_node * const gp = VTOG(vp);
                     91:
1.49      rmind      92:        KASSERT(mutex_owned(vp->v_interlock));
1.38      chs        93:        gp->g_dirtygen++;
                     94:        if ((vp->v_iflag & VI_ONWORKLST) == 0) {
                     95:                vn_syncer_add_to_worklist(vp, filedelay);
                     96:        }
                     97:        if ((vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP) {
                     98:                vp->v_iflag |= VI_WRMAPDIRTY;
                     99:        }
                    100: }
                    101:
1.1       pooka     102: /*
                    103:  * generic VM getpages routine.
                    104:  * Return PG_BUSY pages for the given range,
                    105:  * reading from backing store if necessary.
                    106:  */
                    107:
                    108: int
                    109: genfs_getpages(void *v)
                    110: {
                    111:        struct vop_getpages_args /* {
                    112:                struct vnode *a_vp;
                    113:                voff_t a_offset;
                    114:                struct vm_page **a_m;
                    115:                int *a_count;
                    116:                int a_centeridx;
                    117:                vm_prot_t a_access_type;
                    118:                int a_advice;
                    119:                int a_flags;
1.22      uebayasi  120:        } */ * const ap = v;
1.1       pooka     121:
1.24      uebayasi  122:        off_t diskeof, memeof;
1.31      uebayasi  123:        int i, error, npages;
1.10      yamt      124:        const int flags = ap->a_flags;
1.22      uebayasi  125:        struct vnode * const vp = ap->a_vp;
                    126:        struct uvm_object * const uobj = &vp->v_uobj;
1.10      yamt      127:        const bool async = (flags & PGO_SYNCIO) == 0;
1.35      uebayasi  128:        const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.10      yamt      129:        const bool overwrite = (flags & PGO_OVERWRITE) != 0;
1.35      uebayasi  130:        const bool blockalloc = memwrite && (flags & PGO_NOBLOCKALLOC) == 0;
1.40      chs       131:        const bool glocked = (flags & PGO_GLOCKHELD) != 0;
1.64      hannken   132:        bool holds_wapbl = false;
                    133:        struct mount *trans_mount = NULL;
1.1       pooka     134:        UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
                    135:
                    136:        UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
                    137:            vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
                    138:
                    139:        KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
                    140:            vp->v_type == VLNK || vp->v_type == VBLK);
                    141:
                    142: startover:
                    143:        error = 0;
1.27      uebayasi  144:        const voff_t origvsize = vp->v_size;
                    145:        const off_t origoffset = ap->a_offset;
1.29      uebayasi  146:        const int orignpages = *ap->a_count;
1.33      uebayasi  147:
1.1       pooka     148:        GOP_SIZE(vp, origvsize, &diskeof, 0);
                    149:        if (flags & PGO_PASTEOF) {
1.24      uebayasi  150:                off_t newsize;
1.1       pooka     151: #if defined(DIAGNOSTIC)
                    152:                off_t writeeof;
                    153: #endif /* defined(DIAGNOSTIC) */
                    154:
                    155:                newsize = MAX(origvsize,
                    156:                    origoffset + (orignpages << PAGE_SHIFT));
                    157:                GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
                    158: #if defined(DIAGNOSTIC)
                    159:                GOP_SIZE(vp, vp->v_writesize, &writeeof, GOP_SIZE_MEM);
                    160:                if (newsize > round_page(writeeof)) {
1.39      pooka     161:                        panic("%s: past eof: %" PRId64 " vs. %" PRId64,
                    162:                            __func__, newsize, round_page(writeeof));
1.1       pooka     163:                }
                    164: #endif /* defined(DIAGNOSTIC) */
                    165:        } else {
                    166:                GOP_SIZE(vp, origvsize, &memeof, GOP_SIZE_MEM);
                    167:        }
                    168:        KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages);
                    169:        KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
                    170:        KASSERT(orignpages > 0);
                    171:
                    172:        /*
                    173:         * Bounds-check the request.
                    174:         */
                    175:
                    176:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
                    177:                if ((flags & PGO_LOCKED) == 0) {
1.49      rmind     178:                        mutex_exit(uobj->vmobjlock);
1.1       pooka     179:                }
                    180:                UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
                    181:                    origoffset, *ap->a_count, memeof,0);
                    182:                error = EINVAL;
                    183:                goto out_err;
                    184:        }
                    185:
                    186:        /* uobj is locked */
                    187:
                    188:        if ((flags & PGO_NOTIMESTAMP) == 0 &&
                    189:            (vp->v_type != VBLK ||
                    190:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                    191:                int updflags = 0;
                    192:
                    193:                if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
                    194:                        updflags = GOP_UPDATE_ACCESSED;
                    195:                }
1.35      uebayasi  196:                if (memwrite) {
1.1       pooka     197:                        updflags |= GOP_UPDATE_MODIFIED;
                    198:                }
                    199:                if (updflags != 0) {
                    200:                        GOP_MARKUPDATE(vp, updflags);
                    201:                }
                    202:        }
                    203:
                    204:        /*
                    205:         * For PGO_LOCKED requests, just return whatever's in memory.
                    206:         */
                    207:
                    208:        if (flags & PGO_LOCKED) {
                    209:                int nfound;
1.31      uebayasi  210:                struct vm_page *pg;
1.1       pooka     211:
1.40      chs       212:                KASSERT(!glocked);
1.1       pooka     213:                npages = *ap->a_count;
                    214: #if defined(DEBUG)
                    215:                for (i = 0; i < npages; i++) {
                    216:                        pg = ap->a_m[i];
                    217:                        KASSERT(pg == NULL || pg == PGO_DONTCARE);
                    218:                }
                    219: #endif /* defined(DEBUG) */
                    220:                nfound = uvn_findpages(uobj, origoffset, &npages,
1.35      uebayasi  221:                    ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
1.1       pooka     222:                KASSERT(npages == *ap->a_count);
                    223:                if (nfound == 0) {
                    224:                        error = EBUSY;
                    225:                        goto out_err;
                    226:                }
1.23      uebayasi  227:                if (!genfs_node_rdtrylock(vp)) {
1.1       pooka     228:                        genfs_rel_pages(ap->a_m, npages);
                    229:
                    230:                        /*
                    231:                         * restore the array.
                    232:                         */
                    233:
                    234:                        for (i = 0; i < npages; i++) {
                    235:                                pg = ap->a_m[i];
                    236:
1.41      uebayasi  237:                                if (pg != NULL && pg != PGO_DONTCARE) {
1.1       pooka     238:                                        ap->a_m[i] = NULL;
                    239:                                }
1.46      uebayasi  240:                                KASSERT(ap->a_m[i] == NULL ||
                    241:                                    ap->a_m[i] == PGO_DONTCARE);
1.1       pooka     242:                        }
                    243:                } else {
1.23      uebayasi  244:                        genfs_node_unlock(vp);
1.1       pooka     245:                }
                    246:                error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.38      chs       247:                if (error == 0 && memwrite) {
                    248:                        genfs_markdirty(vp);
                    249:                }
1.1       pooka     250:                goto out_err;
                    251:        }
1.49      rmind     252:        mutex_exit(uobj->vmobjlock);
1.1       pooka     253:
                    254:        /*
                    255:         * find the requested pages and make some simple checks.
                    256:         * leave space in the page array for a whole block.
                    257:         */
                    258:
1.27      uebayasi  259:        const int fs_bshift = (vp->v_type != VBLK) ?
                    260:            vp->v_mount->mnt_fs_bshift : DEV_BSHIFT;
                    261:        const int fs_bsize = 1 << fs_bshift;
1.30      uebayasi  262: #define        blk_mask        (fs_bsize - 1)
                    263: #define        trunc_blk(x)    ((x) & ~blk_mask)
                    264: #define        round_blk(x)    (((x) + blk_mask) & ~blk_mask)
1.1       pooka     265:
1.29      uebayasi  266:        const int orignmempages = MIN(orignpages,
1.1       pooka     267:            round_page(memeof - origoffset) >> PAGE_SHIFT);
1.29      uebayasi  268:        npages = orignmempages;
1.30      uebayasi  269:        const off_t startoffset = trunc_blk(origoffset);
                    270:        const off_t endoffset = MIN(
                    271:            round_page(round_blk(origoffset + (npages << PAGE_SHIFT))),
                    272:            round_page(memeof));
1.31      uebayasi  273:        const int ridx = (origoffset - startoffset) >> PAGE_SHIFT;
1.1       pooka     274:
1.33      uebayasi  275:        const int pgs_size = sizeof(struct vm_page *) *
1.1       pooka     276:            ((endoffset - startoffset) >> PAGE_SHIFT);
1.33      uebayasi  277:        struct vm_page **pgs, *pgs_onstack[UBC_MAX_PAGES];
1.31      uebayasi  278:
1.1       pooka     279:        if (pgs_size > sizeof(pgs_onstack)) {
                    280:                pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP);
                    281:                if (pgs == NULL) {
                    282:                        pgs = pgs_onstack;
                    283:                        error = ENOMEM;
1.32      uebayasi  284:                        goto out_err;
1.1       pooka     285:                }
                    286:        } else {
1.14      christos  287:                pgs = pgs_onstack;
                    288:                (void)memset(pgs, 0, pgs_size);
1.1       pooka     289:        }
1.14      christos  290:
1.1       pooka     291:        UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
                    292:            ridx, npages, startoffset, endoffset);
                    293:
1.64      hannken   294:        if (trans_mount == NULL) {
                    295:                trans_mount = vp->v_mount;
1.68.6.1! bouyer    296:                fstrans_start(trans_mount);
1.64      hannken   297:                /*
                    298:                 * check if this vnode is still valid.
                    299:                 */
                    300:                mutex_enter(vp->v_interlock);
                    301:                error = vdead_check(vp, 0);
                    302:                mutex_exit(vp->v_interlock);
                    303:                if (error)
                    304:                        goto out_err_free;
1.42      hannken   305:                /*
                    306:                 * XXX: This assumes that we come here only via
                    307:                 * the mmio path
                    308:                 */
1.64      hannken   309:                if (blockalloc && vp->v_mount->mnt_wapbl) {
                    310:                        error = WAPBL_BEGIN(trans_mount);
                    311:                        if (error)
1.42      hannken   312:                                goto out_err_free;
1.64      hannken   313:                        holds_wapbl = true;
1.42      hannken   314:                }
1.1       pooka     315:        }
                    316:
                    317:        /*
                    318:         * hold g_glock to prevent a race with truncate.
                    319:         *
                    320:         * check if our idea of v_size is still valid.
                    321:         */
                    322:
1.40      chs       323:        KASSERT(!glocked || genfs_node_wrlocked(vp));
                    324:        if (!glocked) {
                    325:                if (blockalloc) {
                    326:                        genfs_node_wrlock(vp);
                    327:                } else {
                    328:                        genfs_node_rdlock(vp);
                    329:                }
1.1       pooka     330:        }
1.49      rmind     331:        mutex_enter(uobj->vmobjlock);
1.1       pooka     332:        if (vp->v_size < origvsize) {
1.40      chs       333:                if (!glocked) {
                    334:                        genfs_node_unlock(vp);
                    335:                }
1.1       pooka     336:                if (pgs != pgs_onstack)
                    337:                        kmem_free(pgs, pgs_size);
                    338:                goto startover;
                    339:        }
                    340:
                    341:        if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
1.29      uebayasi  342:            async ? UFP_NOWAIT : UFP_ALL) != orignmempages) {
1.40      chs       343:                if (!glocked) {
                    344:                        genfs_node_unlock(vp);
                    345:                }
1.1       pooka     346:                KASSERT(async != 0);
1.29      uebayasi  347:                genfs_rel_pages(&pgs[ridx], orignmempages);
1.49      rmind     348:                mutex_exit(uobj->vmobjlock);
1.1       pooka     349:                error = EBUSY;
1.33      uebayasi  350:                goto out_err_free;
1.1       pooka     351:        }
                    352:
                    353:        /*
                    354:         * if the pages are already resident, just return them.
                    355:         */
                    356:
                    357:        for (i = 0; i < npages; i++) {
1.31      uebayasi  358:                struct vm_page *pg = pgs[ridx + i];
1.1       pooka     359:
1.31      uebayasi  360:                if ((pg->flags & PG_FAKE) ||
                    361:                    (blockalloc && (pg->flags & PG_RDONLY))) {
1.1       pooka     362:                        break;
                    363:                }
                    364:        }
                    365:        if (i == npages) {
1.40      chs       366:                if (!glocked) {
                    367:                        genfs_node_unlock(vp);
                    368:                }
1.1       pooka     369:                UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
                    370:                npages += ridx;
                    371:                goto out;
                    372:        }
                    373:
                    374:        /*
                    375:         * if PGO_OVERWRITE is set, don't bother reading the pages.
                    376:         */
                    377:
                    378:        if (overwrite) {
1.40      chs       379:                if (!glocked) {
                    380:                        genfs_node_unlock(vp);
                    381:                }
1.1       pooka     382:                UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
                    383:
                    384:                for (i = 0; i < npages; i++) {
1.31      uebayasi  385:                        struct vm_page *pg = pgs[ridx + i];
1.1       pooka     386:
1.31      uebayasi  387:                        pg->flags &= ~(PG_RDONLY|PG_CLEAN);
1.1       pooka     388:                }
                    389:                npages += ridx;
                    390:                goto out;
                    391:        }
                    392:
                    393:        /*
                    394:         * the page wasn't resident and we're not overwriting,
                    395:         * so we're going to have to do some i/o.
                    396:         * find any additional pages needed to cover the expanded range.
                    397:         */
                    398:
                    399:        npages = (endoffset - startoffset) >> PAGE_SHIFT;
1.29      uebayasi  400:        if (startoffset != origoffset || npages != orignmempages) {
1.31      uebayasi  401:                int npgs;
1.1       pooka     402:
                    403:                /*
                    404:                 * we need to avoid deadlocks caused by locking
                    405:                 * additional pages at lower offsets than pages we
                    406:                 * already have locked.  unlock them all and start over.
                    407:                 */
                    408:
1.29      uebayasi  409:                genfs_rel_pages(&pgs[ridx], orignmempages);
1.1       pooka     410:                memset(pgs, 0, pgs_size);
                    411:
                    412:                UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
                    413:                    startoffset, endoffset, 0,0);
                    414:                npgs = npages;
                    415:                if (uvn_findpages(uobj, startoffset, &npgs, pgs,
                    416:                    async ? UFP_NOWAIT : UFP_ALL) != npages) {
1.40      chs       417:                        if (!glocked) {
                    418:                                genfs_node_unlock(vp);
                    419:                        }
1.1       pooka     420:                        KASSERT(async != 0);
                    421:                        genfs_rel_pages(pgs, npages);
1.49      rmind     422:                        mutex_exit(uobj->vmobjlock);
1.1       pooka     423:                        error = EBUSY;
1.33      uebayasi  424:                        goto out_err_free;
1.1       pooka     425:                }
                    426:        }
1.34      uebayasi  427:
1.49      rmind     428:        mutex_exit(uobj->vmobjlock);
1.59      riastrad  429:        error = genfs_getpages_read(vp, pgs, npages, startoffset, diskeof,
                    430:            async, memwrite, blockalloc, glocked);
                    431:        if (!glocked) {
                    432:                genfs_node_unlock(vp);
                    433:        }
1.67      riastrad  434:        if (error == 0 && async)
                    435:                goto out_err_free;
1.59      riastrad  436:        mutex_enter(uobj->vmobjlock);
                    437:
                    438:        /*
                    439:         * we're almost done!  release the pages...
                    440:         * for errors, we free the pages.
                    441:         * otherwise we activate them and mark them as valid and clean.
                    442:         * also, unbusy pages that were not actually requested.
                    443:         */
                    444:
                    445:        if (error) {
                    446:                genfs_rel_pages(pgs, npages);
                    447:                mutex_exit(uobj->vmobjlock);
                    448:                UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
                    449:                goto out_err_free;
                    450:        }
                    451:
                    452: out:
                    453:        UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
                    454:        error = 0;
                    455:        mutex_enter(&uvm_pageqlock);
                    456:        for (i = 0; i < npages; i++) {
                    457:                struct vm_page *pg = pgs[i];
                    458:                if (pg == NULL) {
                    459:                        continue;
                    460:                }
                    461:                UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
                    462:                    pg, pg->flags, 0,0);
                    463:                if (pg->flags & PG_FAKE && !overwrite) {
                    464:                        pg->flags &= ~(PG_FAKE);
                    465:                        pmap_clear_modify(pgs[i]);
                    466:                }
                    467:                KASSERT(!memwrite || !blockalloc || (pg->flags & PG_RDONLY) == 0);
                    468:                if (i < ridx || i >= ridx + orignmempages || async) {
                    469:                        UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
                    470:                            pg, pg->offset,0,0);
                    471:                        if (pg->flags & PG_WANTED) {
                    472:                                wakeup(pg);
                    473:                        }
                    474:                        if (pg->flags & PG_FAKE) {
                    475:                                KASSERT(overwrite);
                    476:                                uvm_pagezero(pg);
                    477:                        }
                    478:                        if (pg->flags & PG_RELEASED) {
                    479:                                uvm_pagefree(pg);
                    480:                                continue;
                    481:                        }
                    482:                        uvm_pageenqueue(pg);
                    483:                        pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
                    484:                        UVM_PAGE_OWN(pg, NULL);
                    485:                }
                    486:        }
                    487:        mutex_exit(&uvm_pageqlock);
                    488:        if (memwrite) {
                    489:                genfs_markdirty(vp);
                    490:        }
                    491:        mutex_exit(uobj->vmobjlock);
                    492:        if (ap->a_m != NULL) {
                    493:                memcpy(ap->a_m, &pgs[ridx],
                    494:                    orignmempages * sizeof(struct vm_page *));
                    495:        }
1.1       pooka     496:
1.59      riastrad  497: out_err_free:
                    498:        if (pgs != NULL && pgs != pgs_onstack)
                    499:                kmem_free(pgs, pgs_size);
                    500: out_err:
1.64      hannken   501:        if (trans_mount != NULL) {
                    502:                if (holds_wapbl)
                    503:                        WAPBL_END(trans_mount);
                    504:                fstrans_done(trans_mount);
1.59      riastrad  505:        }
                    506:        return error;
                    507: }
                    508:
                    509: /*
                    510:  * genfs_getpages_read: Read the pages in with VOP_BMAP/VOP_STRATEGY.
1.68      dholland  511:  *
                    512:  * "glocked" (which is currently not actually used) tells us not whether
                    513:  * the genfs_node is locked on entry (it always is) but whether it was
                    514:  * locked on entry to genfs_getpages.
                          *
                          * The pages are mapped into kernel virtual address space and filled
                          * piece by piece.  Pages that do not have PG_FAKE set are skipped and
                          * left untouched, since they already hold valid data.
                          *
                          * Arguments, as used by the code below:
                          *   vp           vnode to read from.  The lock of its uvm_object
                          *                (vmobjlock) is taken here whenever page flags are
                          *                modified.
                          *   pgs, npages  the pages to fill.
                          *   startoffset  file offset that corresponds to pgs[0].
                          *   diskeof      nothing at or past this offset is read; the part of
                          *                the range beyond it is zeroed in PG_FAKE pages.
                          *   async        if true the reads are issued with B_ASYNC and
                          *                uvm_aio_biodone as completion callback, and the
                          *                function returns 0 without waiting for the i/o.
                          *   memwrite     if true, pages zeroed because they lie in a hole
                          *                have PG_CLEAN cleared.
                          *   blockalloc   if false, pages zeroed for a hole are marked
                          *                PG_RDONLY.  if true and a hole was seen, GOP_ALLOC
                          *                is called for the whole range once the synchronous
                          *                i/o has finished without error (this is never
                          *                reached for async requests).
                          *   glocked      unused, see above.
                          *
                          * Returns EBUSY if the pages cannot be mapped, 0 for async requests
                          * once the i/o has been started, and otherwise the error (or 0) left
                          * by VOP_BMAP, biowait or GOP_ALLOC.
1.59      riastrad  515:  */
                    516: static int
                    517: genfs_getpages_read(struct vnode *vp, struct vm_page **pgs, int npages,
                    518:     off_t startoffset, off_t diskeof,
                    519:     bool async, bool memwrite, bool blockalloc, bool glocked)
                    520: {
                    521:        struct uvm_object * const uobj = &vp->v_uobj;
                                /* block devices use DEV_BSHIFT; other vnodes ask their mount */
                    522:        const int fs_bshift = (vp->v_type != VBLK) ?
                    523:            vp->v_mount->mnt_fs_bshift : DEV_BSHIFT;
                    524:        const int dev_bshift = (vp->v_type != VBLK) ?
                    525:            vp->v_mount->mnt_dev_bshift : DEV_BSHIFT;
                    526:        kauth_cred_t const cred = curlwp->l_cred;               /* XXXUBC curlwp */
1.34      uebayasi  527:        size_t bytes, iobytes, tailstart, tailbytes, totalbytes, skipbytes;
                    528:        vaddr_t kva;
                    529:        struct buf *bp, *mbp;
                    530:        bool sawhole = false;
1.59      riastrad  531:        int i;
                    532:        int error = 0;
1.34      uebayasi  533:
1.60      skrll     534:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                    535:
1.1       pooka     536:        /*
                    537:         * read the desired page(s).
                                 *
                                 * totalbytes: size of the whole page range.
                                 * bytes:      part of the range before diskeof, i.e. what may
                                 *             have to be read.
                                 * tailbytes:  part at or past diskeof; zeroed below, not read.
                                 * skipbytes:  running count of bytes for which no i/o is
                                 *             issued; handed to nestiobuf_done() at loopdone.
                    538:         */
                    539:
                    540:        totalbytes = npages << PAGE_SHIFT;
                    541:        bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
                    542:        tailbytes = totalbytes - bytes;
                    543:        skipbytes = 0;
                    544:
                    545:        kva = uvm_pagermapin(pgs, npages,
1.55      yamt      546:            UVMPAGER_MAPIN_READ | (async ? 0 : UVMPAGER_MAPIN_WAITOK));
                                /* no mapping; nothing has been allocated yet, so just bail out */
1.59      riastrad  547:        if (kva == 0)
                    548:                return EBUSY;
1.1       pooka     549:
                                /*
                                 * mbp is the master buf for the whole request.  a piece is
                                 * issued either through mbp itself or through a sub-buf
                                 * attached to it with nestiobuf_setup() (see below).
                                 */
1.2       ad        550:        mbp = getiobuf(vp, true);
1.1       pooka     551:        mbp->b_bufsize = totalbytes;
                    552:        mbp->b_data = (void *)kva;
                    553:        mbp->b_resid = mbp->b_bcount = bytes;
1.2       ad        554:        mbp->b_cflags = BC_BUSY;
                    555:        if (async) {
                    556:                mbp->b_flags = B_READ | B_ASYNC;
                    557:                mbp->b_iodone = uvm_aio_biodone;
                    558:        } else {
                    559:                mbp->b_flags = B_READ;
                    560:                mbp->b_iodone = NULL;
1.43      uebayasi  561:        }
1.1       pooka     562:        if (async)
                    563:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
                    564:        else
                    565:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
                    566:
                    567:        /*
                    568:         * if EOF is in the middle of the range, zero the part past EOF.
                    569:         * skip over pages which are not PG_FAKE since in that case they have
                    570:         * valid data that we need to preserve.
                    571:         */
                    572:
                    573:        tailstart = bytes;
                    574:        while (tailbytes > 0) {
                                        /* handle the range up to the next page boundary */
                    575:                const int len = PAGE_SIZE - (tailstart & PAGE_MASK);
                    576:
                    577:                KASSERT(len <= tailbytes);
                    578:                if ((pgs[tailstart >> PAGE_SHIFT]->flags & PG_FAKE) != 0) {
                    579:                        memset((void *)(kva + tailstart), 0, len);
                    580:                        UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
                    581:                            kva, tailstart, len, 0);
                    582:                }
                    583:                tailstart += len;
                    584:                tailbytes -= len;
                    585:        }
                    586:
                    587:        /*
                    588:         * now loop over the pages, reading as needed.
                                 * each iteration handles one piece of iobytes bytes at
                                 * "offset"; bp stays NULL until a piece is actually issued.
                    589:         */
                    590:
                    591:        bp = NULL;
1.28      uebayasi  592:        off_t offset;
                    593:        for (offset = startoffset;
1.1       pooka     594:            bytes > 0;
                    595:            offset += iobytes, bytes -= iobytes) {
1.30      uebayasi  596:                int run;
1.25      uebayasi  597:                daddr_t lbn, blkno;
1.24      uebayasi  598:                int pidx;
1.26      uebayasi  599:                struct vnode *devvp;
1.1       pooka     600:
                    601:                /*
                    602:                 * skip pages which don't need to be read.
                    603:                 */
                    604:
                    605:                pidx = (offset - startoffset) >> PAGE_SHIFT;
                    606:                while ((pgs[pidx]->flags & PG_FAKE) == 0) {
                    607:                        size_t b;
                    608:
                    609:                        KASSERT((offset & (PAGE_SIZE - 1)) == 0);
                                                /*
                                                 * a valid but PG_RDONLY page is treated like
                                                 * a hole: remember it so that GOP_ALLOC runs
                                                 * below when blockalloc is set.
                                                 */
                    610:                        if ((pgs[pidx]->flags & PG_RDONLY)) {
                    611:                                sawhole = true;
                    612:                        }
                    613:                        b = MIN(PAGE_SIZE, bytes);
                    614:                        offset += b;
                    615:                        bytes -= b;
                    616:                        skipbytes += b;
                    617:                        pidx++;
                    618:                        UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
                    619:                            offset, 0,0,0);
                    620:                        if (bytes == 0) {
                    621:                                goto loopdone;
                    622:                        }
                    623:                }
                    624:
                    625:                /*
                    626:                 * bmap the file to find out the blkno to read from and
                    627:                 * how much we can read in one i/o.  if bmap returns an error,
                    628:                 * skip the rest of the top-level i/o.
                    629:                 */
                    630:
                    631:                lbn = offset >> fs_bshift;
                    632:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
                    633:                if (error) {
                    634:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
1.36      uebayasi  635:                            lbn,error,0,0);
1.1       pooka     636:                        skipbytes += bytes;
1.36      uebayasi  637:                        bytes = 0;
1.1       pooka     638:                        goto loopdone;
                    639:                }
                    640:
                    641:                /*
                    642:                 * see how many pages can be read with this i/o.
                    643:                 * reduce the i/o size if necessary to avoid
                    644:                 * overwriting pages with valid data.
                                         *
                                         * "run" is taken to be the number of blocks after lbn
                                         * that are contiguous on disk (hence lbn + 1 + run).
                                         * pcount counts the current page plus the PG_FAKE pages
                                         * directly following it; the i/o is clipped to those.
                    645:                 */
                    646:
                    647:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                    648:                    bytes);
                    649:                if (offset + iobytes > round_page(offset)) {
1.24      uebayasi  650:                        int pcount;
                    651:
1.1       pooka     652:                        pcount = 1;
                    653:                        while (pidx + pcount < npages &&
                    654:                            pgs[pidx + pcount]->flags & PG_FAKE) {
                    655:                                pcount++;
                    656:                        }
                    657:                        iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
                    658:                            (offset - trunc_page(offset)));
                    659:                }
                    660:
                    661:                /*
                    662:                 * if this block isn't allocated, zero it instead of
                    663:                 * reading it.  unless we are going to allocate blocks,
                    664:                 * mark the pages we zeroed PG_RDONLY.
                                         * the zeroed bytes count as skipped since no i/o is
                                         * issued for them.
                    665:                 */
                    666:
1.36      uebayasi  667:                if (blkno == (daddr_t)-1) {
1.1       pooka     668:                        int holepages = (round_page(offset + iobytes) -
                    669:                            trunc_page(offset)) >> PAGE_SHIFT;
                    670:                        UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
                    671:
                    672:                        sawhole = true;
                    673:                        memset((char *)kva + (offset - startoffset), 0,
                    674:                            iobytes);
                    675:                        skipbytes += iobytes;
                    676:
1.49      rmind     677:                        mutex_enter(uobj->vmobjlock);
1.1       pooka     678:                        for (i = 0; i < holepages; i++) {
1.35      uebayasi  679:                                if (memwrite) {
1.1       pooka     680:                                        pgs[pidx + i]->flags &= ~PG_CLEAN;
                    681:                                }
                    682:                                if (!blockalloc) {
                    683:                                        pgs[pidx + i]->flags |= PG_RDONLY;
                    684:                                }
                    685:                        }
1.49      rmind     686:                        mutex_exit(uobj->vmobjlock);
1.1       pooka     687:                        continue;
                    688:                }
                    689:
                    690:                /*
                    691:                 * allocate a sub-buf for this piece of the i/o
                    692:                 * (or just use mbp if there's only 1 piece),
                    693:                 * and start it going.
                    694:                 */
                    695:
                    696:                if (offset == startoffset && iobytes == bytes) {
                    697:                        bp = mbp;
                    698:                } else {
1.36      uebayasi  699:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
                    700:                            vp, bp, vp->v_numoutput, 0);
1.2       ad        701:                        bp = getiobuf(vp, true);
1.1       pooka     702:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
                    703:                }
                    704:                bp->b_lblkno = 0;
                    705:
                    706:                /* adjust physical blkno for partial blocks */
                    707:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
                    708:                    dev_bshift);
                    709:
                    710:                UVMHIST_LOG(ubchist,
                    711:                    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
1.36      uebayasi  712:                    bp, offset, bp->b_bcount, bp->b_blkno);
1.1       pooka     713:
                    714:                VOP_STRATEGY(devvp, bp);
                    715:        }
                    716:
                    717: loopdone:
                                /*
                                 * report the bytes for which no i/o was issued, and any
                                 * VOP_BMAP error, against the master buf.
                                 */
                    718:        nestiobuf_done(mbp, skipbytes, error);
                    719:        if (async) {
                                        /*
                                         * don't wait for the i/o.  note that kva is not
                                         * unmapped and mbp is not released on this path --
                                         * presumably the uvm_aio_biodone completion path
                                         * takes care of that (TODO confirm).
                                         */
                    720:                UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.59      riastrad  721:                return 0;
1.1       pooka     722:        }
                                /*
                                 * bp is only set once a piece has been passed to
                                 * VOP_STRATEGY; if none was, there is nothing to wait for
                                 * and error keeps the value left by the loop above.
                                 */
                    723:        if (bp != NULL) {
                    724:                error = biowait(mbp);
                    725:        }
                    726:
1.19      rmind     727:        /* Remove the mapping (make KVA available as soon as possible) */
                    728:        uvm_pagermapout(kva, npages);
                    729:
1.1       pooka     730:        /*
                    731:         * if we encountered a hole then we have to do a little more work.
                    732:         * for read faults, we marked the page PG_RDONLY so that future
                    733:         * write accesses to the page will fault again.
                    734:         * for write faults, we must make sure that the backing store for
                    735:         * the page is completely allocated while the pages are locked.
                    736:         */
                    737:
                    738:        if (!error && sawhole && blockalloc) {
1.42      hannken   739:                error = GOP_ALLOC(vp, startoffset,
                    740:                    npages << PAGE_SHIFT, 0, cred);
1.1       pooka     741:                UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
                    742:                    startoffset, npages << PAGE_SHIFT, error,0);
                    743:                if (!error) {
                                                /* blocks now exist: make every page dirty and writable */
1.49      rmind     744:                        mutex_enter(uobj->vmobjlock);
1.1       pooka     745:                        for (i = 0; i < npages; i++) {
1.31      uebayasi  746:                                struct vm_page *pg = pgs[i];
                    747:
                    748:                                if (pg == NULL) {
1.1       pooka     749:                                        continue;
                    750:                                }
1.31      uebayasi  751:                                pg->flags &= ~(PG_CLEAN|PG_RDONLY);
1.1       pooka     752:                                UVMHIST_LOG(ubchist, "mark dirty pg %p",
1.31      uebayasi  753:                                    pg,0,0,0);
1.1       pooka     754:                        }
1.49      rmind     755:                        mutex_exit(uobj->vmobjlock);
1.1       pooka     756:                }
                    757:        }
1.18      rmind     758:
                                /* synchronous path: the master buf is released here */
                    759:        putiobuf(mbp);
1.38      chs       760:        return error;
1.1       pooka     761: }
                    762:
                    763: /*
                    764:  * generic VM putpages routine.
                    765:  * Write the given range of pages to backing store.
                    766:  *
                    767:  * => "offhi == 0" means flush all pages at or after "offlo".
                    768:  * => object should be locked by caller.  we return with the
                    769:  *      object unlocked.
                    770:  * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
                    771:  *     thus, a caller might want to unlock higher level resources
                    772:  *     (e.g. vm_map) before calling flush.
                    773:  * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, we will not block
                    774:  * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
                    775:  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
                    776:  *     that new pages are inserted on the tail end of the list.   thus,
                    777:  *     we can make a complete pass through the object in one go by starting
                    778:  *     at the head and working towards the tail (new pages are put in
                    779:  *     front of us).
                    780:  * => NOTE: we are allowed to lock the page queues, so the caller
                    781:  *     must not be holding the page queue lock.
                    782:  *
                    783:  * note on "cleaning" object and PG_BUSY pages:
                    784:  *     this routine is holding the lock on the object.   the only time
                    785:  *     that it can run into a PG_BUSY page that it does not own is if
                    786:  *     some other process has started I/O on the page (e.g. either
                    787:  *     a pagein, or a pageout).    if the PG_BUSY page is being paged
                    788:  *     in, then it can not be dirty (!PG_CLEAN) because no one has
                    789:  *     had a chance to modify it yet.    if the PG_BUSY page is being
                    790:  *     paged out then it means that someone else has already started
                    791:  *     cleaning the page for us (how nice!).    in this case, if we
                    792:  *     have syncio specified, then after we make our pass through the
                    793:  *     object we need to wait for the other PG_BUSY pages to clear
                    794:  *     off (i.e. we need to do an iosync).   also note that once a
                    795:  *     page is PG_BUSY it must stay in its object until it is un-busyed.
                    796:  *
                    797:  * note on page traversal:
                    798:  *     we can traverse the pages in an object either by going down the
                    799:  *     linked list in "uobj->memq", or we can go over the address range
                    800:  *     by page doing hash table lookups for each address.    depending
                    801:  *     on how many pages are in the object it may be cheaper to do one
                    802:  *     or the other.   we set "by_list" to true if we are using memq.
                    803:  *     if the cost of a hash lookup was equal to the cost of the list
                    804:  *     traversal we could compare the number of pages in the start->stop
                    805:  *     range to the total number of pages in the object.   however, it
                    806:  *     seems that a hash table lookup is more expensive than the linked
                    807:  *     list traversal, so we multiply the number of pages in the
                    808:  *     range by an estimate of the relatively higher cost of the hash lookup.
                    809:  */
                    810:
                    811: int
                    812: genfs_putpages(void *v)
                    813: {
                                /* "v" points to the argument block whose layout is shown below */
                    814:        struct vop_putpages_args /* {
                    815:                struct vnode *a_vp;
                    816:                voff_t a_offlo;
                    817:                voff_t a_offhi;
                    818:                int a_flags;
1.22      uebayasi  819:        } */ * const ap = v;
1.1       pooka     820:
                                /*
                                 * all of the work is done by genfs_do_putpages().  passing
                                 * a NULL busypg means that a busy page which makes the
                                 * operation fail (PGO_BUSYFAIL) is not reported back; the
                                 * return value is passed through unchanged.
                                 */
                    821:        return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
                    822:            ap->a_flags, NULL);
                    823: }
                    824:
                    825: int
1.4       yamt      826: genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff,
                    827:     int origflags, struct vm_page **busypg)
1.1       pooka     828: {
1.22      uebayasi  829:        struct uvm_object * const uobj = &vp->v_uobj;
1.49      rmind     830:        kmutex_t * const slock = uobj->vmobjlock;
1.1       pooka     831:        off_t off;
1.2       ad        832:        int i, error, npages, nback;
1.1       pooka     833:        int freeflag;
1.63      christos  834:        /*
                    835:         * This array is larger than it should so that it's size is constant.
                    836:         * The right size is MAXPAGES.
                    837:         */
                    838:        struct vm_page *pgs[MAXPHYS / MIN_PAGE_SIZE];
                    839: #define MAXPAGES (MAXPHYS / PAGE_SIZE)
                    840:        struct vm_page *pg, *nextpg, *tpg, curmp, endmp;
1.1       pooka     841:        bool wasclean, by_list, needs_clean, yld;
1.4       yamt      842:        bool async = (origflags & PGO_SYNCIO) == 0;
1.1       pooka     843:        bool pagedaemon = curlwp == uvm.pagedaemon_lwp;
1.22      uebayasi  844:        struct lwp * const l = curlwp ? curlwp : &lwp0;
                    845:        struct genfs_node * const gp = VTOG(vp);
1.65      hannken   846:        struct mount *trans_mp;
1.4       yamt      847:        int flags;
1.1       pooka     848:        int dirtygen;
1.4       yamt      849:        bool modified;
1.65      hannken   850:        bool holds_wapbl;
1.1       pooka     851:        bool cleanall;
1.4       yamt      852:        bool onworklst;
1.1       pooka     853:
                    854:        UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
                    855:
1.4       yamt      856:        KASSERT(origflags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1.1       pooka     857:        KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
                    858:        KASSERT(startoff < endoff || endoff == 0);
                    859:
                    860:        UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
                    861:            vp, uobj->uo_npages, startoff, endoff - startoff);
                    862:
1.65      hannken   863:        trans_mp = NULL;
                    864:        holds_wapbl = false;
1.6       hannken   865:
1.4       yamt      866: retry:
                    867:        modified = false;
                    868:        flags = origflags;
1.1       pooka     869:        KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 ||
                    870:            (vp->v_iflag & VI_WRMAPDIRTY) == 0);
                    871:        if (uobj->uo_npages == 0) {
                    872:                if (vp->v_iflag & VI_ONWORKLST) {
                    873:                        vp->v_iflag &= ~VI_WRMAPDIRTY;
                    874:                        if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                    875:                                vn_syncer_remove_from_worklist(vp);
                    876:                }
1.65      hannken   877:                if (trans_mp) {
                    878:                        if (holds_wapbl)
                    879:                                WAPBL_END(trans_mp);
                    880:                        fstrans_done(trans_mp);
1.12      hannken   881:                }
1.2       ad        882:                mutex_exit(slock);
1.1       pooka     883:                return (0);
                    884:        }
                    885:
                    886:        /*
                    887:         * the vnode has pages, set up to process the request.
                    888:         */
                    889:
1.65      hannken   890:        if (trans_mp == NULL && (flags & PGO_CLEANIT) != 0) {
1.1       pooka     891:                if (pagedaemon) {
1.65      hannken   892:                        /* Pagedaemon must not sleep here. */
                    893:                        trans_mp = vp->v_mount;
1.68.6.1! bouyer    894:                        error = fstrans_start_nowait(trans_mp);
1.12      hannken   895:                        if (error) {
1.65      hannken   896:                                mutex_exit(slock);
1.12      hannken   897:                                return error;
                    898:                        }
1.65      hannken   899:                } else {
                    900:                        /*
                    901:                         * Cannot use vdeadcheck() here as this operation
                    902:                         * usually gets used from VOP_RECLAIM().  Test for
                    903:                         * change of v_mount instead and retry on change.
                    904:                         */
                    905:                        mutex_exit(slock);
                    906:                        trans_mp = vp->v_mount;
1.68.6.1! bouyer    907:                        fstrans_start(trans_mp);
1.65      hannken   908:                        if (vp->v_mount != trans_mp) {
                    909:                                fstrans_done(trans_mp);
                    910:                                trans_mp = NULL;
                    911:                        } else {
                    912:                                holds_wapbl = (trans_mp->mnt_wapbl &&
                    913:                                    (origflags & PGO_JOURNALLOCKED) == 0);
                    914:                                if (holds_wapbl) {
                    915:                                        error = WAPBL_BEGIN(trans_mp);
                    916:                                        if (error) {
                    917:                                                fstrans_done(trans_mp);
                    918:                                                return error;
                    919:                                        }
                    920:                                }
                    921:                        }
                    922:                        mutex_enter(slock);
                    923:                        goto retry;
1.12      hannken   924:                }
1.1       pooka     925:        }
                    926:
                    927:        error = 0;
                    928:        wasclean = (vp->v_numoutput == 0);
                    929:        off = startoff;
                    930:        if (endoff == 0 || flags & PGO_ALLPAGES) {
                    931:                endoff = trunc_page(LLONG_MAX);
                    932:        }
                    933:        by_list = (uobj->uo_npages <=
1.17      yamt      934:            ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);
1.1       pooka     935:
                    936:        /*
                    937:         * if this vnode is known not to have dirty pages,
                    938:         * don't bother to clean it out.
                    939:         */
                    940:
                    941:        if ((vp->v_iflag & VI_ONWORKLST) == 0) {
1.48      matt      942: #if !defined(DEBUG)
1.1       pooka     943:                if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
                    944:                        goto skip_scan;
                    945:                }
1.48      matt      946: #endif /* !defined(DEBUG) */
1.1       pooka     947:                flags &= ~PGO_CLEANIT;
                    948:        }
                    949:
                    950:        /*
                    951:         * start the loop.  when scanning by list, hold the last page
                    952:         * in the list before we start.  pages allocated after we start
                    953:         * will be added to the end of the list, so we can stop at the
                    954:         * current last page.
                    955:         */
                    956:
                    957:        cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
                    958:            startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
                    959:            (vp->v_iflag & VI_ONWORKLST) != 0;
                    960:        dirtygen = gp->g_dirtygen;
                    961:        freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
                    962:        if (by_list) {
1.37      hannken   963:                curmp.flags = PG_MARKER;
                    964:                endmp.flags = PG_MARKER;
1.1       pooka     965:                pg = TAILQ_FIRST(&uobj->memq);
1.8       ad        966:                TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
1.1       pooka     967:        } else {
                    968:                pg = uvm_pagelookup(uobj, off);
                    969:        }
                    970:        nextpg = NULL;
                    971:        while (by_list || off < endoff) {
                    972:
                    973:                /*
                    974:                 * if the current page is not interesting, move on to the next.
                    975:                 */
                    976:
1.37      hannken   977:                KASSERT(pg == NULL || pg->uobject == uobj ||
                    978:                    (pg->flags & PG_MARKER) != 0);
1.1       pooka     979:                KASSERT(pg == NULL ||
                    980:                    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1.37      hannken   981:                    (pg->flags & (PG_BUSY|PG_MARKER)) != 0);
1.1       pooka     982:                if (by_list) {
                    983:                        if (pg == &endmp) {
                    984:                                break;
                    985:                        }
1.37      hannken   986:                        if (pg->flags & PG_MARKER) {
                    987:                                pg = TAILQ_NEXT(pg, listq.queue);
                    988:                                continue;
                    989:                        }
1.1       pooka     990:                        if (pg->offset < startoff || pg->offset >= endoff ||
                    991:                            pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                    992:                                if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                    993:                                        wasclean = false;
                    994:                                }
1.8       ad        995:                                pg = TAILQ_NEXT(pg, listq.queue);
1.1       pooka     996:                                continue;
                    997:                        }
                    998:                        off = pg->offset;
                    999:                } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                   1000:                        if (pg != NULL) {
                   1001:                                wasclean = false;
                   1002:                        }
                   1003:                        off += PAGE_SIZE;
                   1004:                        if (off < endoff) {
                   1005:                                pg = uvm_pagelookup(uobj, off);
                   1006:                        }
                   1007:                        continue;
                   1008:                }
                   1009:
                   1010:                /*
                   1011:                 * if the current page needs to be cleaned and it's busy,
                   1012:                 * wait for it to become unbusy.
                   1013:                 */
                   1014:
                   1015:                yld = (l->l_cpu->ci_schedstate.spc_flags &
                   1016:                    SPCF_SHOULDYIELD) && !pagedaemon;
                   1017:                if (pg->flags & PG_BUSY || yld) {
                   1018:                        UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
                   1019:                        if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
                   1020:                                UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
                   1021:                                error = EDEADLK;
                   1022:                                if (busypg != NULL)
                   1023:                                        *busypg = pg;
                   1024:                                break;
                   1025:                        }
                   1026:                        if (pagedaemon) {
                   1027:                                /*
                   1028:                                 * someone has taken the page while we
                   1029:                                 * dropped the lock for fstrans_start.
                   1030:                                 */
                   1031:                                break;
                   1032:                        }
                   1033:                        if (by_list) {
1.8       ad       1034:                                TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
1.1       pooka    1035:                                UVMHIST_LOG(ubchist, "curmp next %p",
1.8       ad       1036:                                    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
1.1       pooka    1037:                        }
                   1038:                        if (yld) {
1.2       ad       1039:                                mutex_exit(slock);
1.1       pooka    1040:                                preempt();
1.2       ad       1041:                                mutex_enter(slock);
1.1       pooka    1042:                        } else {
                   1043:                                pg->flags |= PG_WANTED;
                   1044:                                UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1.2       ad       1045:                                mutex_enter(slock);
1.1       pooka    1046:                        }
                   1047:                        if (by_list) {
                   1048:                                UVMHIST_LOG(ubchist, "after next %p",
1.8       ad       1049:                                    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
                   1050:                                pg = TAILQ_NEXT(&curmp, listq.queue);
                   1051:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
1.1       pooka    1052:                        } else {
                   1053:                                pg = uvm_pagelookup(uobj, off);
                   1054:                        }
                   1055:                        continue;
                   1056:                }
                   1057:
                   1058:                /*
                   1059:                 * if we're freeing, remove all mappings of the page now.
                   1060:                 * if we're cleaning, check if the page needs to be cleaned.
                   1061:                 */
                   1062:
                   1063:                if (flags & PGO_FREE) {
                   1064:                        pmap_page_protect(pg, VM_PROT_NONE);
                   1065:                } else if (flags & PGO_CLEANIT) {
                   1066:
                   1067:                        /*
                   1068:                         * if we still have some hope to pull this vnode off
                   1069:                         * from the syncer queue, write-protect the page.
                   1070:                         */
                   1071:
                   1072:                        if (cleanall && wasclean &&
                   1073:                            gp->g_dirtygen == dirtygen) {
                   1074:
                   1075:                                /*
                   1076:                                 * uobj pages get wired only by uvm_fault
                   1077:                                 * where uobj is locked.
                   1078:                                 */
                   1079:
                   1080:                                if (pg->wire_count == 0) {
                   1081:                                        pmap_page_protect(pg,
                   1082:                                            VM_PROT_READ|VM_PROT_EXECUTE);
                   1083:                                } else {
                   1084:                                        cleanall = false;
                   1085:                                }
                   1086:                        }
                   1087:                }
                   1088:
                   1089:                if (flags & PGO_CLEANIT) {
                   1090:                        needs_clean = pmap_clear_modify(pg) ||
                   1091:                            (pg->flags & PG_CLEAN) == 0;
                   1092:                        pg->flags |= PG_CLEAN;
                   1093:                } else {
                   1094:                        needs_clean = false;
                   1095:                }
                   1096:
                   1097:                /*
                   1098:                 * if we're cleaning, build a cluster.
                   1099:                 * the cluster will consist of pages which are currently dirty,
                   1100:                 * but they will be returned to us marked clean.
                   1101:                 * if not cleaning, just operate on the one page.
                   1102:                 */
                   1103:
                   1104:                if (needs_clean) {
                   1105:                        KDASSERT((vp->v_iflag & VI_ONWORKLST));
                   1106:                        wasclean = false;
                   1107:                        memset(pgs, 0, sizeof(pgs));
                   1108:                        pg->flags |= PG_BUSY;
                   1109:                        UVM_PAGE_OWN(pg, "genfs_putpages");
                   1110:
                   1111:                        /*
                   1112:                         * first look backward.
                   1113:                         */
                   1114:
1.62      christos 1115:                        npages = MIN(MAXPAGES >> 1, off >> PAGE_SHIFT);
1.1       pooka    1116:                        nback = npages;
                   1117:                        uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
                   1118:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
                   1119:                        if (nback) {
                   1120:                                memmove(&pgs[0], &pgs[npages - nback],
                   1121:                                    nback * sizeof(pgs[0]));
                   1122:                                if (npages - nback < nback)
                   1123:                                        memset(&pgs[nback], 0,
                   1124:                                            (npages - nback) * sizeof(pgs[0]));
                   1125:                                else
                   1126:                                        memset(&pgs[npages - nback], 0,
                   1127:                                            nback * sizeof(pgs[0]));
                   1128:                        }
                   1129:
                   1130:                        /*
                   1131:                         * then plug in our page of interest.
                   1132:                         */
                   1133:
                   1134:                        pgs[nback] = pg;
                   1135:
                   1136:                        /*
                   1137:                         * then look forward to fill in the remaining space in
                   1138:                         * the array of pages.
                   1139:                         */
                   1140:
1.62      christos 1141:                        npages = MAXPAGES - nback - 1;
1.1       pooka    1142:                        uvn_findpages(uobj, off + PAGE_SIZE, &npages,
                   1143:                            &pgs[nback + 1],
                   1144:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
                   1145:                        npages += nback + 1;
                   1146:                } else {
                   1147:                        pgs[0] = pg;
                   1148:                        npages = 1;
                   1149:                        nback = 0;
                   1150:                }
                   1151:
                   1152:                /*
                   1153:                 * apply FREE or DEACTIVATE options if requested.
                   1154:                 */
                   1155:
                   1156:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1.2       ad       1157:                        mutex_enter(&uvm_pageqlock);
1.1       pooka    1158:                }
                   1159:                for (i = 0; i < npages; i++) {
                   1160:                        tpg = pgs[i];
                   1161:                        KASSERT(tpg->uobject == uobj);
1.8       ad       1162:                        if (by_list && tpg == TAILQ_NEXT(pg, listq.queue))
1.1       pooka    1163:                                pg = tpg;
                   1164:                        if (tpg->offset < startoff || tpg->offset >= endoff)
                   1165:                                continue;
                   1166:                        if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
                   1167:                                uvm_pagedeactivate(tpg);
                   1168:                        } else if (flags & PGO_FREE) {
                   1169:                                pmap_page_protect(tpg, VM_PROT_NONE);
                   1170:                                if (tpg->flags & PG_BUSY) {
                   1171:                                        tpg->flags |= freeflag;
                   1172:                                        if (pagedaemon) {
1.2       ad       1173:                                                uvm_pageout_start(1);
1.1       pooka    1174:                                                uvm_pagedequeue(tpg);
                   1175:                                        }
                   1176:                                } else {
                   1177:
                   1178:                                        /*
                   1179:                                         * ``page is not busy''
                   1180:                                         * implies that npages is 1
                   1181:                                         * and needs_clean is false.
                   1182:                                         */
                   1183:
1.8       ad       1184:                                        nextpg = TAILQ_NEXT(tpg, listq.queue);
1.1       pooka    1185:                                        uvm_pagefree(tpg);
                   1186:                                        if (pagedaemon)
                   1187:                                                uvmexp.pdfreed++;
                   1188:                                }
                   1189:                        }
                   1190:                }
                   1191:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1.2       ad       1192:                        mutex_exit(&uvm_pageqlock);
1.1       pooka    1193:                }
                   1194:                if (needs_clean) {
                   1195:                        modified = true;
                   1196:
                   1197:                        /*
                   1198:                         * start the i/o.  if we're traversing by list,
                   1199:                         * keep our place in the list with a marker page.
                   1200:                         */
                   1201:
                   1202:                        if (by_list) {
                   1203:                                TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1.8       ad       1204:                                    listq.queue);
1.1       pooka    1205:                        }
1.2       ad       1206:                        mutex_exit(slock);
1.1       pooka    1207:                        error = GOP_WRITE(vp, pgs, npages, flags);
1.2       ad       1208:                        mutex_enter(slock);
1.1       pooka    1209:                        if (by_list) {
1.8       ad       1210:                                pg = TAILQ_NEXT(&curmp, listq.queue);
                   1211:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
1.1       pooka    1212:                        }
                   1213:                        if (error) {
                   1214:                                break;
                   1215:                        }
                   1216:                        if (by_list) {
                   1217:                                continue;
                   1218:                        }
                   1219:                }
                   1220:
                   1221:                /*
                   1222:                 * find the next page and continue if there was no error.
                   1223:                 */
                   1224:
                   1225:                if (by_list) {
                   1226:                        if (nextpg) {
                   1227:                                pg = nextpg;
                   1228:                                nextpg = NULL;
                   1229:                        } else {
1.8       ad       1230:                                pg = TAILQ_NEXT(pg, listq.queue);
1.1       pooka    1231:                        }
                   1232:                } else {
                   1233:                        off += (npages - nback) << PAGE_SHIFT;
                   1234:                        if (off < endoff) {
                   1235:                                pg = uvm_pagelookup(uobj, off);
                   1236:                        }
                   1237:                }
                   1238:        }
                   1239:        if (by_list) {
1.8       ad       1240:                TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
1.1       pooka    1241:        }
                   1242:
                   1243:        if (modified && (vp->v_iflag & VI_WRMAPDIRTY) != 0 &&
                   1244:            (vp->v_type != VBLK ||
                   1245:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                   1246:                GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
                   1247:        }
                   1248:
                   1249:        /*
                   1250:         * if we're cleaning and there was nothing to clean,
                   1251:         * take us off the syncer list.  if we started any i/o
                   1252:         * and we're doing sync i/o, wait for all writes to finish.
                   1253:         */
                   1254:
                   1255:        if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
                   1256:            (vp->v_iflag & VI_ONWORKLST) != 0) {
1.5       yamt     1257: #if defined(DEBUG)
1.8       ad       1258:                TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
1.45      hannken  1259:                        if ((pg->flags & (PG_FAKE | PG_MARKER)) != 0) {
1.37      hannken  1260:                                continue;
                   1261:                        }
1.5       yamt     1262:                        if ((pg->flags & PG_CLEAN) == 0) {
                   1263:                                printf("%s: %p: !CLEAN\n", __func__, pg);
                   1264:                        }
                   1265:                        if (pmap_is_modified(pg)) {
                   1266:                                printf("%s: %p: modified\n", __func__, pg);
                   1267:                        }
                   1268:                }
                   1269: #endif /* defined(DEBUG) */
1.1       pooka    1270:                vp->v_iflag &= ~VI_WRMAPDIRTY;
                   1271:                if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                   1272:                        vn_syncer_remove_from_worklist(vp);
                   1273:        }
                   1274:
                   1275: #if !defined(DEBUG)
                   1276: skip_scan:
                   1277: #endif /* !defined(DEBUG) */
1.2       ad       1278:
                   1279:        /* Wait for output to complete. */
                   1280:        if (!wasclean && !async && vp->v_numoutput != 0) {
                   1281:                while (vp->v_numoutput != 0)
                   1282:                        cv_wait(&vp->v_cv, slock);
1.1       pooka    1283:        }
1.4       yamt     1284:        onworklst = (vp->v_iflag & VI_ONWORKLST) != 0;
1.2       ad       1285:        mutex_exit(slock);
1.1       pooka    1286:
1.4       yamt     1287:        if ((flags & PGO_RECLAIM) != 0 && onworklst) {
                   1288:                /*
                   1289:                 * in the case of PGO_RECLAIM, ensure to make the vnode clean.
                   1290:                 * retrying is not a big deal because, in many cases,
                   1291:                 * uobj->uo_npages is already 0 here.
                   1292:                 */
                   1293:                mutex_enter(slock);
                   1294:                goto retry;
                   1295:        }
                   1296:
1.65      hannken  1297:        if (trans_mp) {
                   1298:                if (holds_wapbl)
                   1299:                        WAPBL_END(trans_mp);
                   1300:                fstrans_done(trans_mp);
1.12      hannken  1301:        }
1.6       hannken  1302:
1.1       pooka    1303:        return (error);
                   1304: }
                   1305:
                   1306: int
                   1307: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
                   1308: {
                   1309:        off_t off;
                   1310:        vaddr_t kva;
                   1311:        size_t len;
                   1312:        int error;
                   1313:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1314:
                   1315:        UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
                   1316:            vp, pgs, npages, flags);
                   1317:
                   1318:        off = pgs[0]->offset;
                   1319:        kva = uvm_pagermapin(pgs, npages,
                   1320:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
                   1321:        len = npages << PAGE_SHIFT;
                   1322:
                   1323:        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                   1324:                            uvm_aio_biodone);
                   1325:
                   1326:        return error;
                   1327: }
                   1328:
1.7       reinoud  1329: int
                   1330: genfs_gop_write_rwmap(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
                   1331: {
                   1332:        off_t off;
                   1333:        vaddr_t kva;
                   1334:        size_t len;
                   1335:        int error;
                   1336:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1337:
                   1338:        UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
                   1339:            vp, pgs, npages, flags);
                   1340:
                   1341:        off = pgs[0]->offset;
                   1342:        kva = uvm_pagermapin(pgs, npages,
                   1343:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
                   1344:        len = npages << PAGE_SHIFT;
                   1345:
                   1346:        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                   1347:                            uvm_aio_biodone);
                   1348:
                   1349:        return error;
                   1350: }
                   1351:
1.1       pooka    1352: /*
                   1353:  * Backend routine for doing I/O to vnode pages.  Pages are already locked
                   1354:  * and mapped into kernel memory.  Here we just look up the underlying
                   1355:  * device block addresses and call the strategy routine.
                   1356:  */
                   1357:
                   1358: static int
                   1359: genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags,
                   1360:     enum uio_rw rw, void (*iodone)(struct buf *))
                   1361: {
1.36      uebayasi 1362:        int s, error;
1.1       pooka    1363:        int fs_bshift, dev_bshift;
                   1364:        off_t eof, offset, startoffset;
                   1365:        size_t bytes, iobytes, skipbytes;
                   1366:        struct buf *mbp, *bp;
1.35      uebayasi 1367:        const bool async = (flags & PGO_SYNCIO) == 0;
1.54      chs      1368:        const bool lazy = (flags & PGO_LAZY) == 0;
1.35      uebayasi 1369:        const bool iowrite = rw == UIO_WRITE;
                   1370:        const int brw = iowrite ? B_WRITE : B_READ;
1.1       pooka    1371:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1372:
                   1373:        UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
                   1374:            vp, kva, len, flags);
                   1375:
                   1376:        KASSERT(vp->v_size <= vp->v_writesize);
                   1377:        GOP_SIZE(vp, vp->v_writesize, &eof, 0);
                   1378:        if (vp->v_type != VBLK) {
                   1379:                fs_bshift = vp->v_mount->mnt_fs_bshift;
                   1380:                dev_bshift = vp->v_mount->mnt_dev_bshift;
                   1381:        } else {
                   1382:                fs_bshift = DEV_BSHIFT;
                   1383:                dev_bshift = DEV_BSHIFT;
                   1384:        }
                   1385:        error = 0;
                   1386:        startoffset = off;
                   1387:        bytes = MIN(len, eof - startoffset);
                   1388:        skipbytes = 0;
                   1389:        KASSERT(bytes != 0);
                   1390:
1.35      uebayasi 1391:        if (iowrite) {
1.49      rmind    1392:                mutex_enter(vp->v_interlock);
1.1       pooka    1393:                vp->v_numoutput += 2;
1.49      rmind    1394:                mutex_exit(vp->v_interlock);
1.1       pooka    1395:        }
1.2       ad       1396:        mbp = getiobuf(vp, true);
1.1       pooka    1397:        UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
                   1398:            vp, mbp, vp->v_numoutput, bytes);
                   1399:        mbp->b_bufsize = len;
                   1400:        mbp->b_data = (void *)kva;
                   1401:        mbp->b_resid = mbp->b_bcount = bytes;
1.2       ad       1402:        mbp->b_cflags = BC_BUSY | BC_AGE;
                   1403:        if (async) {
                   1404:                mbp->b_flags = brw | B_ASYNC;
                   1405:                mbp->b_iodone = iodone;
                   1406:        } else {
                   1407:                mbp->b_flags = brw;
                   1408:                mbp->b_iodone = NULL;
                   1409:        }
1.1       pooka    1410:        if (curlwp == uvm.pagedaemon_lwp)
                   1411:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
1.54      chs      1412:        else if (async || lazy)
1.1       pooka    1413:                BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
                   1414:        else
                   1415:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
                   1416:
                   1417:        bp = NULL;
                   1418:        for (offset = startoffset;
                   1419:            bytes > 0;
                   1420:            offset += iobytes, bytes -= iobytes) {
1.36      uebayasi 1421:                int run;
                   1422:                daddr_t lbn, blkno;
                   1423:                struct vnode *devvp;
                   1424:
                   1425:                /*
                   1426:                 * bmap the file to find out the blkno to read from and
                   1427:                 * how much we can read in one i/o.  if bmap returns an error,
                   1428:                 * skip the rest of the top-level i/o.
                   1429:                 */
                   1430:
1.1       pooka    1431:                lbn = offset >> fs_bshift;
                   1432:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
                   1433:                if (error) {
1.36      uebayasi 1434:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
                   1435:                            lbn,error,0,0);
1.1       pooka    1436:                        skipbytes += bytes;
                   1437:                        bytes = 0;
1.36      uebayasi 1438:                        goto loopdone;
1.1       pooka    1439:                }
                   1440:
1.36      uebayasi 1441:                /*
                   1442:                 * see how many pages can be read with this i/o.
                   1443:                 * reduce the i/o size if necessary to avoid
                   1444:                 * overwriting pages with valid data.
                   1445:                 */
                   1446:
1.1       pooka    1447:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                   1448:                    bytes);
1.36      uebayasi 1449:
                   1450:                /*
                   1451:                 * if this block isn't allocated, zero it instead of
                   1452:                 * reading it.  unless we are going to allocate blocks,
                   1453:                 * mark the pages we zeroed PG_RDONLY.
                   1454:                 */
                   1455:
1.1       pooka    1456:                if (blkno == (daddr_t)-1) {
1.35      uebayasi 1457:                        if (!iowrite) {
1.1       pooka    1458:                                memset((char *)kva + (offset - startoffset), 0,
1.36      uebayasi 1459:                                    iobytes);
1.1       pooka    1460:                        }
                   1461:                        skipbytes += iobytes;
                   1462:                        continue;
                   1463:                }
                   1464:
1.36      uebayasi 1465:                /*
                   1466:                 * allocate a sub-buf for this piece of the i/o
                   1467:                 * (or just use mbp if there's only 1 piece),
                   1468:                 * and start it going.
                   1469:                 */
                   1470:
1.1       pooka    1471:                if (offset == startoffset && iobytes == bytes) {
                   1472:                        bp = mbp;
                   1473:                } else {
                   1474:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
                   1475:                            vp, bp, vp->v_numoutput, 0);
1.2       ad       1476:                        bp = getiobuf(vp, true);
1.1       pooka    1477:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
                   1478:                }
                   1479:                bp->b_lblkno = 0;
                   1480:
                   1481:                /* adjust physical blkno for partial blocks */
                   1482:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
                   1483:                    dev_bshift);
1.36      uebayasi 1484:
1.1       pooka    1485:                UVMHIST_LOG(ubchist,
1.36      uebayasi 1486:                    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
                   1487:                    bp, offset, bp->b_bcount, bp->b_blkno);
1.1       pooka    1488:
                   1489:                VOP_STRATEGY(devvp, bp);
                   1490:        }
1.36      uebayasi 1491:
                   1492: loopdone:
1.1       pooka    1493:        if (skipbytes) {
                   1494:                UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
                   1495:        }
                   1496:        nestiobuf_done(mbp, skipbytes, error);
                   1497:        if (async) {
                   1498:                UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
                   1499:                return (0);
                   1500:        }
                   1501:        UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
                   1502:        error = biowait(mbp);
                   1503:        s = splbio();
                   1504:        (*iodone)(mbp);
                   1505:        splx(s);
                   1506:        UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
                   1507:        return (error);
                   1508: }
                   1509:
                   1510: int
                   1511: genfs_compat_getpages(void *v)
                   1512: {
                   1513:        struct vop_getpages_args /* {
                   1514:                struct vnode *a_vp;
                   1515:                voff_t a_offset;
                   1516:                struct vm_page **a_m;
                   1517:                int *a_count;
                   1518:                int a_centeridx;
                   1519:                vm_prot_t a_access_type;
                   1520:                int a_advice;
                   1521:                int a_flags;
                   1522:        } */ *ap = v;
                   1523:
                   1524:        off_t origoffset;
                   1525:        struct vnode *vp = ap->a_vp;
                   1526:        struct uvm_object *uobj = &vp->v_uobj;
                   1527:        struct vm_page *pg, **pgs;
                   1528:        vaddr_t kva;
                   1529:        int i, error, orignpages, npages;
                   1530:        struct iovec iov;
                   1531:        struct uio uio;
                   1532:        kauth_cred_t cred = curlwp->l_cred;
1.35      uebayasi 1533:        const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.1       pooka    1534:
                   1535:        error = 0;
                   1536:        origoffset = ap->a_offset;
                   1537:        orignpages = *ap->a_count;
                   1538:        pgs = ap->a_m;
                   1539:
                   1540:        if (ap->a_flags & PGO_LOCKED) {
                   1541:                uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.35      uebayasi 1542:                    UFP_NOWAIT|UFP_NOALLOC| (memwrite ? UFP_NORDONLY : 0));
1.1       pooka    1543:
1.38      chs      1544:                error = ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
                   1545:                if (error == 0 && memwrite) {
                   1546:                        genfs_markdirty(vp);
                   1547:                }
                   1548:                return error;
1.1       pooka    1549:        }
                   1550:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1.49      rmind    1551:                mutex_exit(uobj->vmobjlock);
1.38      chs      1552:                return EINVAL;
1.1       pooka    1553:        }
                   1554:        if ((ap->a_flags & PGO_SYNCIO) == 0) {
1.49      rmind    1555:                mutex_exit(uobj->vmobjlock);
1.1       pooka    1556:                return 0;
                   1557:        }
                   1558:        npages = orignpages;
                   1559:        uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1.49      rmind    1560:        mutex_exit(uobj->vmobjlock);
1.1       pooka    1561:        kva = uvm_pagermapin(pgs, npages,
                   1562:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
                   1563:        for (i = 0; i < npages; i++) {
                   1564:                pg = pgs[i];
                   1565:                if ((pg->flags & PG_FAKE) == 0) {
                   1566:                        continue;
                   1567:                }
                   1568:                iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
                   1569:                iov.iov_len = PAGE_SIZE;
                   1570:                uio.uio_iov = &iov;
                   1571:                uio.uio_iovcnt = 1;
                   1572:                uio.uio_offset = origoffset + (i << PAGE_SHIFT);
                   1573:                uio.uio_rw = UIO_READ;
                   1574:                uio.uio_resid = PAGE_SIZE;
                   1575:                UIO_SETUP_SYSSPACE(&uio);
                   1576:                /* XXX vn_lock */
                   1577:                error = VOP_READ(vp, &uio, 0, cred);
                   1578:                if (error) {
                   1579:                        break;
                   1580:                }
                   1581:                if (uio.uio_resid) {
                   1582:                        memset(iov.iov_base, 0, uio.uio_resid);
                   1583:                }
                   1584:        }
                   1585:        uvm_pagermapout(kva, npages);
1.49      rmind    1586:        mutex_enter(uobj->vmobjlock);
1.2       ad       1587:        mutex_enter(&uvm_pageqlock);
1.1       pooka    1588:        for (i = 0; i < npages; i++) {
                   1589:                pg = pgs[i];
                   1590:                if (error && (pg->flags & PG_FAKE) != 0) {
                   1591:                        pg->flags |= PG_RELEASED;
                   1592:                } else {
                   1593:                        pmap_clear_modify(pg);
                   1594:                        uvm_pageactivate(pg);
                   1595:                }
                   1596:        }
                   1597:        if (error) {
                   1598:                uvm_page_unbusy(pgs, npages);
                   1599:        }
1.2       ad       1600:        mutex_exit(&uvm_pageqlock);
1.38      chs      1601:        if (error == 0 && memwrite) {
                   1602:                genfs_markdirty(vp);
                   1603:        }
1.49      rmind    1604:        mutex_exit(uobj->vmobjlock);
1.38      chs      1605:        return error;
1.1       pooka    1606: }
                   1607:
                   1608: int
                   1609: genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
                   1610:     int flags)
                   1611: {
                   1612:        off_t offset;
                   1613:        struct iovec iov;
                   1614:        struct uio uio;
                   1615:        kauth_cred_t cred = curlwp->l_cred;
                   1616:        struct buf *bp;
                   1617:        vaddr_t kva;
1.2       ad       1618:        int error;
1.1       pooka    1619:
                   1620:        offset = pgs[0]->offset;
                   1621:        kva = uvm_pagermapin(pgs, npages,
                   1622:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
                   1623:
                   1624:        iov.iov_base = (void *)kva;
                   1625:        iov.iov_len = npages << PAGE_SHIFT;
                   1626:        uio.uio_iov = &iov;
                   1627:        uio.uio_iovcnt = 1;
                   1628:        uio.uio_offset = offset;
                   1629:        uio.uio_rw = UIO_WRITE;
                   1630:        uio.uio_resid = npages << PAGE_SHIFT;
                   1631:        UIO_SETUP_SYSSPACE(&uio);
                   1632:        /* XXX vn_lock */
                   1633:        error = VOP_WRITE(vp, &uio, 0, cred);
                   1634:
1.49      rmind    1635:        mutex_enter(vp->v_interlock);
1.2       ad       1636:        vp->v_numoutput++;
1.49      rmind    1637:        mutex_exit(vp->v_interlock);
1.1       pooka    1638:
1.2       ad       1639:        bp = getiobuf(vp, true);
                   1640:        bp->b_cflags = BC_BUSY | BC_AGE;
1.1       pooka    1641:        bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
                   1642:        bp->b_data = (char *)kva;
                   1643:        bp->b_bcount = npages << PAGE_SHIFT;
                   1644:        bp->b_bufsize = npages << PAGE_SHIFT;
                   1645:        bp->b_resid = 0;
                   1646:        bp->b_error = error;
                   1647:        uvm_aio_aiodone(bp);
                   1648:        return (error);
                   1649: }
                   1650:
                   1651: /*
                   1652:  * Process a uio using direct I/O.  If we reach a part of the request
                   1653:  * which cannot be processed in this fashion for some reason, just return.
                   1654:  * The caller must handle some additional part of the request using
                   1655:  * buffered I/O before trying direct I/O again.
                   1656:  */
                   1657:
                   1658: void
                   1659: genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
                   1660: {
                   1661:        struct vmspace *vs;
                   1662:        struct iovec *iov;
                   1663:        vaddr_t va;
                   1664:        size_t len;
                   1665:        const int mask = DEV_BSIZE - 1;
                   1666:        int error;
1.16      joerg    1667:        bool need_wapbl = (vp->v_mount && vp->v_mount->mnt_wapbl &&
                   1668:            (ioflag & IO_JOURNALLOCKED) == 0);
1.1       pooka    1669:
                   1670:        /*
                   1671:         * We only support direct I/O to user space for now.
                   1672:         */
                   1673:
                   1674:        if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
                   1675:                return;
                   1676:        }
                   1677:
                   1678:        /*
                   1679:         * If the vnode is mapped, we would need to get the getpages lock
1.53      yamt     1680:         * to stabilize the bmap, but then we would get into trouble while
1.1       pooka    1681:         * locking the pages if the pages belong to this same vnode (or a
                   1682:         * multi-vnode cascade to the same effect).  Just fall back to
                   1683:         * buffered I/O if the vnode is mapped to avoid this mess.
                   1684:         */
                   1685:
                   1686:        if (vp->v_vflag & VV_MAPPED) {
                   1687:                return;
                   1688:        }
                   1689:
1.16      joerg    1690:        if (need_wapbl) {
1.13      hannken  1691:                error = WAPBL_BEGIN(vp->v_mount);
                   1692:                if (error)
                   1693:                        return;
                   1694:        }
                   1695:
1.1       pooka    1696:        /*
                   1697:         * Do as much of the uio as possible with direct I/O.
                   1698:         */
                   1699:
                   1700:        vs = uio->uio_vmspace;
                   1701:        while (uio->uio_resid) {
                   1702:                iov = uio->uio_iov;
                   1703:                if (iov->iov_len == 0) {
                   1704:                        uio->uio_iov++;
                   1705:                        uio->uio_iovcnt--;
                   1706:                        continue;
                   1707:                }
                   1708:                va = (vaddr_t)iov->iov_base;
                   1709:                len = MIN(iov->iov_len, genfs_maxdio);
                   1710:                len &= ~mask;
                   1711:
                   1712:                /*
                   1713:                 * If the next chunk is smaller than DEV_BSIZE or extends past
                   1714:                 * the current EOF, then fall back to buffered I/O.
                   1715:                 */
                   1716:
                   1717:                if (len == 0 || uio->uio_offset + len > vp->v_size) {
1.13      hannken  1718:                        break;
1.1       pooka    1719:                }
                   1720:
                   1721:                /*
                   1722:                 * Check alignment.  The file offset must be at least
                   1723:                 * sector-aligned.  The exact constraint on memory alignment
                   1724:                 * is very hardware-dependent, but requiring sector-aligned
                   1725:                 * addresses there too is safe.
                   1726:                 */
                   1727:
                   1728:                if (uio->uio_offset & mask || va & mask) {
1.13      hannken  1729:                        break;
1.1       pooka    1730:                }
                   1731:                error = genfs_do_directio(vs, va, len, vp, uio->uio_offset,
                   1732:                                          uio->uio_rw);
                   1733:                if (error) {
                   1734:                        break;
                   1735:                }
                   1736:                iov->iov_base = (char *)iov->iov_base + len;
                   1737:                iov->iov_len -= len;
                   1738:                uio->uio_offset += len;
                   1739:                uio->uio_resid -= len;
                   1740:        }
1.13      hannken  1741:
1.16      joerg    1742:        if (need_wapbl)
1.13      hannken  1743:                WAPBL_END(vp->v_mount);
1.1       pooka    1744: }
                   1745:
                   1746: /*
                   1747:  * Iodone routine for direct I/O.  We don't do much here since the request is
                   1748:  * always synchronous, so the caller will do most of the work after biowait().
                   1749:  */
                   1750:
                   1751: static void
                   1752: genfs_dio_iodone(struct buf *bp)
                   1753: {
                   1754:
                   1755:        KASSERT((bp->b_flags & B_ASYNC) == 0);
1.2       ad       1756:        if ((bp->b_flags & B_READ) == 0 && (bp->b_cflags & BC_AGE) != 0) {
                   1757:                mutex_enter(bp->b_objlock);
1.1       pooka    1758:                vwakeup(bp);
1.2       ad       1759:                mutex_exit(bp->b_objlock);
1.1       pooka    1760:        }
                   1761:        putiobuf(bp);
                   1762: }
                   1763:
                   1764: /*
                   1765:  * Process one chunk of a direct I/O request.
                   1766:  */
                   1767:
                   1768: static int
                   1769: genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp,
                   1770:     off_t off, enum uio_rw rw)
                   1771: {
                   1772:        struct vm_map *map;
1.56      martin   1773:        struct pmap *upm, *kpm __unused;
1.1       pooka    1774:        size_t klen = round_page(uva + len) - trunc_page(uva);
                   1775:        off_t spoff, epoff;
                   1776:        vaddr_t kva, puva;
                   1777:        paddr_t pa;
                   1778:        vm_prot_t prot;
1.58      martin   1779:        int error, rv __diagused, poff, koff;
1.13      hannken  1780:        const int pgoflags = PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED |
1.1       pooka    1781:                (rw == UIO_WRITE ? PGO_FREE : 0);
                   1782:
                   1783:        /*
                   1784:         * For writes, verify that this range of the file already has fully
                   1785:         * allocated backing store.  If there are any holes, just punt and
                   1786:         * make the caller take the buffered write path.
                   1787:         */
                   1788:
                   1789:        if (rw == UIO_WRITE) {
                   1790:                daddr_t lbn, elbn, blkno;
                   1791:                int bsize, bshift, run;
                   1792:
                   1793:                bshift = vp->v_mount->mnt_fs_bshift;
                   1794:                bsize = 1 << bshift;
                   1795:                lbn = off >> bshift;
                   1796:                elbn = (off + len + bsize - 1) >> bshift;
                   1797:                while (lbn < elbn) {
                   1798:                        error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
                   1799:                        if (error) {
                   1800:                                return error;
                   1801:                        }
                   1802:                        if (blkno == (daddr_t)-1) {
                   1803:                                return ENOSPC;
                   1804:                        }
                   1805:                        lbn += 1 + run;
                   1806:                }
                   1807:        }
                   1808:
                   1809:        /*
                   1810:         * Flush any cached pages for parts of the file that we're about to
                   1811:         * access.  If we're writing, invalidate pages as well.
                   1812:         */
                   1813:
                   1814:        spoff = trunc_page(off);
                   1815:        epoff = round_page(off + len);
1.49      rmind    1816:        mutex_enter(vp->v_interlock);
1.1       pooka    1817:        error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags);
                   1818:        if (error) {
                   1819:                return error;
                   1820:        }
                   1821:
                   1822:        /*
                   1823:         * Wire the user pages and remap them into kernel memory.
                   1824:         */
                   1825:
                   1826:        prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
                   1827:        error = uvm_vslock(vs, (void *)uva, len, prot);
                   1828:        if (error) {
                   1829:                return error;
                   1830:        }
                   1831:
                   1832:        map = &vs->vm_map;
                   1833:        upm = vm_map_pmap(map);
                   1834:        kpm = vm_map_pmap(kernel_map);
                   1835:        puva = trunc_page(uva);
1.51      matt     1836:        kva = uvm_km_alloc(kernel_map, klen, atop(puva) & uvmexp.colormask,
                   1837:            UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
1.1       pooka    1838:        for (poff = 0; poff < klen; poff += PAGE_SIZE) {
                   1839:                rv = pmap_extract(upm, puva + poff, &pa);
                   1840:                KASSERT(rv);
1.51      matt     1841:                pmap_kenter_pa(kva + poff, pa, prot, PMAP_WIRED);
1.1       pooka    1842:        }
                   1843:        pmap_update(kpm);
                   1844:
                   1845:        /*
                   1846:         * Do the I/O.
                   1847:         */
                   1848:
                   1849:        koff = uva - trunc_page(uva);
                   1850:        error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw,
                   1851:                            genfs_dio_iodone);
                   1852:
                   1853:        /*
                   1854:         * Tear down the kernel mapping.
                   1855:         */
                   1856:
1.51      matt     1857:        pmap_kremove(kva, klen);
1.1       pooka    1858:        pmap_update(kpm);
                   1859:        uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY);
                   1860:
                   1861:        /*
                   1862:         * Unwire the user pages.
                   1863:         */
                   1864:
                   1865:        uvm_vsunlock(vs, (void *)uva, len);
                   1866:        return error;
                   1867: }

CVSweb <webmaster@jp.NetBSD.org>