[BACK]Return to genfs_io.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / miscfs / genfs

Annotation of src/sys/miscfs/genfs/genfs_io.c, Revision 1.36.2.46

1.36.2.31  uebayasi    1: /*     $NetBSD$        */
1.1       pooka       2:
                      3: /*
                      4:  * Copyright (c) 1982, 1986, 1989, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  * 3. Neither the name of the University nor the names of its contributors
                     16:  *    may be used to endorse or promote products derived from this software
                     17:  *    without specific prior written permission.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     20:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     22:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     23:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     24:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     25:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     26:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     27:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     29:  * SUCH DAMAGE.
                     30:  *
                     31:  */
                     32:
                     33: #include <sys/cdefs.h>
1.36.2.31  uebayasi   34: __KERNEL_RCSID(0, "$NetBSD$");
1.1       pooka      35:
1.36.2.1  uebayasi   36: #include "opt_xip.h"
                     37:
1.1       pooka      38: #include <sys/param.h>
                     39: #include <sys/systm.h>
                     40: #include <sys/proc.h>
                     41: #include <sys/kernel.h>
                     42: #include <sys/mount.h>
                     43: #include <sys/namei.h>
                     44: #include <sys/vnode.h>
                     45: #include <sys/fcntl.h>
                     46: #include <sys/kmem.h>
                     47: #include <sys/poll.h>
                     48: #include <sys/mman.h>
                     49: #include <sys/file.h>
                     50: #include <sys/kauth.h>
                     51: #include <sys/fstrans.h>
1.15      pooka      52: #include <sys/buf.h>
1.36.2.11  uebayasi   53: #include <sys/once.h>
1.1       pooka      54:
                     55: #include <miscfs/genfs/genfs.h>
                     56: #include <miscfs/genfs/genfs_node.h>
                     57: #include <miscfs/specfs/specdev.h>
                     58:
                     59: #include <uvm/uvm.h>
                     60: #include <uvm/uvm_pager.h>
                     61:
1.36.2.4  uebayasi   62: #ifdef XIP
1.36.2.35  uebayasi   63: static int genfs_do_getpages_xip_io(struct vnode *, voff_t, struct vm_page **,
1.36.2.42  uebayasi   64:     int *, int, vm_prot_t, int, int, const int);
1.36.2.44  uebayasi   65: static int genfs_do_getpages_xip_io_done(struct vnode *, voff_t, struct vm_page **,
                     66:     int *, int, vm_prot_t, int, int, const int);
1.36.2.24  uebayasi   67: static int genfs_do_putpages_xip(struct vnode *, off_t, off_t, int,
                     68:     struct vm_page **);
1.36.2.4  uebayasi   69: #endif
1.1       pooka      70: static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *,
                     71:     off_t, enum uio_rw);
                     72: static void genfs_dio_iodone(struct buf *);
                     73:
                     74: static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw,
                     75:     void (*)(struct buf *));
1.36.2.21  uebayasi   76: static void genfs_rel_pages(struct vm_page **, int);
                     77: static void genfs_markdirty(struct vnode *);
1.1       pooka      78:
                     79: int genfs_maxdio = MAXPHYS;
                     80:
1.36.2.21  uebayasi   81: static void
1.1       pooka      82: genfs_rel_pages(struct vm_page **pgs, int npages)
                     83: {
                     84:        int i;
                     85:
                     86:        for (i = 0; i < npages; i++) {
                     87:                struct vm_page *pg = pgs[i];
                     88:
                     89:                if (pg == NULL || pg == PGO_DONTCARE)
                     90:                        continue;
                     91:                if (pg->flags & PG_FAKE) {
                     92:                        pg->flags |= PG_RELEASED;
                     93:                }
                     94:        }
1.2       ad         95:        mutex_enter(&uvm_pageqlock);
1.1       pooka      96:        uvm_page_unbusy(pgs, npages);
1.2       ad         97:        mutex_exit(&uvm_pageqlock);
1.1       pooka      98: }
                     99:
1.36.2.21  uebayasi  100: static void
                    101: genfs_markdirty(struct vnode *vp)
                    102: {
                    103:        struct genfs_node * const gp = VTOG(vp);
                    104:
                    105:        KASSERT(mutex_owned(&vp->v_interlock));
                    106:        gp->g_dirtygen++;
                    107:        if ((vp->v_iflag & VI_ONWORKLST) == 0) {
                    108:                vn_syncer_add_to_worklist(vp, filedelay);
                    109:        }
                    110:        if ((vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP) {
                    111:                vp->v_iflag |= VI_WRMAPDIRTY;
                    112:        }
                    113: }
                    114:
1.1       pooka     115: /*
                    116:  * generic VM getpages routine.
                    117:  * Return PG_BUSY pages for the given range,
                    118:  * reading from backing store if necessary.
                    119:  */
                    120:
                    121: int
                    122: genfs_getpages(void *v)
                    123: {
1.36.2.1  uebayasi  124:        struct vop_getpages_args /* {
                    125:                struct vnode *a_vp;
                    126:                voff_t a_offset;
                    127:                struct vm_page **a_m;
                    128:                int *a_count;
                    129:                int a_centeridx;
                    130:                vm_prot_t a_access_type;
                    131:                int a_advice;
                    132:                int a_flags;
                    133:        } */ * const ap = v;
1.1       pooka     134:
1.24      uebayasi  135:        off_t diskeof, memeof;
1.31      uebayasi  136:        int i, error, npages;
1.10      yamt      137:        const int flags = ap->a_flags;
1.22      uebayasi  138:        struct vnode * const vp = ap->a_vp;
                    139:        struct uvm_object * const uobj = &vp->v_uobj;
1.31      uebayasi  140:        kauth_cred_t const cred = curlwp->l_cred;               /* XXXUBC curlwp */
1.10      yamt      141:        const bool async = (flags & PGO_SYNCIO) == 0;
1.35      uebayasi  142:        const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.1       pooka     143:        bool has_trans = false;
1.10      yamt      144:        const bool overwrite = (flags & PGO_OVERWRITE) != 0;
1.35      uebayasi  145:        const bool blockalloc = memwrite && (flags & PGO_NOBLOCKALLOC) == 0;
1.36.2.27  uebayasi  146:        const bool glocked = (flags & PGO_GLOCKHELD) != 0;
1.1       pooka     147:        UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
                    148:
                    149:        UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
                    150:            vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
                    151:
                    152:        KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
                    153:            vp->v_type == VLNK || vp->v_type == VBLK);
                    154:
                    155: startover:
                    156:        error = 0;
1.27      uebayasi  157:        const voff_t origvsize = vp->v_size;
                    158:        const off_t origoffset = ap->a_offset;
1.29      uebayasi  159:        const int orignpages = *ap->a_count;
1.33      uebayasi  160:
1.1       pooka     161:        GOP_SIZE(vp, origvsize, &diskeof, 0);
                    162:        if (flags & PGO_PASTEOF) {
1.24      uebayasi  163:                off_t newsize;
1.1       pooka     164: #if defined(DIAGNOSTIC)
                    165:                off_t writeeof;
                    166: #endif /* defined(DIAGNOSTIC) */
                    167:
                    168:                newsize = MAX(origvsize,
                    169:                    origoffset + (orignpages << PAGE_SHIFT));
                    170:                GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
                    171: #if defined(DIAGNOSTIC)
                    172:                GOP_SIZE(vp, vp->v_writesize, &writeeof, GOP_SIZE_MEM);
                    173:                if (newsize > round_page(writeeof)) {
1.36.2.27  uebayasi  174:                        panic("%s: past eof: %" PRId64 " vs. %" PRId64,
                    175:                            __func__, newsize, round_page(writeeof));
1.1       pooka     176:                }
                    177: #endif /* defined(DIAGNOSTIC) */
                    178:        } else {
                    179:                GOP_SIZE(vp, origvsize, &memeof, GOP_SIZE_MEM);
                    180:        }
                    181:        KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages);
                    182:        KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
                    183:        KASSERT(orignpages > 0);
                    184:
                    185:        /*
                    186:         * Bounds-check the request.
                    187:         */
                    188:
                    189:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
                    190:                if ((flags & PGO_LOCKED) == 0) {
1.2       ad        191:                        mutex_exit(&uobj->vmobjlock);
1.1       pooka     192:                }
                    193:                UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
                    194:                    origoffset, *ap->a_count, memeof,0);
                    195:                error = EINVAL;
                    196:                goto out_err;
                    197:        }
                    198:
                    199:        /* uobj is locked */
                    200:
                    201:        if ((flags & PGO_NOTIMESTAMP) == 0 &&
                    202:            (vp->v_type != VBLK ||
                    203:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                    204:                int updflags = 0;
                    205:
                    206:                if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
                    207:                        updflags = GOP_UPDATE_ACCESSED;
                    208:                }
1.35      uebayasi  209:                if (memwrite) {
1.1       pooka     210:                        updflags |= GOP_UPDATE_MODIFIED;
                    211:                }
                    212:                if (updflags != 0) {
                    213:                        GOP_MARKUPDATE(vp, updflags);
                    214:                }
                    215:        }
                    216:
                    217:        /*
                    218:         * For PGO_LOCKED requests, just return whatever's in memory.
                    219:         */
                    220:
                    221:        if (flags & PGO_LOCKED) {
1.36.2.39  uebayasi  222: #if 0
1.36.2.46! uebayasi  223:                genfs_getpages_mem();
1.36.2.39  uebayasi  224:        } else {
1.36.2.46! uebayasi  225:                genfs_getpages_io();
1.36.2.39  uebayasi  226:        }
                    227: }
                    228:
                    229: int
1.36.2.46! uebayasi  230: genfs_getpages_mem()
1.36.2.39  uebayasi  231: {
                    232: #endif
1.1       pooka     233:                int nfound;
1.31      uebayasi  234:                struct vm_page *pg;
1.1       pooka     235:
1.36.2.46! uebayasi  236: #ifdef XIP
1.36.2.39  uebayasi  237:                if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
                    238:                        *ap->a_count = 0;
                    239:                        return 0;
                    240:                }
                    241: #endif
                    242:
1.36.2.27  uebayasi  243:                KASSERT(!glocked);
1.1       pooka     244:                npages = *ap->a_count;
                    245: #if defined(DEBUG)
                    246:                for (i = 0; i < npages; i++) {
                    247:                        pg = ap->a_m[i];
                    248:                        KASSERT(pg == NULL || pg == PGO_DONTCARE);
                    249:                }
                    250: #endif /* defined(DEBUG) */
                    251:                nfound = uvn_findpages(uobj, origoffset, &npages,
1.35      uebayasi  252:                    ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
1.1       pooka     253:                KASSERT(npages == *ap->a_count);
                    254:                if (nfound == 0) {
                    255:                        error = EBUSY;
                    256:                        goto out_err;
                    257:                }
1.23      uebayasi  258:                if (!genfs_node_rdtrylock(vp)) {
1.1       pooka     259:                        genfs_rel_pages(ap->a_m, npages);
                    260:
                    261:                        /*
                    262:                         * restore the array.
                    263:                         */
                    264:
                    265:                        for (i = 0; i < npages; i++) {
                    266:                                pg = ap->a_m[i];
                    267:
1.36.2.30  uebayasi  268:                                if (pg != NULL && pg != PGO_DONTCARE) {
1.1       pooka     269:                                        ap->a_m[i] = NULL;
                    270:                                }
1.36.2.30  uebayasi  271:                                KASSERT(pg == NULL || pg == PGO_DONTCARE);
1.1       pooka     272:                        }
                    273:                } else {
1.23      uebayasi  274:                        genfs_node_unlock(vp);
1.1       pooka     275:                }
                    276:                error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.36.2.21  uebayasi  277:                if (error == 0 && memwrite) {
                    278:                        genfs_markdirty(vp);
                    279:                }
1.1       pooka     280:                goto out_err;
                    281:        }
1.2       ad        282:        mutex_exit(&uobj->vmobjlock);
1.36.2.39  uebayasi  283: #if 0
                    284: }
                    285:
                    286: int
1.36.2.46! uebayasi  287: genfs_getpages_io()
1.36.2.39  uebayasi  288: {
                    289: #endif
1.1       pooka     290:        /*
                    291:         * find the requested pages and make some simple checks.
                    292:         * leave space in the page array for a whole block.
                    293:         */
                    294:
1.36.2.42  uebayasi  295: #define        vp2fs_bshift(vp) \
                    296:        (((vp)->v_type != VBLK) ? (vp)->v_mount->mnt_fs_bshift : DEV_BSHIFT)
                    297: #define        vp2dev_bshift(vp) \
                    298:        (((vp)->v_type != VBLK) ? (vp)->v_mount->mnt_dev_bshift : DEV_BSHIFT)
                    299:
                    300:        const int fs_bshift = vp2fs_bshift(vp);
                    301:        const int dev_bshift = vp2dev_bshift(vp);
1.27      uebayasi  302:        const int fs_bsize = 1 << fs_bshift;
1.30      uebayasi  303: #define        blk_mask        (fs_bsize - 1)
                    304: #define        trunc_blk(x)    ((x) & ~blk_mask)
                    305: #define        round_blk(x)    (((x) + blk_mask) & ~blk_mask)
1.1       pooka     306:
1.29      uebayasi  307:        const int orignmempages = MIN(orignpages,
1.1       pooka     308:            round_page(memeof - origoffset) >> PAGE_SHIFT);
1.29      uebayasi  309:        npages = orignmempages;
1.30      uebayasi  310:        const off_t startoffset = trunc_blk(origoffset);
                    311:        const off_t endoffset = MIN(
                    312:            round_page(round_blk(origoffset + (npages << PAGE_SHIFT))),
                    313:            round_page(memeof));
1.31      uebayasi  314:        const int ridx = (origoffset - startoffset) >> PAGE_SHIFT;
1.1       pooka     315:
1.33      uebayasi  316:        const int pgs_size = sizeof(struct vm_page *) *
1.1       pooka     317:            ((endoffset - startoffset) >> PAGE_SHIFT);
1.33      uebayasi  318:        struct vm_page **pgs, *pgs_onstack[UBC_MAX_PAGES];
1.31      uebayasi  319:
1.1       pooka     320:        if (pgs_size > sizeof(pgs_onstack)) {
                    321:                pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP);
                    322:                if (pgs == NULL) {
                    323:                        pgs = pgs_onstack;
                    324:                        error = ENOMEM;
1.32      uebayasi  325:                        goto out_err;
1.1       pooka     326:                }
                    327:        } else {
1.14      christos  328:                pgs = pgs_onstack;
                    329:                (void)memset(pgs, 0, pgs_size);
1.1       pooka     330:        }
1.14      christos  331:
1.1       pooka     332:        UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
                    333:            ridx, npages, startoffset, endoffset);
1.36.2.46! uebayasi  334: #if 0
        !           335: }
1.1       pooka     336:
1.36.2.46! uebayasi  337: int
        !           338: genfs_getpages_io_relock()
        !           339: {
        !           340: #endif
1.1       pooka     341:        if (!has_trans) {
                    342:                fstrans_start(vp->v_mount, FSTRANS_SHARED);
                    343:                has_trans = true;
                    344:        }
                    345:
                    346:        /*
                    347:         * hold g_glock to prevent a race with truncate.
                    348:         *
                    349:         * check if our idea of v_size is still valid.
                    350:         */
                    351:
1.36.2.27  uebayasi  352:        KASSERT(!glocked || genfs_node_wrlocked(vp));
                    353:        if (!glocked) {
                    354:                if (blockalloc) {
                    355:                        genfs_node_wrlock(vp);
                    356:                } else {
                    357:                        genfs_node_rdlock(vp);
                    358:                }
1.1       pooka     359:        }
1.2       ad        360:        mutex_enter(&uobj->vmobjlock);
1.1       pooka     361:        if (vp->v_size < origvsize) {
1.36.2.27  uebayasi  362:                if (!glocked) {
                    363:                        genfs_node_unlock(vp);
                    364:                }
1.1       pooka     365:                if (pgs != pgs_onstack)
                    366:                        kmem_free(pgs, pgs_size);
                    367:                goto startover;
                    368:        }
1.36.2.46! uebayasi  369: #if 0
        !           370: }
1.1       pooka     371:
1.36.2.46! uebayasi  372: int
        !           373: genfs_getpages_io_findpages()
        !           374: {
        !           375: #endif
        !           376: #ifdef XIP
1.36.2.43  uebayasi  377:        if ((ap->a_vp->v_vflag & VV_XIP) != 0)
1.36.2.46! uebayasi  378:                goto genfs_getpages_allocpages_done;
1.36.2.43  uebayasi  379: #endif
                    380:
1.1       pooka     381:        if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
1.29      uebayasi  382:            async ? UFP_NOWAIT : UFP_ALL) != orignmempages) {
1.36.2.27  uebayasi  383:                if (!glocked) {
                    384:                        genfs_node_unlock(vp);
                    385:                }
1.1       pooka     386:                KASSERT(async != 0);
1.29      uebayasi  387:                genfs_rel_pages(&pgs[ridx], orignmempages);
1.2       ad        388:                mutex_exit(&uobj->vmobjlock);
1.1       pooka     389:                error = EBUSY;
1.33      uebayasi  390:                goto out_err_free;
1.1       pooka     391:        }
                    392:
                    393:        /*
                    394:         * if the pages are already resident, just return them.
                    395:         */
                    396:
                    397:        for (i = 0; i < npages; i++) {
1.31      uebayasi  398:                struct vm_page *pg = pgs[ridx + i];
1.1       pooka     399:
1.31      uebayasi  400:                if ((pg->flags & PG_FAKE) ||
                    401:                    (blockalloc && (pg->flags & PG_RDONLY))) {
1.1       pooka     402:                        break;
                    403:                }
                    404:        }
                    405:        if (i == npages) {
1.36.2.27  uebayasi  406:                if (!glocked) {
                    407:                        genfs_node_unlock(vp);
                    408:                }
1.1       pooka     409:                UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
                    410:                npages += ridx;
                    411:                goto out;
                    412:        }
                    413:
                    414:        /*
                    415:         * if PGO_OVERWRITE is set, don't bother reading the pages.
                    416:         */
                    417:
                    418:        if (overwrite) {
1.36.2.39  uebayasi  419: #if 0
1.36.2.46! uebayasi  420:                genfs_getpages_io_overwrite();
1.36.2.39  uebayasi  421:        } else {
1.36.2.46! uebayasi  422:                genfs_getpages_io_read();
1.36.2.39  uebayasi  423:        }
                    424: }
                    425:
                    426: int
1.36.2.46! uebayasi  427: genfs_getpages_io_overwrite()
1.36.2.39  uebayasi  428: {
                    429:        {
                    430: #endif
1.36.2.27  uebayasi  431:                if (!glocked) {
                    432:                        genfs_node_unlock(vp);
                    433:                }
1.1       pooka     434:                UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
                    435:
                    436:                for (i = 0; i < npages; i++) {
1.31      uebayasi  437:                        struct vm_page *pg = pgs[ridx + i];
1.1       pooka     438:
1.31      uebayasi  439:                        pg->flags &= ~(PG_RDONLY|PG_CLEAN);
1.1       pooka     440:                }
                    441:                npages += ridx;
                    442:                goto out;
                    443:        }
1.36.2.39  uebayasi  444: #if 0
                    445: }
1.1       pooka     446:
1.36.2.39  uebayasi  447: int
1.36.2.46! uebayasi  448: genfs_getpages_io_read()
1.36.2.39  uebayasi  449: {
                    450: #endif
1.1       pooka     451:        /*
                    452:         * the page wasn't resident and we're not overwriting,
                    453:         * so we're going to have to do some i/o.
                    454:         * find any additional pages needed to cover the expanded range.
                    455:         */
1.36.2.46! uebayasi  456: #if 0
        !           457: }
1.1       pooka     458:
1.36.2.46! uebayasi  459: int
        !           460: genfs_getpages_io_read_allocpages()
        !           461: {
        !           462: #endif
1.1       pooka     463:        npages = (endoffset - startoffset) >> PAGE_SHIFT;
1.29      uebayasi  464:        if (startoffset != origoffset || npages != orignmempages) {
1.31      uebayasi  465:                int npgs;
1.1       pooka     466:
                    467:                /*
                    468:                 * we need to avoid deadlocks caused by locking
                    469:                 * additional pages at lower offsets than pages we
                    470:                 * already have locked.  unlock them all and start over.
                    471:                 */
                    472:
1.29      uebayasi  473:                genfs_rel_pages(&pgs[ridx], orignmempages);
1.1       pooka     474:                memset(pgs, 0, pgs_size);
                    475:
                    476:                UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
                    477:                    startoffset, endoffset, 0,0);
                    478:                npgs = npages;
                    479:                if (uvn_findpages(uobj, startoffset, &npgs, pgs,
                    480:                    async ? UFP_NOWAIT : UFP_ALL) != npages) {
1.36.2.27  uebayasi  481:                        if (!glocked) {
                    482:                                genfs_node_unlock(vp);
                    483:                        }
1.1       pooka     484:                        KASSERT(async != 0);
                    485:                        genfs_rel_pages(pgs, npages);
1.2       ad        486:                        mutex_exit(&uobj->vmobjlock);
1.1       pooka     487:                        error = EBUSY;
1.33      uebayasi  488:                        goto out_err_free;
1.1       pooka     489:                }
                    490:        }
1.36.2.46! uebayasi  491: #ifdef XIP
        !           492: genfs_getpages_io_read_allocpages_done:
1.36.2.43  uebayasi  493: #endif
1.36.2.46! uebayasi  494: #if 0
        !           495: }
1.36.2.43  uebayasi  496:
1.36.2.46! uebayasi  497: int
        !           498: genfs_getpages_io_read_bio()
        !           499: {
        !           500: #endif
1.2       ad        501:        mutex_exit(&uobj->vmobjlock);
1.1       pooka     502:
1.34      uebayasi  503:     {
                    504:        size_t bytes, iobytes, tailstart, tailbytes, totalbytes, skipbytes;
1.36.2.44  uebayasi  505:        vaddr_t kva = 0;
                    506:        struct buf *bp = NULL, *mbp = NULL;
1.34      uebayasi  507:        bool sawhole = false;
                    508:
1.1       pooka     509:        /*
                    510:         * read the desired page(s).
                    511:         */
                    512:
                    513:        totalbytes = npages << PAGE_SHIFT;
                    514:        bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
                    515:        tailbytes = totalbytes - bytes;
                    516:        skipbytes = 0;
                    517:
1.36.2.44  uebayasi  518: #if 1
                    519:        if ((ap->a_vp->v_vflag & VV_XIP) != 0)
                    520:                goto genfs_getpages_bio_prepare_done;
                    521: #endif
1.36.2.46! uebayasi  522: #if 0
        !           523: }
1.36.2.44  uebayasi  524:
1.36.2.46! uebayasi  525: int
        !           526: genfs_getpages_io_read_bio_prepare()
        !           527: {
        !           528: #endif
1.1       pooka     529:        kva = uvm_pagermapin(pgs, npages,
                    530:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
                    531:
1.2       ad        532:        mbp = getiobuf(vp, true);
1.1       pooka     533:        mbp->b_bufsize = totalbytes;
                    534:        mbp->b_data = (void *)kva;
                    535:        mbp->b_resid = mbp->b_bcount = bytes;
1.2       ad        536:        mbp->b_cflags = BC_BUSY;
                    537:        if (async) {
                    538:                mbp->b_flags = B_READ | B_ASYNC;
                    539:                mbp->b_iodone = uvm_aio_biodone;
                    540:        } else {
                    541:                mbp->b_flags = B_READ;
                    542:                mbp->b_iodone = NULL;
1.36.2.41  uebayasi  543:        }
1.1       pooka     544:        if (async)
                    545:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
                    546:        else
                    547:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
1.36.2.46! uebayasi  548: #if 0
        !           549: }
1.1       pooka     550:
1.36.2.46! uebayasi  551: #endif
1.36.2.44  uebayasi  552: #if 1
                    553: genfs_getpages_bio_prepare_done:
                    554: #endif
                    555:
1.1       pooka     556:        /*
                    557:         * if EOF is in the middle of the range, zero the part past EOF.
                    558:         * skip over pages which are not PG_FAKE since in that case they have
                    559:         * valid data that we need to preserve.
                    560:         */
                    561:
                    562:        tailstart = bytes;
                    563:        while (tailbytes > 0) {
                    564:                const int len = PAGE_SIZE - (tailstart & PAGE_MASK);
                    565:
                    566:                KASSERT(len <= tailbytes);
                    567:                if ((pgs[tailstart >> PAGE_SHIFT]->flags & PG_FAKE) != 0) {
                    568:                        memset((void *)(kva + tailstart), 0, len);
                    569:                        UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
                    570:                            kva, tailstart, len, 0);
                    571:                }
                    572:                tailstart += len;
                    573:                tailbytes -= len;
                    574:        }
                    575:
1.36.2.44  uebayasi  576: #if 1
                    577:        if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
                    578:                error = genfs_do_getpages_xip_io(
                    579:                        ap->a_vp,
                    580:                        ap->a_offset,
                    581:                        ap->a_m,
                    582:                        ap->a_count,
                    583:                        ap->a_centeridx,
                    584:                        ap->a_access_type,
                    585:                        ap->a_advice,
                    586:                        ap->a_flags,
                    587:                        orignmempages);
                    588:                goto loopdone;
                    589:        }
                    590: #endif
1.36.2.46! uebayasi  591: #if 0
        !           592: }
1.36.2.44  uebayasi  593:
1.36.2.46! uebayasi  594: int
        !           595: genfs_getpages_io_read_bio_loop()
        !           596: {
        !           597: #endif
1.1       pooka     598:        /*
                    599:         * now loop over the pages, reading as needed.
                    600:         */
                    601:
                    602:        bp = NULL;
1.28      uebayasi  603:        off_t offset;
                    604:        for (offset = startoffset;
1.1       pooka     605:            bytes > 0;
                    606:            offset += iobytes, bytes -= iobytes) {
1.30      uebayasi  607:                int run;
1.25      uebayasi  608:                daddr_t lbn, blkno;
1.24      uebayasi  609:                int pidx;
1.26      uebayasi  610:                struct vnode *devvp;
1.1       pooka     611:
                    612:                /*
                    613:                 * skip pages which don't need to be read.
                    614:                 */
                    615:
                    616:                pidx = (offset - startoffset) >> PAGE_SHIFT;
                    617:                while ((pgs[pidx]->flags & PG_FAKE) == 0) {
                    618:                        size_t b;
                    619:
                    620:                        KASSERT((offset & (PAGE_SIZE - 1)) == 0);
                    621:                        if ((pgs[pidx]->flags & PG_RDONLY)) {
                    622:                                sawhole = true;
                    623:                        }
                    624:                        b = MIN(PAGE_SIZE, bytes);
                    625:                        offset += b;
                    626:                        bytes -= b;
                    627:                        skipbytes += b;
                    628:                        pidx++;
                    629:                        UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
                    630:                            offset, 0,0,0);
                    631:                        if (bytes == 0) {
                    632:                                goto loopdone;
                    633:                        }
                    634:                }
                    635:
                    636:                /*
                    637:                 * bmap the file to find out the blkno to read from and
                    638:                 * how much we can read in one i/o.  if bmap returns an error,
                    639:                 * skip the rest of the top-level i/o.
                    640:                 */
                    641:
                    642:                lbn = offset >> fs_bshift;
                    643:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
                    644:                if (error) {
                    645:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
1.36      uebayasi  646:                            lbn,error,0,0);
1.1       pooka     647:                        skipbytes += bytes;
1.36      uebayasi  648:                        bytes = 0;
1.1       pooka     649:                        goto loopdone;
                    650:                }
                    651:
                    652:                /*
                    653:                 * see how many pages can be read with this i/o.
                    654:                 * reduce the i/o size if necessary to avoid
                    655:                 * overwriting pages with valid data.
                    656:                 */
                    657:
                    658:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                    659:                    bytes);
                    660:                if (offset + iobytes > round_page(offset)) {
1.24      uebayasi  661:                        int pcount;
                    662:
1.1       pooka     663:                        pcount = 1;
                    664:                        while (pidx + pcount < npages &&
                    665:                            pgs[pidx + pcount]->flags & PG_FAKE) {
                    666:                                pcount++;
                    667:                        }
                    668:                        iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
                    669:                            (offset - trunc_page(offset)));
                    670:                }
                    671:
                    672:                /*
                    673:                 * if this block isn't allocated, zero it instead of
                    674:                 * reading it.  unless we are going to allocate blocks,
                    675:                 * mark the pages we zeroed PG_RDONLY.
                    676:                 */
                    677:
1.36      uebayasi  678:                if (blkno == (daddr_t)-1) {
1.1       pooka     679:                        int holepages = (round_page(offset + iobytes) -
                    680:                            trunc_page(offset)) >> PAGE_SHIFT;
                    681:                        UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
                    682:
                    683:                        sawhole = true;
                    684:                        memset((char *)kva + (offset - startoffset), 0,
                    685:                            iobytes);
                    686:                        skipbytes += iobytes;
                    687:
                    688:                        for (i = 0; i < holepages; i++) {
1.35      uebayasi  689:                                if (memwrite) {
1.1       pooka     690:                                        pgs[pidx + i]->flags &= ~PG_CLEAN;
                    691:                                }
                    692:                                if (!blockalloc) {
                    693:                                        pgs[pidx + i]->flags |= PG_RDONLY;
                    694:                                }
                    695:                        }
                    696:                        continue;
                    697:                }
                    698:
                    699:                /*
                    700:                 * allocate a sub-buf for this piece of the i/o
                    701:                 * (or just use mbp if there's only 1 piece),
                    702:                 * and start it going.
                    703:                 */
                    704:
                    705:                if (offset == startoffset && iobytes == bytes) {
                    706:                        bp = mbp;
                    707:                } else {
1.36      uebayasi  708:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
                    709:                            vp, bp, vp->v_numoutput, 0);
1.2       ad        710:                        bp = getiobuf(vp, true);
1.1       pooka     711:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
                    712:                }
                    713:                bp->b_lblkno = 0;
                    714:
                    715:                /* adjust physical blkno for partial blocks */
                    716:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
                    717:                    dev_bshift);
                    718:
                    719:                UVMHIST_LOG(ubchist,
                    720:                    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
1.36      uebayasi  721:                    bp, offset, bp->b_bcount, bp->b_blkno);
1.1       pooka     722:
                    723:                VOP_STRATEGY(devvp, bp);
                    724:        }
                    725:
                    726: loopdone:
1.36.2.44  uebayasi  727: #if 1
                    728:        if ((ap->a_vp->v_vflag & VV_XIP) != 0)
                    729:                goto genfs_getpages_biodone_done;
                    730: #endif
                    731: #if 0
                    732:
                    733: int
                    734: genfs_getpages_biodone()
                    735: {
                    736: #endif
1.1       pooka     737:        nestiobuf_done(mbp, skipbytes, error);
                    738:        if (async) {
                    739:                UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.36.2.27  uebayasi  740:                if (!glocked) {
                    741:                        genfs_node_unlock(vp);
                    742:                }
1.1       pooka     743:                error = 0;
1.33      uebayasi  744:                goto out_err_free;
1.1       pooka     745:        }
                    746:        if (bp != NULL) {
                    747:                error = biowait(mbp);
                    748:        }
                    749:
1.19      rmind     750:        /* Remove the mapping (make KVA available as soon as possible) */
                    751:        uvm_pagermapout(kva, npages);
                    752:
1.1       pooka     753:        /*
                    754:         * if this we encountered a hole then we have to do a little more work.
                    755:         * for read faults, we marked the page PG_RDONLY so that future
                    756:         * write accesses to the page will fault again.
                    757:         * for write faults, we must make sure that the backing store for
                    758:         * the page is completely allocated while the pages are locked.
                    759:         */
                    760:
                    761:        if (!error && sawhole && blockalloc) {
1.9       simonb    762:                /*
                    763:                 * XXX: This assumes that we come here only via
                    764:                 * the mmio path
                    765:                 */
1.11      yamt      766:                if (vp->v_mount->mnt_wapbl) {
1.9       simonb    767:                        error = WAPBL_BEGIN(vp->v_mount);
                    768:                }
                    769:
                    770:                if (!error) {
                    771:                        error = GOP_ALLOC(vp, startoffset,
                    772:                            npages << PAGE_SHIFT, 0, cred);
1.11      yamt      773:                        if (vp->v_mount->mnt_wapbl) {
1.9       simonb    774:                                WAPBL_END(vp->v_mount);
                    775:                        }
                    776:                }
                    777:
1.1       pooka     778:                UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
                    779:                    startoffset, npages << PAGE_SHIFT, error,0);
                    780:                if (!error) {
                    781:                        for (i = 0; i < npages; i++) {
1.31      uebayasi  782:                                struct vm_page *pg = pgs[i];
                    783:
                    784:                                if (pg == NULL) {
1.1       pooka     785:                                        continue;
                    786:                                }
1.31      uebayasi  787:                                pg->flags &= ~(PG_CLEAN|PG_RDONLY);
1.1       pooka     788:                                UVMHIST_LOG(ubchist, "mark dirty pg %p",
1.31      uebayasi  789:                                    pg,0,0,0);
1.1       pooka     790:                        }
                    791:                }
                    792:        }
1.36.2.44  uebayasi  793:
                    794:        putiobuf(mbp);
                    795: #if 0
                    796: }
                    797:
                    798: #endif
                    799: #if 1
                    800: genfs_getpages_biodone_done:
                    801:        {}
                    802: #endif
                    803:     }
                    804:
1.36.2.27  uebayasi  805:        if (!glocked) {
                    806:                genfs_node_unlock(vp);
                    807:        }
1.18      rmind     808:
1.36.2.44  uebayasi  809: #if 1
                    810:        if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
                    811:                error = genfs_do_getpages_xip_io_done(
                    812:                        ap->a_vp,
                    813:                        ap->a_offset,
                    814:                        ap->a_m,
                    815:                        ap->a_count,
                    816:                        ap->a_centeridx,
                    817:                        ap->a_access_type,
                    818:                        ap->a_advice,
                    819:                        ap->a_flags,
                    820:                        orignmempages);
                    821:                goto genfs_getpages_generic_io_done_done;
                    822:        }
                    823: #endif
                    824: #if 0
                    825:        else {
                    826:                error = genfs_getpages_generic_io_done();
                    827:        }
                    828: }
                    829:
                    830: int
                    831: genfs_getpages_generic_io_done()
                    832: {
                    833: #endif
1.18      rmind     834:
1.2       ad        835:        mutex_enter(&uobj->vmobjlock);
1.1       pooka     836:
                    837:        /*
                    838:         * we're almost done!  release the pages...
                    839:         * for errors, we free the pages.
                    840:         * otherwise we activate them and mark them as valid and clean.
                    841:         * also, unbusy pages that were not actually requested.
                    842:         */
                    843:
                    844:        if (error) {
                    845:                for (i = 0; i < npages; i++) {
1.31      uebayasi  846:                        struct vm_page *pg = pgs[i];
                    847:
                    848:                        if (pg == NULL) {
1.1       pooka     849:                                continue;
                    850:                        }
                    851:                        UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.31      uebayasi  852:                            pg, pg->flags, 0,0);
                    853:                        if (pg->flags & PG_FAKE) {
                    854:                                pg->flags |= PG_RELEASED;
1.1       pooka     855:                        }
                    856:                }
1.2       ad        857:                mutex_enter(&uvm_pageqlock);
1.1       pooka     858:                uvm_page_unbusy(pgs, npages);
1.2       ad        859:                mutex_exit(&uvm_pageqlock);
                    860:                mutex_exit(&uobj->vmobjlock);
1.1       pooka     861:                UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
1.33      uebayasi  862:                goto out_err_free;
1.1       pooka     863:        }
                    864:
                    865: out:
                    866:        UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
                    867:        error = 0;
1.2       ad        868:        mutex_enter(&uvm_pageqlock);
1.1       pooka     869:        for (i = 0; i < npages; i++) {
1.31      uebayasi  870:                struct vm_page *pg = pgs[i];
1.1       pooka     871:                if (pg == NULL) {
                    872:                        continue;
                    873:                }
                    874:                UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
                    875:                    pg, pg->flags, 0,0);
                    876:                if (pg->flags & PG_FAKE && !overwrite) {
                    877:                        pg->flags &= ~(PG_FAKE);
                    878:                        pmap_clear_modify(pgs[i]);
                    879:                }
1.35      uebayasi  880:                KASSERT(!memwrite || !blockalloc || (pg->flags & PG_RDONLY) == 0);
1.29      uebayasi  881:                if (i < ridx || i >= ridx + orignmempages || async) {
1.1       pooka     882:                        UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
                    883:                            pg, pg->offset,0,0);
                    884:                        if (pg->flags & PG_WANTED) {
                    885:                                wakeup(pg);
                    886:                        }
                    887:                        if (pg->flags & PG_FAKE) {
                    888:                                KASSERT(overwrite);
                    889:                                uvm_pagezero(pg);
                    890:                        }
                    891:                        if (pg->flags & PG_RELEASED) {
                    892:                                uvm_pagefree(pg);
                    893:                                continue;
                    894:                        }
                    895:                        uvm_pageenqueue(pg);
                    896:                        pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
                    897:                        UVM_PAGE_OWN(pg, NULL);
                    898:                }
                    899:        }
1.2       ad        900:        mutex_exit(&uvm_pageqlock);
1.36.2.44  uebayasi  901:
1.36.2.21  uebayasi  902:        if (memwrite) {
                    903:                genfs_markdirty(vp);
                    904:        }
1.2       ad        905:        mutex_exit(&uobj->vmobjlock);
1.1       pooka     906:        if (ap->a_m != NULL) {
                    907:                memcpy(ap->a_m, &pgs[ridx],
1.29      uebayasi  908:                    orignmempages * sizeof(struct vm_page *));
1.1       pooka     909:        }
1.36.2.44  uebayasi  910: #if 0
                    911: }
                    912:
                    913: #endif
                    914: #if 1
                    915: genfs_getpages_generic_io_done_done:
                    916:        {}
                    917: #endif
1.1       pooka     918:
1.33      uebayasi  919: out_err_free:
1.14      christos  920:        if (pgs != NULL && pgs != pgs_onstack)
1.1       pooka     921:                kmem_free(pgs, pgs_size);
1.33      uebayasi  922: out_err:
1.1       pooka     923:        if (has_trans)
                    924:                fstrans_done(vp->v_mount);
1.36.2.21  uebayasi  925:        return error;
1.1       pooka     926: }
                    927:
1.36.2.1  uebayasi  928: #ifdef XIP
1.36.2.9  uebayasi  929: /*
1.36.2.41  uebayasi  930:  * genfs_do_getpages_xip_io
1.36.2.9  uebayasi  931:  *      Return "direct pages" of XIP vnode.  The block addresses of XIP
                    932:  *      vnode pages are returned back to the VM fault handler as the
                    933:  *     actually mapped physical addresses.
                          *
                          *      For each of the orignmempages pages starting at origoffset, the
                          *      logical block holding the page is translated with VOP_BMAP(),
                          *      the resulting block number is scaled by the device block shift,
                          *      the offset of the page inside its filesystem block is added, and
                          *      the page found by uvn_findpage_xip() for that address is stored
                          *      in pps[i].
                          *
                          *      The caller must either pass PGO_GLOCKHELD in flags or hold the
                          *      genfs node lock for reading (asserted below).
                          *
                          *      npagesp, centeridx, access_type and advice are accepted but not
                          *      used by this function.
                          *
                          *      Always returns 0.  An unallocated block (blkno < 0) panics.
                          *
                          *      NOTE(review): a VOP_BMAP() failure is only caught by KASSERT().
                          *      If KASSERT() compiles to nothing in this configuration, blkno
                          *      and devvp would be used without having been set -- confirm.
                          *      Returning the error from here is not sufficient on its own: the
                          *      call site visible in genfs_getpages() overwrites the result with
                          *      that of genfs_do_getpages_xip_io_done(), which dereferences
                          *      every pps[] entry.
                    934:  */
1.36.2.1  uebayasi  935: static int
1.36.2.35  uebayasi  936: genfs_do_getpages_xip_io(
                    937:        struct vnode *vp,
1.36.2.42  uebayasi  938:        voff_t origoffset,
1.36.2.35  uebayasi  939:        struct vm_page **pps,
                    940:        int *npagesp,
                    941:        int centeridx,
                    942:        vm_prot_t access_type,
                    943:        int advice,
1.36.2.42  uebayasi  944:        int flags,
                    945:        const int orignmempages)
1.36.2.35  uebayasi  946: {
1.36.2.42  uebayasi  947:        const int fs_bshift = vp2fs_bshift(vp);
                    948:        const int dev_bshift = vp2dev_bshift(vp);
                    949:        const int fs_bsize = 1 << fs_bshift;
                    950:
1.36.2.1  uebayasi  951:        int error;
1.36.2.42  uebayasi  952:        off_t off;
1.36.2.1  uebayasi  953:        int i;
                    954:
1.36.2.35  uebayasi  955:        UVMHIST_FUNC("genfs_do_getpages_xip_io"); UVMHIST_CALLED(ubchist);
1.36.2.1  uebayasi  956:
1.36.2.44  uebayasi  957:        KASSERT(((flags & PGO_GLOCKHELD) != 0) || genfs_node_rdlocked(vp));
1.36.2.43  uebayasi  958:
                                /* these two values are only consumed by the UVMHIST_LOG below */
1.36.2.42  uebayasi  959: #ifdef UVMHIST
                    960:        const off_t startoffset = trunc_blk(origoffset);
                    961:        const off_t endoffset = round_blk(origoffset + PAGE_SIZE * orignmempages);
                    962: #endif
1.36.2.1  uebayasi  963:
1.36.2.42  uebayasi  964:        UVMHIST_LOG(ubchist, "xip npages=%d startoffset=%lx endoffset=%lx",
                    965:            orignmempages, (long)startoffset, (long)endoffset, 0);
1.36.2.1  uebayasi  966:
                                /* walk the requested range one page at a time */
1.36.2.42  uebayasi  967:        off = origoffset;
                    968:        for (i = 0; i < orignmempages; i++) {
1.36.2.1  uebayasi  969:                daddr_t lbn, blkno;
                    970:                int run;
                    971:                struct vnode *devvp;
                    972:
                                        /* logical block number of the fs block containing off */
                    973:                lbn = (off & ~(fs_bsize - 1)) >> fs_bshift;
                    974:
                    975:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
                                        /* NOTE(review): assertion is the only error check here */
                    976:                KASSERT(error == 0);
1.36.2.23  uebayasi  977:                UVMHIST_LOG(ubchist, "xip VOP_BMAP: lbn=%ld blkno=%ld run=%d",
                    978:                    (long)lbn, (long)blkno, run, 0);
1.36.2.1  uebayasi  979:
1.36.2.12  uebayasi  980:                /*
                    981:                 * XIP page metadata assignment
                    982:                 * - An unallocated block (blkno < 0) is meant to be redirected
                    983:                 *   to a dedicated zero'ed page, but that is not implemented:
                                         *   the code below panics instead.
                                         * - Otherwise the page is looked up by its address on devvp.
                    984:                 */
1.36.2.1  uebayasi  985:                if (blkno < 0) {
1.36.2.37  uebayasi  986:                        panic("XIP hole is not supported yet!");
1.36.2.1  uebayasi  987:                } else {
1.36.2.32  uebayasi  988:                        daddr_t blk_off, fs_off;
1.36.2.13  uebayasi  989:
                                                /*
                                                 * blk_off: block number scaled by the device
                                                 * block shift; fs_off: distance of off from the
                                                 * start of its filesystem block.
                                                 */
1.36.2.32  uebayasi  990:                        blk_off = blkno << dev_bshift;
                    991:                        fs_off = off - (lbn << fs_bshift);
                    992:
1.36.2.34  uebayasi  993:                        pps[i] = uvn_findpage_xip(devvp, &vp->v_uobj,
1.36.2.32  uebayasi  994:                            blk_off + fs_off);
                    995:                        KASSERT(pps[i] != NULL);
1.36.2.1  uebayasi  996:                }
                    997:
                    998:                UVMHIST_LOG(ubchist, "xip pgs %d => phys_addr=0x%lx (%p)",
                    999:                        i,
1.36.2.22  uebayasi 1000:                        (long)pps[i]->phys_addr,
1.36.2.1  uebayasi 1001:                        pps[i],
                   1002:                        0);
                   1003:
                   1004:                off += PAGE_SIZE;
                   1005:        }
                   1006:
1.36.2.44  uebayasi 1007:        return 0;
                   1008: }
                   1009:
                         /*
                          * genfs_do_getpages_xip_io_done
                          *      Second half of the XIP getpages path.  With the vnode's object
                          *      lock held, visit the first orignmempages entries of pps and,
                          *      for each page:
                          *      - assert that PG_RDONLY, PG_CLEAN and PG_DEVICE are set and
                          *        that PG_BUSY is clear,
                          *      - set PG_BUSY and clear PG_FAKE,
                          *      - point pg->uobject at the vnode's uvm_object.
                          *      Then store orignmempages in *npagesp.
                          *
                          *      origoffset, centeridx, access_type, advice and flags are
                          *      accepted but not used by this function.
                          *
                          *      Always returns 0.  Every pps[i] is dereferenced without a NULL
                          *      check, so all entries must already be valid pages.
                          *
                          *      NOTE(review): unlike genfs_do_getpages_xip_io() this function is
                          *      not declared static -- confirm whether external linkage is
                          *      intended.
                          */
                   1010: int
                   1011: genfs_do_getpages_xip_io_done(
                   1012:        struct vnode *vp,
                   1013:        voff_t origoffset,
                   1014:        struct vm_page **pps,
                   1015:        int *npagesp,
                   1016:        int centeridx,
                   1017:        vm_prot_t access_type,
                   1018:        int advice,
                   1019:        int flags,
                   1020:        const int orignmempages)
                   1021: {
                   1022:        struct uvm_object * const uobj = &vp->v_uobj;
                   1023:        int i;
                   1024:
                                /* page flags and ownership are only touched under the object lock */
1.36.2.26  uebayasi 1025:        mutex_enter(&uobj->vmobjlock);
1.36.2.13  uebayasi 1026:
1.36.2.42  uebayasi 1027:        for (i = 0; i < orignmempages; i++) {
1.36.2.13  uebayasi 1028:                struct vm_page *pg = pps[i];
                   1029:
                                        /* expected state of a page handed over by the XIP lookup */
1.36.2.31  uebayasi 1030:                KASSERT((pg->flags & PG_RDONLY) != 0);
1.36.2.33  uebayasi 1031:                KASSERT((pg->flags & PG_BUSY) == 0);
                   1032:                KASSERT((pg->flags & PG_CLEAN) != 0);
                   1033:                KASSERT((pg->flags & PG_DEVICE) != 0);
                                        /* mark the page busy, drop PG_FAKE, attach it to this vnode */
                   1034:                pg->flags |= PG_BUSY;
                   1035:                pg->flags &= ~PG_FAKE;
                   1036:                pg->uobject = &vp->v_uobj;
1.36.2.13  uebayasi 1037:        }
                   1038:
1.36.2.36  uebayasi 1039:        mutex_exit(&uobj->vmobjlock);
1.36.2.13  uebayasi 1040:
                                /* report the number of pages returned */
1.36.2.42  uebayasi 1041:        *npagesp = orignmempages;
1.36.2.1  uebayasi 1042:
                   1043:        return 0;
                   1044: }
                   1045: #endif
                   1046:
1.1       pooka    1047: /*
                   1048:  * generic VM putpages routine.
                   1049:  * Write the given range of pages to backing store.
                   1050:  *
                   1051:  * => "offhi == 0" means flush all pages at or after "offlo".
                   1052:  * => object should be locked by caller.  we return with the
                   1053:  *      object unlocked.
                   1054:  * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
                   1055:  *     thus, a caller might want to unlock higher level resources
                   1056:  *     (e.g. vm_map) before calling flush.
                   1057:  * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, we will not block
                   1058:  * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
                   1059:  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
                   1060:  *     that new pages are inserted on the tail end of the list.   thus,
                   1061:  *     we can make a complete pass through the object in one go by starting
                   1062:  *     at the head and working towards the tail (new pages are put in
                   1063:  *     front of us).
                   1064:  * => NOTE: we are allowed to lock the page queues, so the caller
                   1065:  *     must not be holding the page queue lock.
                   1066:  *
                   1067:  * note on "cleaning" object and PG_BUSY pages:
                   1068:  *     this routine is holding the lock on the object.   the only time
                   1069:  *     that it can run into a PG_BUSY page that it does not own is if
                   1070:  *     some other process has started I/O on the page (e.g. either
                   1071:  *     a pagein, or a pageout).    if the PG_BUSY page is being paged
                   1072:  *     in, then it can not be dirty (!PG_CLEAN) because no one has
                   1073:  *     had a chance to modify it yet.    if the PG_BUSY page is being
                   1074:  *     paged out then it means that someone else has already started
                   1075:  *     cleaning the page for us (how nice!).    in this case, if we
                   1076:  *     have syncio specified, then after we make our pass through the
                   1077:  *     object we need to wait for the other PG_BUSY pages to clear
                   1078:  *     off (i.e. we need to do an iosync).   also note that once a
                   1079:  *     page is PG_BUSY it must stay in its object until it is un-busyed.
                   1080:  *
                   1081:  * note on page traversal:
                   1082:  *     we can traverse the pages in an object either by going down the
                   1083:  *     linked list in "uobj->memq", or we can go over the address range
                   1084:  *     by page doing hash table lookups for each address.    depending
                   1085:  *     on how many pages are in the object it may be cheaper to do one
                   1086:  *     or the other.   we set "by_list" to true if we are using memq.
                   1087:  *     if the cost of a hash lookup was equal to the cost of the list
                   1088:  *     traversal we could compare the number of pages in the start->stop
                   1089:  *     range to the total number of pages in the object.   however, it
                   1090:  *     seems that a hash table lookup is more expensive than the linked
                   1091:  *     list traversal, so we multiply the number of pages in the
                   1092:  *     range by an estimate of the relatively higher cost of the hash lookup.
                   1093:  */
                   1094:
                         /*
                          * genfs_putpages
                          *      Putpages entry point taking the generic argument pointer v,
                          *      which is interpreted as a struct vop_putpages_args.  The
                          *      arguments are forwarded unchanged, with a NULL busypg, to
                          *      genfs_do_putpages_xip() when XIP is compiled in and the vnode
                          *      has VV_XIP set, and to genfs_do_putpages() otherwise.
                          *      Returns whatever the selected worker returns.
                          */
                   1095: int
                   1096: genfs_putpages(void *v)
                   1097: {
                   1098:        struct vop_putpages_args /* {
                   1099:                struct vnode *a_vp;
                   1100:                voff_t a_offlo;
                   1101:                voff_t a_offhi;
                   1102:                int a_flags;
1.22      uebayasi 1103:        } */ * const ap = v;
1.1       pooka    1104:
                                /*
                                 * When XIP is defined, the "else" below binds to the
                                 * unconditional return that follows the #endif.
                                 */
1.36.2.24  uebayasi 1105: #ifdef XIP
                   1106:        if ((ap->a_vp->v_vflag & VV_XIP) != 0)
                   1107:                return genfs_do_putpages_xip(ap->a_vp, ap->a_offlo, ap->a_offhi,
                   1108:                    ap->a_flags, NULL);
                   1109:        else
                   1110: #endif
1.1       pooka    1111:        return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
                   1112:            ap->a_flags, NULL);
                   1113: }
                   1114:
                   1115: int
1.4       yamt     1116: genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff,
                   1117:     int origflags, struct vm_page **busypg)
1.1       pooka    1118: {
                                /*
                                 * genfs_do_putpages: scan the pages of vp's uvm_object in
                                 * [startoff, endoff) and, as selected by origflags, write
                                 * dirty pages with GOP_WRITE (PGO_CLEANIT), deactivate them
                                 * (PGO_DEACTIVATE) and/or free them (PGO_FREE).
                                 *
                                 * => startoff and endoff must be page aligned (asserted
                                 *    below); endoff == 0 or PGO_ALLPAGES extends the range
                                 *    to trunc_page(LLONG_MAX).
                                 * => without PGO_SYNCIO the request is treated as async and
                                 *    the function does not wait for v_numoutput to drain.
                                 * => with PGO_BUSYFAIL, meeting a busy page ends the scan
                                 *    with EDEADLK and, if busypg is not NULL, the page is
                                 *    stored through it.
                                 * => every return path here has released the object lock
                                 *    (slock).  The first lock operation on each path is a
                                 *    release, so the caller presumably enters with the lock
                                 *    held -- confirm against callers.
                                 * => returns 0, or the error from fstrans_start_nowait,
                                 *    WAPBL_BEGIN or GOP_WRITE, or EDEADLK as described above.
                                 */
1.22      uebayasi 1119:        struct uvm_object * const uobj = &vp->v_uobj;
                   1120:        kmutex_t * const slock = &uobj->vmobjlock;
1.1       pooka    1121:        off_t off;
                   1122:        /* Even for strange MAXPHYS, the shift rounds down to a page */
                   1123: #define maxpages (MAXPHYS >> PAGE_SHIFT)
1.2       ad       1124:        int i, error, npages, nback;
1.1       pooka    1125:        int freeflag;
                   1126:        struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
                   1127:        bool wasclean, by_list, needs_clean, yld;
1.4       yamt     1128:        bool async = (origflags & PGO_SYNCIO) == 0;
1.1       pooka    1129:        bool pagedaemon = curlwp == uvm.pagedaemon_lwp;
1.22      uebayasi 1130:        struct lwp * const l = curlwp ? curlwp : &lwp0;
                   1131:        struct genfs_node * const gp = VTOG(vp);
1.4       yamt     1132:        int flags;
1.1       pooka    1133:        int dirtygen;
1.4       yamt     1134:        bool modified;
1.12      hannken  1135:        bool need_wapbl;
1.4       yamt     1136:        bool has_trans;
1.1       pooka    1137:        bool cleanall;
1.4       yamt     1138:        bool onworklst;
1.1       pooka    1139:
                   1140:        UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
                   1141:
                                /*
                                 * at least one action must be requested, the offsets must be
                                 * page aligned, and the range must be non-empty unless endoff
                                 * is the 0 wildcard.
                                 */
1.4       yamt     1142:        KASSERT(origflags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1.1       pooka    1143:        KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
                   1144:        KASSERT(startoff < endoff || endoff == 0);
                   1145:
                   1146:        UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
                   1147:            vp, uobj->uo_npages, startoff, endoff - startoff);
                   1148:
                                /*
                                 * a WAPBL transaction is wanted only when we are not the
                                 * pagedaemon, the vnode's mount has mnt_wapbl set, and the
                                 * caller did not pass PGO_JOURNALLOCKED.
                                 */
1.6       hannken  1149:        has_trans = false;
1.12      hannken  1150:        need_wapbl = (!pagedaemon && vp->v_mount && vp->v_mount->mnt_wapbl &&
                   1151:            (origflags & PGO_JOURNALLOCKED) == 0);
1.6       hannken  1152:
1.4       yamt     1153: retry:
                                /*
                                 * each pass starts again from the caller's flags; the local
                                 * copy may have PGO_CLEANIT cleared further down.
                                 */
                   1154:        modified = false;
                   1155:        flags = origflags;
1.1       pooka    1156:        KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 ||
                   1157:            (vp->v_iflag & VI_WRMAPDIRTY) == 0);
                                /*
                                 * no pages at all: clear VI_WRMAPDIRTY, leave the syncer
                                 * worklist if there are no dirty buffers either, end any
                                 * transaction begun on an earlier pass, and return with
                                 * the lock released.
                                 */
                   1158:        if (uobj->uo_npages == 0) {
                   1159:                if (vp->v_iflag & VI_ONWORKLST) {
                   1160:                        vp->v_iflag &= ~VI_WRMAPDIRTY;
                   1161:                        if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                   1162:                                vn_syncer_remove_from_worklist(vp);
                   1163:                }
1.12      hannken  1164:                if (has_trans) {
                   1165:                        if (need_wapbl)
                   1166:                                WAPBL_END(vp->v_mount);
1.6       hannken  1167:                        fstrans_done(vp->v_mount);
1.12      hannken  1168:                }
1.2       ad       1169:                mutex_exit(slock);
1.1       pooka    1170:                return (0);
                   1171:        }
                   1172:
                   1173:        /*
                   1174:         * the vnode has pages, set up to process the request.
                   1175:         */
                   1176:
                                /*
                                 * when cleaning, enter an fstrans (and, if needed, WAPBL)
                                 * transaction once.  slock is dropped around this, so after
                                 * retaking it we go back to retry and look at the object
                                 * again.  the pagedaemon uses the nowait variant and returns
                                 * the error if that fails; both error returns happen with
                                 * slock already released.
                                 */
1.6       hannken  1177:        if (!has_trans && (flags & PGO_CLEANIT) != 0) {
1.2       ad       1178:                mutex_exit(slock);
1.1       pooka    1179:                if (pagedaemon) {
                   1180:                        error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY);
                   1181:                        if (error)
                   1182:                                return error;
                   1183:                } else
                   1184:                        fstrans_start(vp->v_mount, FSTRANS_LAZY);
1.12      hannken  1185:                if (need_wapbl) {
                   1186:                        error = WAPBL_BEGIN(vp->v_mount);
                   1187:                        if (error) {
                   1188:                                fstrans_done(vp->v_mount);
                   1189:                                return error;
                   1190:                        }
                   1191:                }
1.1       pooka    1192:                has_trans = true;
1.2       ad       1193:                mutex_enter(slock);
1.6       hannken  1194:                goto retry;
1.1       pooka    1195:        }
                   1196:
                                /*
                                 * wasclean starts out true only if no output is in flight.
                                 * by_list selects walking memq instead of per-offset lookups
                                 * when the object holds no more pages than the number of
                                 * pages in the range scaled by UVM_PAGE_TREE_PENALTY.
                                 */
                   1197:        error = 0;
                   1198:        wasclean = (vp->v_numoutput == 0);
                   1199:        off = startoff;
                   1200:        if (endoff == 0 || flags & PGO_ALLPAGES) {
                   1201:                endoff = trunc_page(LLONG_MAX);
                   1202:        }
                   1203:        by_list = (uobj->uo_npages <=
1.17      yamt     1204:            ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);
1.1       pooka    1205:
                   1206: #if !defined(DEBUG)
                   1207:        /*
                   1208:         * if this vnode is known not to have dirty pages,
                   1209:         * don't bother to clean it out.
                   1210:         */
                   1211:
                                /*
                                 * a vnode that is not on the syncer worklist is only scanned
                                 * when pages are to be freed or deactivated, and then with
                                 * PGO_CLEANIT cleared from the local flags.
                                 */
                   1212:        if ((vp->v_iflag & VI_ONWORKLST) == 0) {
                   1213:                if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
                   1214:                        goto skip_scan;
                   1215:                }
                   1216:                flags &= ~PGO_CLEANIT;
                   1217:        }
                   1218: #endif /* !defined(DEBUG) */
                   1219:
                   1220:        /*
                   1221:         * start the loop.  when scanning by list, hold the last page
                   1222:         * in the list before we start.  pages allocated after we start
                   1223:         * will be added to the end of the list, so we can stop at the
                   1224:         * current last page.
                   1225:         */
                   1226:
                                /*
                                 * cleanall: a cleaning pass over the whole file of a vnode
                                 * that is on the worklist and had no output in flight; only
                                 * such a pass may take the vnode off the worklist afterwards.
                                 * dirtygen is a snapshot of gp->g_dirtygen that is compared
                                 * again later.  busy pages being freed are tagged with
                                 * freeflag: PG_PAGEOUT for the pagedaemon, PG_RELEASED
                                 * otherwise.
                                 */
                   1227:        cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
                   1228:            startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
                   1229:            (vp->v_iflag & VI_ONWORKLST) != 0;
                   1230:        dirtygen = gp->g_dirtygen;
                   1231:        freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
                   1232:        if (by_list) {
                                        /*
                                         * endmp marks the current tail of memq; curmp is
                                         * inserted later to keep our place whenever slock
                                         * is dropped.
                                         */
1.36.2.21  uebayasi 1233:                curmp.flags = PG_MARKER;
                   1234:                endmp.flags = PG_MARKER;
1.1       pooka    1235:                pg = TAILQ_FIRST(&uobj->memq);
1.8       ad       1236:                TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
1.1       pooka    1237:        } else {
                   1238:                pg = uvm_pagelookup(uobj, off);
                   1239:        }
                   1240:        nextpg = NULL;
                   1241:        while (by_list || off < endoff) {
                   1242:
                   1243:                /*
                   1244:                 * if the current page is not interesting, move on to the next.
                   1245:                 */
                   1246:
1.36.2.21  uebayasi 1247:                KASSERT(pg == NULL || pg->uobject == uobj ||
                   1248:                    (pg->flags & PG_MARKER) != 0);
1.1       pooka    1249:                KASSERT(pg == NULL ||
                   1250:                    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1.36.2.21  uebayasi 1251:                    (pg->flags & (PG_BUSY|PG_MARKER)) != 0);
1.1       pooka    1252:                if (by_list) {
                                                /*
                                                 * list walk: stop at endmp, step over any
                                                 * other marker page, and skip pages outside
                                                 * the range or already flagged
                                                 * PG_RELEASED/PG_PAGEOUT.  seeing one of the
                                                 * latter clears wasclean.
                                                 */
                   1253:                        if (pg == &endmp) {
                   1254:                                break;
                   1255:                        }
1.36.2.21  uebayasi 1256:                        if (pg->flags & PG_MARKER) {
                   1257:                                pg = TAILQ_NEXT(pg, listq.queue);
                   1258:                                continue;
                   1259:                        }
1.1       pooka    1260:                        if (pg->offset < startoff || pg->offset >= endoff ||
                   1261:                            pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                   1262:                                if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                   1263:                                        wasclean = false;
                   1264:                                }
1.8       ad       1265:                                pg = TAILQ_NEXT(pg, listq.queue);
1.1       pooka    1266:                                continue;
                   1267:                        }
                   1268:                        off = pg->offset;
                   1269:                } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
                                                /*
                                                 * lookup walk: no page at this offset, or one
                                                 * that is already on its way out; advance by
                                                 * one page.
                                                 */
                   1270:                        if (pg != NULL) {
                   1271:                                wasclean = false;
                   1272:                        }
                   1273:                        off += PAGE_SIZE;
                   1274:                        if (off < endoff) {
                   1275:                                pg = uvm_pagelookup(uobj, off);
                   1276:                        }
                   1277:                        continue;
                   1278:                }
                   1279:
                   1280:                /*
                   1281:                 * if the current page needs to be cleaned and it's busy,
                   1282:                 * wait for it to become unbusy.
                   1283:                 */
                   1284:
                                        /*
                                         * yld: a non-pagedaemon caller also drops the lock
                                         * here when the current CPU's scheduler state has
                                         * SPCF_SHOULDYIELD set, even if the page is not busy.
                                         */
                   1285:                yld = (l->l_cpu->ci_schedstate.spc_flags &
                   1286:                    SPCF_SHOULDYIELD) && !pagedaemon;
                   1287:                if (pg->flags & PG_BUSY || yld) {
                   1288:                        UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
                   1289:                        if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
                   1290:                                UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
                   1291:                                error = EDEADLK;
                   1292:                                if (busypg != NULL)
                   1293:                                        *busypg = pg;
                   1294:                                break;
                   1295:                        }
                   1296:                        if (pagedaemon) {
                   1297:                                /*
                   1298:                                 * someone has taken the page while we
                   1299:                                 * dropped the lock for fstrans_start.
                   1300:                                 */
                   1301:                                break;
                   1302:                        }
                                                /*
                                                 * park curmp in front of pg so our position
                                                 * in memq survives while slock is released;
                                                 * the scan resumes from whatever follows
                                                 * curmp once the lock is held again.
                                                 */
                   1303:                        if (by_list) {
1.8       ad       1304:                                TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
1.1       pooka    1305:                                UVMHIST_LOG(ubchist, "curmp next %p",
1.8       ad       1306:                                    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
1.1       pooka    1307:                        }
                   1308:                        if (yld) {
1.2       ad       1309:                                mutex_exit(slock);
1.1       pooka    1310:                                preempt();
1.2       ad       1311:                                mutex_enter(slock);
1.1       pooka    1312:                        } else {
                   1313:                                pg->flags |= PG_WANTED;
                   1314:                                UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1.2       ad       1315:                                mutex_enter(slock);
1.1       pooka    1316:                        }
                   1317:                        if (by_list) {
                   1318:                                UVMHIST_LOG(ubchist, "after next %p",
1.8       ad       1319:                                    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
                   1320:                                pg = TAILQ_NEXT(&curmp, listq.queue);
                   1321:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
1.1       pooka    1322:                        } else {
                   1323:                                pg = uvm_pagelookup(uobj, off);
                   1324:                        }
                   1325:                        continue;
                   1326:                }
                   1327:
                   1328:                /*
                   1329:                 * if we're freeing, remove all mappings of the page now.
                   1330:                 * if we're cleaning, check if the page needs to be cleaned.
                   1331:                 */
                   1332:
                   1333:                if (flags & PGO_FREE) {
                   1334:                        pmap_page_protect(pg, VM_PROT_NONE);
                   1335:                } else if (flags & PGO_CLEANIT) {
                   1336:
                   1337:                        /*
                   1338:                         * if we still have some hope to pull this vnode off
                   1339:                         * from the syncer queue, write-protect the page.
                   1340:                         */
                   1341:
                   1342:                        if (cleanall && wasclean &&
                   1343:                            gp->g_dirtygen == dirtygen) {
                   1344:
                   1345:                                /*
                   1346:                                 * uobj pages get wired only by uvm_fault
                   1347:                                 * where uobj is locked.
                   1348:                                 */
                   1349:
                   1350:                                if (pg->wire_count == 0) {
                   1351:                                        pmap_page_protect(pg,
                   1352:                                            VM_PROT_READ|VM_PROT_EXECUTE);
                   1353:                                } else {
                   1354:                                        cleanall = false;
                   1355:                                }
                   1356:                        }
                   1357:                }
                   1358:
                                        /*
                                         * when cleaning, the page needs a write if the pmap
                                         * modified state was set or PG_CLEAN was not; either
                                         * way PG_CLEAN is set on it now, before any i/o is
                                         * started.
                                         */
                   1359:                if (flags & PGO_CLEANIT) {
                   1360:                        needs_clean = pmap_clear_modify(pg) ||
                   1361:                            (pg->flags & PG_CLEAN) == 0;
                   1362:                        pg->flags |= PG_CLEAN;
                   1363:                } else {
                   1364:                        needs_clean = false;
                   1365:                }
                   1366:
                   1367:                /*
                   1368:                 * if we're cleaning, build a cluster.
                   1369:                 * the cluster will consist of pages which are currently dirty,
                   1370:                 * but they will be returned to us marked clean.
                   1371:                 * if not cleaning, just operate on the one page.
                   1372:                 */
                   1373:
                   1374:                if (needs_clean) {
                   1375:                        KDASSERT((vp->v_iflag & VI_ONWORKLST));
                   1376:                        wasclean = false;
                   1377:                        memset(pgs, 0, sizeof(pgs));
                   1378:                        pg->flags |= PG_BUSY;
                   1379:                        UVM_PAGE_OWN(pg, "genfs_putpages");
                   1380:
                   1381:                        /*
                   1382:                         * first look backward.
                   1383:                         */
                   1384:
                                                /*
                                                 * ask for at most half of maxpages, and no
                                                 * more than the pages that precede off.
                                                 * nback is passed by address; presumably it
                                                 * comes back as the number of pages found.
                                                 * those nback entries are then moved from the
                                                 * end of the first npages slots to the front
                                                 * of pgs[] and the vacated slots are zeroed.
                                                 */
                   1385:                        npages = MIN(maxpages >> 1, off >> PAGE_SHIFT);
                   1386:                        nback = npages;
                   1387:                        uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
                   1388:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
                   1389:                        if (nback) {
                   1390:                                memmove(&pgs[0], &pgs[npages - nback],
                   1391:                                    nback * sizeof(pgs[0]));
                   1392:                                if (npages - nback < nback)
                   1393:                                        memset(&pgs[nback], 0,
                   1394:                                            (npages - nback) * sizeof(pgs[0]));
                   1395:                                else
                   1396:                                        memset(&pgs[npages - nback], 0,
                   1397:                                            nback * sizeof(pgs[0]));
                   1398:                        }
                   1399:
                   1400:                        /*
                   1401:                         * then plug in our page of interest.
                   1402:                         */
                   1403:
                   1404:                        pgs[nback] = pg;
                   1405:
                   1406:                        /*
                   1407:                         * then look forward to fill in the remaining space in
                   1408:                         * the array of pages.
                   1409:                         */
                   1410:
                   1411:                        npages = maxpages - nback - 1;
                   1412:                        uvn_findpages(uobj, off + PAGE_SIZE, &npages,
                   1413:                            &pgs[nback + 1],
                   1414:                            UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
                                                /* npages now counts the whole cluster. */
                   1415:                        npages += nback + 1;
                   1416:                } else {
                   1417:                        pgs[0] = pg;
                   1418:                        npages = 1;
                   1419:                        nback = 0;
                   1420:                }
                   1421:
                   1422:                /*
                   1423:                 * apply FREE or DEACTIVATE options if requested.
                   1424:                 */
                   1425:
                                        /*
                                         * uvm_pageqlock is held around the loop only when
                                         * pages will be deactivated or freed.
                                         */
                   1426:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1.2       ad       1427:                        mutex_enter(&uvm_pageqlock);
1.1       pooka    1428:                }
                   1429:                for (i = 0; i < npages; i++) {
                   1430:                        tpg = pgs[i];
                   1431:                        KASSERT(tpg->uobject == uobj);
                                                /*
                                                 * when a cluster page is the list successor
                                                 * of pg, advance pg to it, so that pg ends up
                                                 * past the run of cluster pages that directly
                                                 * follow it in memq.
                                                 */
1.8       ad       1432:                        if (by_list && tpg == TAILQ_NEXT(pg, listq.queue))
1.1       pooka    1433:                                pg = tpg;
                                                /*
                                                 * cluster pages outside the requested range
                                                 * are written but not deactivated or freed.
                                                 */
                   1434:                        if (tpg->offset < startoff || tpg->offset >= endoff)
                   1435:                                continue;
                   1436:                        if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
                   1437:                                uvm_pagedeactivate(tpg);
                   1438:                        } else if (flags & PGO_FREE) {
                   1439:                                pmap_page_protect(tpg, VM_PROT_NONE);
                   1440:                                if (tpg->flags & PG_BUSY) {
                                                                /*
                                                                 * busy page: only tag it with
                                                                 * freeflag here rather than
                                                                 * freeing it.
                                                                 */
                   1441:                                        tpg->flags |= freeflag;
                   1442:                                        if (pagedaemon) {
1.2       ad       1443:                                                uvm_pageout_start(1);
1.1       pooka    1444:                                                uvm_pagedequeue(tpg);
                   1445:                                        }
                   1446:                                } else {
                   1447:
                   1448:                                        /*
                   1449:                                         * ``page is not busy''
                   1450:                                         * implies that npages is 1
                   1451:                                         * and needs_clean is false.
                   1452:                                         */
                   1453:
                                                                /*
                                                                 * remember the successor
                                                                 * before the page is freed.
                                                                 */
1.8       ad       1454:                                        nextpg = TAILQ_NEXT(tpg, listq.queue);
1.1       pooka    1455:                                        uvm_pagefree(tpg);
                   1456:                                        if (pagedaemon)
                   1457:                                                uvmexp.pdfreed++;
                   1458:                                }
                   1459:                        }
                   1460:                }
                   1461:                if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1.2       ad       1462:                        mutex_exit(&uvm_pageqlock);
1.1       pooka    1463:                }
                   1464:                if (needs_clean) {
                   1465:                        modified = true;
                   1466:
                   1467:                        /*
                   1468:                         * start the i/o.  if we're traversing by list,
                   1469:                         * keep our place in the list with a marker page.
                   1470:                         */
                   1471:
                   1472:                        if (by_list) {
                   1473:                                TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1.8       ad       1474:                                    listq.queue);
1.1       pooka    1475:                        }
                                                /* GOP_WRITE is called with slock released. */
1.2       ad       1476:                        mutex_exit(slock);
1.1       pooka    1477:                        error = GOP_WRITE(vp, pgs, npages, flags);
1.2       ad       1478:                        mutex_enter(slock);
1.1       pooka    1479:                        if (by_list) {
1.8       ad       1480:                                pg = TAILQ_NEXT(&curmp, listq.queue);
                   1481:                                TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
1.1       pooka    1482:                        }
                   1483:                        if (error) {
                   1484:                                break;
                   1485:                        }
                                                /*
                                                 * by list, pg already points at the page
                                                 * after the marker; by offset, fall through
                                                 * to advance off past the cluster.
                                                 */
                   1486:                        if (by_list) {
                   1487:                                continue;
                   1488:                        }
                   1489:                }
                   1490:
                   1491:                /*
                   1492:                 * find the next page and continue if there was no error.
                   1493:                 */
                   1494:
                   1495:                if (by_list) {
                   1496:                        if (nextpg) {
                   1497:                                pg = nextpg;
                   1498:                                nextpg = NULL;
                   1499:                        } else {
1.8       ad       1500:                                pg = TAILQ_NEXT(pg, listq.queue);
1.1       pooka    1501:                        }
                   1502:                } else {
                                                /*
                                                 * npages - nback is the page of interest plus
                                                 * the pages found by the forward look.
                                                 */
                   1503:                        off += (npages - nback) << PAGE_SHIFT;
                   1504:                        if (off < endoff) {
                   1505:                                pg = uvm_pagelookup(uobj, off);
                   1506:                        }
                   1507:                }
                   1508:        }
                                /* the end marker was inserted only for the list walk. */
                   1509:        if (by_list) {
1.8       ad       1510:                TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
1.1       pooka    1511:        }
                   1512:
                                /*
                                 * if any write was started and the vnode has VI_WRMAPDIRTY
                                 * set, tell the filesystem the file was modified; block
                                 * devices are exempt when their mount has MNT_NODEVMTIME.
                                 */
                   1513:        if (modified && (vp->v_iflag & VI_WRMAPDIRTY) != 0 &&
                   1514:            (vp->v_type != VBLK ||
                   1515:            (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
                   1516:                GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
                   1517:        }
                   1518:
                   1519:        /*
                   1520:         * if we're cleaning and there was nothing to clean,
                   1521:         * take us off the syncer list.  if we started any i/o
                   1522:         * and we're doing sync i/o, wait for all writes to finish.
                   1523:         */
                   1524:
                   1525:        if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
                   1526:            (vp->v_iflag & VI_ONWORKLST) != 0) {
1.5       yamt     1527: #if defined(DEBUG)
                                        /*
                                         * DEBUG only: report any non-marker page that is
                                         * still not PG_CLEAN or that the pmap reports as
                                         * modified.
                                         */
1.8       ad       1528:                TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
1.36.2.21  uebayasi 1529:                        if ((pg->flags & PG_MARKER) != 0) {
                   1530:                                continue;
                   1531:                        }
1.5       yamt     1532:                        if ((pg->flags & PG_CLEAN) == 0) {
                   1533:                                printf("%s: %p: !CLEAN\n", __func__, pg);
                   1534:                        }
                   1535:                        if (pmap_is_modified(pg)) {
                   1536:                                printf("%s: %p: modified\n", __func__, pg);
                   1537:                        }
                   1538:                }
                   1539: #endif /* defined(DEBUG) */
1.1       pooka    1540:                vp->v_iflag &= ~VI_WRMAPDIRTY;
                   1541:                if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
                   1542:                        vn_syncer_remove_from_worklist(vp);
                   1543:        }
                   1544:
                   1545: #if !defined(DEBUG)
                   1546: skip_scan:
                   1547: #endif /* !defined(DEBUG) */
1.2       ad       1548:
                   1549:        /* Wait for output to complete. */
                   1550:        if (!wasclean && !async && vp->v_numoutput != 0) {
                   1551:                while (vp->v_numoutput != 0)
                   1552:                        cv_wait(&vp->v_cv, slock);
1.1       pooka    1553:        }
                                /* sample the worklist state while slock is still held. */
1.4       yamt     1554:        onworklst = (vp->v_iflag & VI_ONWORKLST) != 0;
1.2       ad       1555:        mutex_exit(slock);
1.1       pooka    1556:
1.4       yamt     1557:        if ((flags & PGO_RECLAIM) != 0 && onworklst) {
                   1558:                /*
                   1559:                 * in the case of PGO_RECLAIM, ensure to make the vnode clean.
                   1560:                 * retrying is not a big deal because, in many cases,
                   1561:                 * uobj->uo_npages is already 0 here.
                   1562:                 */
                   1563:                mutex_enter(slock);
                   1564:                goto retry;
                   1565:        }
                   1566:
                                /*
                                 * end the transaction(s) begun before cleaning, in the
                                 * reverse order of their start.
                                 */
1.12      hannken  1567:        if (has_trans) {
                   1568:                if (need_wapbl)
                   1569:                        WAPBL_END(vp->v_mount);
1.6       hannken  1570:                fstrans_done(vp->v_mount);
1.12      hannken  1571:        }
1.6       hannken  1572:
1.1       pooka    1573:        return (error);
                   1574: }
                   1575:
1.36.2.24  uebayasi 1576: #ifdef XIP
                   1577: int
                   1578: genfs_do_putpages_xip(struct vnode *vp, off_t startoff, off_t endoff,
                   1579:     int flags, struct vm_page **busypg)
                   1580: {
                   1581:        struct uvm_object *uobj = &vp->v_uobj;
1.36.2.25  uebayasi 1582: #ifdef DIAGNOSTIC
1.36.2.24  uebayasi 1583:        struct genfs_node * const gp = VTOG(vp);
1.36.2.25  uebayasi 1584: #endif
1.36.2.24  uebayasi 1585:
                   1586:        UVMHIST_FUNC("genfs_do_putpages_xip"); UVMHIST_CALLED(ubchist);
                   1587:
                   1588:        KASSERT(mutex_owned(&uobj->vmobjlock));
                   1589:        KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
                   1590:        KASSERT(vp->v_numoutput == 0);
                   1591:        KASSERT(gp->g_dirtygen == 0);
                   1592:
                   1593:        UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
                   1594:            vp, uobj->uo_npages, startoff, endoff - startoff);
                   1595:
                   1596:        /*
                   1597:         * XIP pages are read-only, and never become dirty.  They're also never
                   1598:         * queued.  PGO_DEACTIVATE and PGO_CLEANIT are meaningless for XIP
                   1599:         * pages, so we ignore them.
                   1600:         */
                   1601:        if ((flags & PGO_FREE) == 0)
                   1602:                goto done;
                   1603:
                   1604:        /*
                   1605:         * For PGO_FREE (or (PGO_CLEANIT | PGO_FREE)), we invalidate MMU
                   1606:         * mappings of both XIP pages and XIP zero pages.
                   1607:         *
                   1608:         * Zero page is freed when one of its mapped offset is freed, even if
                   1609:         * one file (vnode) has many holes and mapping its zero page to all
                   1610:         * of those hole pages.
                   1611:         *
1.36.2.25  uebayasi 1612:         * We don't know which pages are currently mapped in the given vnode,
                   1613:         * because XIP pages are not added to vnode.  What we can do is to
                   1614:         * locate pages by querying the filesystem as done in getpages.  Call
1.36.2.40  uebayasi 1615:         * genfs_do_getpages_xip_io().
1.36.2.24  uebayasi 1616:         */
                   1617:
                   1618:        off_t off, eof;
                   1619:
                   1620:        off = trunc_page(startoff);
                   1621:        if (endoff == 0 || (flags & PGO_ALLPAGES))
                   1622:                GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_MEM);
                   1623:        else
                   1624:                eof = endoff;
                   1625:
                   1626:        while (off < eof) {
                   1627:                int npages, orignpages, error, i;
                   1628:                struct vm_page *pgs[maxpages], *pg;
                   1629:
                   1630:                npages = round_page(eof - off) >> PAGE_SHIFT;
                   1631:                if (npages > maxpages)
                   1632:                        npages = maxpages;
                   1633:
                   1634:                orignpages = npages;
1.36.2.26  uebayasi 1635:                KASSERT(mutex_owned(&uobj->vmobjlock));
1.36.2.39  uebayasi 1636:                mutex_exit(&uobj->vmobjlock);
1.36.2.40  uebayasi 1637:                error = genfs_do_getpages_xip_io(vp, off, pgs, &npages, 0,
1.36.2.43  uebayasi 1638:                    VM_PROT_ALL, 0, PGO_GLOCKHELD, orignpages);
1.36.2.24  uebayasi 1639:                KASSERT(error == 0);
                   1640:                KASSERT(npages == orignpages);
1.36.2.36  uebayasi 1641:                mutex_enter(&uobj->vmobjlock);
1.36.2.24  uebayasi 1642:                for (i = 0; i < npages; i++) {
                   1643:                        pg = pgs[i];
                   1644:                        if (pg == NULL || pg == PGO_DONTCARE)
                   1645:                                continue;
1.36.2.33  uebayasi 1646:                        /*
                   1647:                         * Freeing normal XIP pages; nothing to do.
                   1648:                         */
                   1649:                        pmap_page_protect(pg, VM_PROT_NONE);
                   1650:                        KASSERT((pg->flags & PG_RDONLY) != 0);
                   1651:                        KASSERT((pg->flags & PG_CLEAN) != 0);
                   1652:                        KASSERT((pg->flags & PG_FAKE) == 0);
                   1653:                        KASSERT((pg->flags & PG_DEVICE) != 0);
                   1654:                        pg->flags &= ~PG_BUSY;
1.36.2.24  uebayasi 1655:                }
                   1656:                off += npages << PAGE_SHIFT;
                   1657:        }
                   1658:
                   1659:        KASSERT(uobj->uo_npages == 0);
                   1660:
                   1661: done:
1.36.2.26  uebayasi 1662:        KASSERT(mutex_owned(&uobj->vmobjlock));
1.36.2.24  uebayasi 1663:        mutex_exit(&uobj->vmobjlock);
                   1664:        return 0;
                   1665: }
                   1666: #endif
                   1667:
1.1       pooka    1668: int
                   1669: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
                   1670: {
                   1671:        off_t off;
                   1672:        vaddr_t kva;
                   1673:        size_t len;
                   1674:        int error;
                   1675:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1676:
                   1677:        UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
                   1678:            vp, pgs, npages, flags);
                   1679:
                   1680:        off = pgs[0]->offset;
                   1681:        kva = uvm_pagermapin(pgs, npages,
                   1682:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
                   1683:        len = npages << PAGE_SHIFT;
                   1684:
                   1685:        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                   1686:                            uvm_aio_biodone);
                   1687:
                   1688:        return error;
                   1689: }
                   1690:
1.7       reinoud  1691: int
                   1692: genfs_gop_write_rwmap(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
                   1693: {
                   1694:        off_t off;
                   1695:        vaddr_t kva;
                   1696:        size_t len;
                   1697:        int error;
                   1698:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1699:
                   1700:        UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
                   1701:            vp, pgs, npages, flags);
                   1702:
                   1703:        off = pgs[0]->offset;
                   1704:        kva = uvm_pagermapin(pgs, npages,
                   1705:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
                   1706:        len = npages << PAGE_SHIFT;
                   1707:
                   1708:        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                   1709:                            uvm_aio_biodone);
                   1710:
                   1711:        return error;
                   1712: }
                   1713:
1.1       pooka    1714: /*
                   1715:  * Backend routine for doing I/O to vnode pages.  Pages are already locked
                   1716:  * and mapped into kernel memory.  Here we just look up the underlying
                   1717:  * device block addresses and call the strategy routine.
                   1718:  */
                   1719:
                   1720: static int
                   1721: genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags,
                   1722:     enum uio_rw rw, void (*iodone)(struct buf *))
                   1723: {
1.36      uebayasi 1724:        int s, error;
1.1       pooka    1725:        int fs_bshift, dev_bshift;
                   1726:        off_t eof, offset, startoffset;
                   1727:        size_t bytes, iobytes, skipbytes;
                   1728:        struct buf *mbp, *bp;
1.35      uebayasi 1729:        const bool async = (flags & PGO_SYNCIO) == 0;
                   1730:        const bool iowrite = rw == UIO_WRITE;
                   1731:        const int brw = iowrite ? B_WRITE : B_READ;
1.1       pooka    1732:        UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
                   1733:
                   1734:        UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
                   1735:            vp, kva, len, flags);
                   1736:
                   1737:        KASSERT(vp->v_size <= vp->v_writesize);
                   1738:        GOP_SIZE(vp, vp->v_writesize, &eof, 0);
                   1739:        if (vp->v_type != VBLK) {
                   1740:                fs_bshift = vp->v_mount->mnt_fs_bshift;
                   1741:                dev_bshift = vp->v_mount->mnt_dev_bshift;
                   1742:        } else {
                   1743:                fs_bshift = DEV_BSHIFT;
                   1744:                dev_bshift = DEV_BSHIFT;
                   1745:        }
                   1746:        error = 0;
                   1747:        startoffset = off;
                   1748:        bytes = MIN(len, eof - startoffset);
                   1749:        skipbytes = 0;
                   1750:        KASSERT(bytes != 0);
                   1751:
1.35      uebayasi 1752:        if (iowrite) {
1.2       ad       1753:                mutex_enter(&vp->v_interlock);
1.1       pooka    1754:                vp->v_numoutput += 2;
1.2       ad       1755:                mutex_exit(&vp->v_interlock);
1.1       pooka    1756:        }
1.2       ad       1757:        mbp = getiobuf(vp, true);
1.1       pooka    1758:        UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
                   1759:            vp, mbp, vp->v_numoutput, bytes);
                   1760:        mbp->b_bufsize = len;
                   1761:        mbp->b_data = (void *)kva;
                   1762:        mbp->b_resid = mbp->b_bcount = bytes;
1.2       ad       1763:        mbp->b_cflags = BC_BUSY | BC_AGE;
                   1764:        if (async) {
                   1765:                mbp->b_flags = brw | B_ASYNC;
                   1766:                mbp->b_iodone = iodone;
                   1767:        } else {
                   1768:                mbp->b_flags = brw;
                   1769:                mbp->b_iodone = NULL;
                   1770:        }
1.1       pooka    1771:        if (curlwp == uvm.pagedaemon_lwp)
                   1772:                BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
                   1773:        else if (async)
                   1774:                BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
                   1775:        else
                   1776:                BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
                   1777:
                   1778:        bp = NULL;
                   1779:        for (offset = startoffset;
                   1780:            bytes > 0;
                   1781:            offset += iobytes, bytes -= iobytes) {
1.36      uebayasi 1782:                int run;
                   1783:                daddr_t lbn, blkno;
                   1784:                struct vnode *devvp;
                   1785:
                   1786:                /*
                   1787:                 * bmap the file to find out the blkno to read from and
                   1788:                 * how much we can read in one i/o.  if bmap returns an error,
                   1789:                 * skip the rest of the top-level i/o.
                   1790:                 */
                   1791:
1.1       pooka    1792:                lbn = offset >> fs_bshift;
                   1793:                error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
                   1794:                if (error) {
1.36      uebayasi 1795:                        UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
                   1796:                            lbn,error,0,0);
1.1       pooka    1797:                        skipbytes += bytes;
                   1798:                        bytes = 0;
1.36      uebayasi 1799:                        goto loopdone;
1.1       pooka    1800:                }
                   1801:
1.36      uebayasi 1802:                /*
                   1803:                 * see how many pages can be read with this i/o.
                   1804:                 * reduce the i/o size if necessary to avoid
                   1805:                 * overwriting pages with valid data.
                   1806:                 */
                   1807:
1.1       pooka    1808:                iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
                   1809:                    bytes);
1.36      uebayasi 1810:
                   1811:                /*
                   1812:                 * if this block isn't allocated, zero it instead of
                   1813:                 * reading it.  unless we are going to allocate blocks,
                   1814:                 * mark the pages we zeroed PG_RDONLY.
                   1815:                 */
                   1816:
1.1       pooka    1817:                if (blkno == (daddr_t)-1) {
1.35      uebayasi 1818:                        if (!iowrite) {
1.1       pooka    1819:                                memset((char *)kva + (offset - startoffset), 0,
1.36      uebayasi 1820:                                    iobytes);
1.1       pooka    1821:                        }
                   1822:                        skipbytes += iobytes;
                   1823:                        continue;
                   1824:                }
                   1825:
1.36      uebayasi 1826:                /*
                   1827:                 * allocate a sub-buf for this piece of the i/o
                   1828:                 * (or just use mbp if there's only 1 piece),
                   1829:                 * and start it going.
                   1830:                 */
                   1831:
1.1       pooka    1832:                if (offset == startoffset && iobytes == bytes) {
                   1833:                        bp = mbp;
                   1834:                } else {
                   1835:                        UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
                   1836:                            vp, bp, vp->v_numoutput, 0);
1.2       ad       1837:                        bp = getiobuf(vp, true);
1.1       pooka    1838:                        nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
                   1839:                }
                   1840:                bp->b_lblkno = 0;
                   1841:
                   1842:                /* adjust physical blkno for partial blocks */
                   1843:                bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
                   1844:                    dev_bshift);
1.36      uebayasi 1845:
1.1       pooka    1846:                UVMHIST_LOG(ubchist,
1.36      uebayasi 1847:                    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
                   1848:                    bp, offset, bp->b_bcount, bp->b_blkno);
1.1       pooka    1849:
                   1850:                VOP_STRATEGY(devvp, bp);
                   1851:        }
1.36      uebayasi 1852:
                   1853: loopdone:
1.1       pooka    1854:        if (skipbytes) {
                   1855:                UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
                   1856:        }
                   1857:        nestiobuf_done(mbp, skipbytes, error);
                   1858:        if (async) {
                   1859:                UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
                   1860:                return (0);
                   1861:        }
                   1862:        UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
                   1863:        error = biowait(mbp);
                   1864:        s = splbio();
                   1865:        (*iodone)(mbp);
                   1866:        splx(s);
                   1867:        UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
                   1868:        return (error);
                   1869: }
                   1870:
                   1871: int
                   1872: genfs_compat_getpages(void *v)
                   1873: {
                   1874:        struct vop_getpages_args /* {
                   1875:                struct vnode *a_vp;
                   1876:                voff_t a_offset;
                   1877:                struct vm_page **a_m;
                   1878:                int *a_count;
                   1879:                int a_centeridx;
                   1880:                vm_prot_t a_access_type;
                   1881:                int a_advice;
                   1882:                int a_flags;
                   1883:        } */ *ap = v;
                   1884:
                   1885:        off_t origoffset;
                   1886:        struct vnode *vp = ap->a_vp;
                   1887:        struct uvm_object *uobj = &vp->v_uobj;
                   1888:        struct vm_page *pg, **pgs;
                   1889:        vaddr_t kva;
                   1890:        int i, error, orignpages, npages;
                   1891:        struct iovec iov;
                   1892:        struct uio uio;
                   1893:        kauth_cred_t cred = curlwp->l_cred;
1.35      uebayasi 1894:        const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.1       pooka    1895:
                   1896:        error = 0;
                   1897:        origoffset = ap->a_offset;
                   1898:        orignpages = *ap->a_count;
                   1899:        pgs = ap->a_m;
                   1900:
                   1901:        if (ap->a_flags & PGO_LOCKED) {
                   1902:                uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.35      uebayasi 1903:                    UFP_NOWAIT|UFP_NOALLOC| (memwrite ? UFP_NORDONLY : 0));
1.1       pooka    1904:
1.36.2.21  uebayasi 1905:                error = ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
                   1906:                if (error == 0 && memwrite) {
                   1907:                        genfs_markdirty(vp);
                   1908:                }
                   1909:                return error;
1.1       pooka    1910:        }
                   1911:        if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1.2       ad       1912:                mutex_exit(&uobj->vmobjlock);
1.36.2.21  uebayasi 1913:                return EINVAL;
1.1       pooka    1914:        }
                   1915:        if ((ap->a_flags & PGO_SYNCIO) == 0) {
1.2       ad       1916:                mutex_exit(&uobj->vmobjlock);
1.1       pooka    1917:                return 0;
                   1918:        }
                   1919:        npages = orignpages;
                   1920:        uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1.2       ad       1921:        mutex_exit(&uobj->vmobjlock);
1.1       pooka    1922:        kva = uvm_pagermapin(pgs, npages,
                   1923:            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
                   1924:        for (i = 0; i < npages; i++) {
                   1925:                pg = pgs[i];
                   1926:                if ((pg->flags & PG_FAKE) == 0) {
                   1927:                        continue;
                   1928:                }
                   1929:                iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
                   1930:                iov.iov_len = PAGE_SIZE;
                   1931:                uio.uio_iov = &iov;
                   1932:                uio.uio_iovcnt = 1;
                   1933:                uio.uio_offset = origoffset + (i << PAGE_SHIFT);
                   1934:                uio.uio_rw = UIO_READ;
                   1935:                uio.uio_resid = PAGE_SIZE;
                   1936:                UIO_SETUP_SYSSPACE(&uio);
                   1937:                /* XXX vn_lock */
                   1938:                error = VOP_READ(vp, &uio, 0, cred);
                   1939:                if (error) {
                   1940:                        break;
                   1941:                }
                   1942:                if (uio.uio_resid) {
                   1943:                        memset(iov.iov_base, 0, uio.uio_resid);
                   1944:                }
                   1945:        }
                   1946:        uvm_pagermapout(kva, npages);
1.2       ad       1947:        mutex_enter(&uobj->vmobjlock);
                   1948:        mutex_enter(&uvm_pageqlock);
1.1       pooka    1949:        for (i = 0; i < npages; i++) {
                   1950:                pg = pgs[i];
                   1951:                if (error && (pg->flags & PG_FAKE) != 0) {
                   1952:                        pg->flags |= PG_RELEASED;
                   1953:                } else {
                   1954:                        pmap_clear_modify(pg);
                   1955:                        uvm_pageactivate(pg);
                   1956:                }
                   1957:        }
                   1958:        if (error) {
                   1959:                uvm_page_unbusy(pgs, npages);
                   1960:        }
1.2       ad       1961:        mutex_exit(&uvm_pageqlock);
1.36.2.21  uebayasi 1962:        if (error == 0 && memwrite) {
                   1963:                genfs_markdirty(vp);
                   1964:        }
1.2       ad       1965:        mutex_exit(&uobj->vmobjlock);
1.36.2.21  uebayasi 1966:        return error;
1.1       pooka    1967: }
                   1968:
                   1969: int
                   1970: genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
                   1971:     int flags)
                   1972: {
                   1973:        off_t offset;
                   1974:        struct iovec iov;
                   1975:        struct uio uio;
                   1976:        kauth_cred_t cred = curlwp->l_cred;
                   1977:        struct buf *bp;
                   1978:        vaddr_t kva;
1.2       ad       1979:        int error;
1.1       pooka    1980:
                   1981:        offset = pgs[0]->offset;
                   1982:        kva = uvm_pagermapin(pgs, npages,
                   1983:            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
                   1984:
                   1985:        iov.iov_base = (void *)kva;
                   1986:        iov.iov_len = npages << PAGE_SHIFT;
                   1987:        uio.uio_iov = &iov;
                   1988:        uio.uio_iovcnt = 1;
                   1989:        uio.uio_offset = offset;
                   1990:        uio.uio_rw = UIO_WRITE;
                   1991:        uio.uio_resid = npages << PAGE_SHIFT;
                   1992:        UIO_SETUP_SYSSPACE(&uio);
                   1993:        /* XXX vn_lock */
                   1994:        error = VOP_WRITE(vp, &uio, 0, cred);
                   1995:
1.2       ad       1996:        mutex_enter(&vp->v_interlock);
                   1997:        vp->v_numoutput++;
                   1998:        mutex_exit(&vp->v_interlock);
1.1       pooka    1999:
1.2       ad       2000:        bp = getiobuf(vp, true);
                   2001:        bp->b_cflags = BC_BUSY | BC_AGE;
1.1       pooka    2002:        bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
                   2003:        bp->b_data = (char *)kva;
                   2004:        bp->b_bcount = npages << PAGE_SHIFT;
                   2005:        bp->b_bufsize = npages << PAGE_SHIFT;
                   2006:        bp->b_resid = 0;
                   2007:        bp->b_error = error;
                   2008:        uvm_aio_aiodone(bp);
                   2009:        return (error);
                   2010: }
                   2011:
                   2012: /*
                   2013:  * Process a uio using direct I/O.  If we reach a part of the request
                   2014:  * which cannot be processed in this fashion for some reason, just return.
                   2015:  * The caller must handle some additional part of the request using
                   2016:  * buffered I/O before trying direct I/O again.
                   2017:  */
                   2018:
                   2019: void
                   2020: genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
                   2021: {
                   2022:        struct vmspace *vs;
                   2023:        struct iovec *iov;
                   2024:        vaddr_t va;
                   2025:        size_t len;
                   2026:        const int mask = DEV_BSIZE - 1;
                   2027:        int error;
1.16      joerg    2028:        bool need_wapbl = (vp->v_mount && vp->v_mount->mnt_wapbl &&
                   2029:            (ioflag & IO_JOURNALLOCKED) == 0);
1.1       pooka    2030:
                   2031:        /*
                   2032:         * We only support direct I/O to user space for now.
                   2033:         */
                   2034:
                   2035:        if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
                   2036:                return;
                   2037:        }
                   2038:
                   2039:        /*
                   2040:         * If the vnode is mapped, we would need to get the getpages lock
                   2041:         * to stabilize the bmap, but then we would get into trouble whil e
                   2042:         * locking the pages if the pages belong to this same vnode (or a
                   2043:         * multi-vnode cascade to the same effect).  Just fall back to
                   2044:         * buffered I/O if the vnode is mapped to avoid this mess.
                   2045:         */
                   2046:
                   2047:        if (vp->v_vflag & VV_MAPPED) {
                   2048:                return;
                   2049:        }
                   2050:
1.16      joerg    2051:        if (need_wapbl) {
1.13      hannken  2052:                error = WAPBL_BEGIN(vp->v_mount);
                   2053:                if (error)
                   2054:                        return;
                   2055:        }
                   2056:
1.1       pooka    2057:        /*
                   2058:         * Do as much of the uio as possible with direct I/O.
                   2059:         */
                   2060:
                   2061:        vs = uio->uio_vmspace;
                   2062:        while (uio->uio_resid) {
                   2063:                iov = uio->uio_iov;
                   2064:                if (iov->iov_len == 0) {
                   2065:                        uio->uio_iov++;
                   2066:                        uio->uio_iovcnt--;
                   2067:                        continue;
                   2068:                }
                   2069:                va = (vaddr_t)iov->iov_base;
                   2070:                len = MIN(iov->iov_len, genfs_maxdio);
                   2071:                len &= ~mask;
                   2072:
                   2073:                /*
                   2074:                 * If the next chunk is smaller than DEV_BSIZE or extends past
                   2075:                 * the current EOF, then fall back to buffered I/O.
                   2076:                 */
                   2077:
                   2078:                if (len == 0 || uio->uio_offset + len > vp->v_size) {
1.13      hannken  2079:                        break;
1.1       pooka    2080:                }
                   2081:
                   2082:                /*
                   2083:                 * Check alignment.  The file offset must be at least
                   2084:                 * sector-aligned.  The exact constraint on memory alignment
                   2085:                 * is very hardware-dependent, but requiring sector-aligned
                   2086:                 * addresses there too is safe.
                   2087:                 */
                   2088:
                   2089:                if (uio->uio_offset & mask || va & mask) {
1.13      hannken  2090:                        break;
1.1       pooka    2091:                }
                   2092:                error = genfs_do_directio(vs, va, len, vp, uio->uio_offset,
                   2093:                                          uio->uio_rw);
                   2094:                if (error) {
                   2095:                        break;
                   2096:                }
                   2097:                iov->iov_base = (char *)iov->iov_base + len;
                   2098:                iov->iov_len -= len;
                   2099:                uio->uio_offset += len;
                   2100:                uio->uio_resid -= len;
                   2101:        }
1.13      hannken  2102:
1.16      joerg    2103:        if (need_wapbl)
1.13      hannken  2104:                WAPBL_END(vp->v_mount);
1.1       pooka    2105: }
                   2106:
                   2107: /*
                   2108:  * Iodone routine for direct I/O.  We don't do much here since the request is
                   2109:  * always synchronous, so the caller will do most of the work after biowait().
                   2110:  */
                   2111:
                   2112: static void
                   2113: genfs_dio_iodone(struct buf *bp)
                   2114: {
                   2115:
                   2116:        KASSERT((bp->b_flags & B_ASYNC) == 0);
1.2       ad       2117:        if ((bp->b_flags & B_READ) == 0 && (bp->b_cflags & BC_AGE) != 0) {
                   2118:                mutex_enter(bp->b_objlock);
1.1       pooka    2119:                vwakeup(bp);
1.2       ad       2120:                mutex_exit(bp->b_objlock);
1.1       pooka    2121:        }
                   2122:        putiobuf(bp);
                   2123: }
                   2124:
                   2125: /*
                   2126:  * Process one chunk of a direct I/O request.
                   2127:  */
                   2128:
                   2129: static int
                   2130: genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp,
                   2131:     off_t off, enum uio_rw rw)
                   2132: {
                   2133:        struct vm_map *map;
                   2134:        struct pmap *upm, *kpm;
                   2135:        size_t klen = round_page(uva + len) - trunc_page(uva);
                   2136:        off_t spoff, epoff;
                   2137:        vaddr_t kva, puva;
                   2138:        paddr_t pa;
                   2139:        vm_prot_t prot;
                   2140:        int error, rv, poff, koff;
1.13      hannken  2141:        const int pgoflags = PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED |
1.1       pooka    2142:                (rw == UIO_WRITE ? PGO_FREE : 0);
                   2143:
                   2144:        /*
                   2145:         * For writes, verify that this range of the file already has fully
                   2146:         * allocated backing store.  If there are any holes, just punt and
                   2147:         * make the caller take the buffered write path.
                   2148:         */
                   2149:
                   2150:        if (rw == UIO_WRITE) {
                   2151:                daddr_t lbn, elbn, blkno;
                   2152:                int bsize, bshift, run;
                   2153:
                   2154:                bshift = vp->v_mount->mnt_fs_bshift;
                   2155:                bsize = 1 << bshift;
                   2156:                lbn = off >> bshift;
                   2157:                elbn = (off + len + bsize - 1) >> bshift;
                   2158:                while (lbn < elbn) {
                   2159:                        error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
                   2160:                        if (error) {
                   2161:                                return error;
                   2162:                        }
                   2163:                        if (blkno == (daddr_t)-1) {
                   2164:                                return ENOSPC;
                   2165:                        }
                   2166:                        lbn += 1 + run;
                   2167:                }
                   2168:        }
                   2169:
                   2170:        /*
                   2171:         * Flush any cached pages for parts of the file that we're about to
                   2172:         * access.  If we're writing, invalidate pages as well.
                   2173:         */
                   2174:
                   2175:        spoff = trunc_page(off);
                   2176:        epoff = round_page(off + len);
1.2       ad       2177:        mutex_enter(&vp->v_interlock);
1.1       pooka    2178:        error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags);
                   2179:        if (error) {
                   2180:                return error;
                   2181:        }
                   2182:
                   2183:        /*
                   2184:         * Wire the user pages and remap them into kernel memory.
                   2185:         */
                   2186:
                   2187:        prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
                   2188:        error = uvm_vslock(vs, (void *)uva, len, prot);
                   2189:        if (error) {
                   2190:                return error;
                   2191:        }
                   2192:
                   2193:        map = &vs->vm_map;
                   2194:        upm = vm_map_pmap(map);
                   2195:        kpm = vm_map_pmap(kernel_map);
                   2196:        kva = uvm_km_alloc(kernel_map, klen, 0,
                   2197:                           UVM_KMF_VAONLY | UVM_KMF_WAITVA);
                   2198:        puva = trunc_page(uva);
                   2199:        for (poff = 0; poff < klen; poff += PAGE_SIZE) {
                   2200:                rv = pmap_extract(upm, puva + poff, &pa);
                   2201:                KASSERT(rv);
                   2202:                pmap_enter(kpm, kva + poff, pa, prot, prot | PMAP_WIRED);
                   2203:        }
                   2204:        pmap_update(kpm);
                   2205:
                   2206:        /*
                   2207:         * Do the I/O.
                   2208:         */
                   2209:
                   2210:        koff = uva - trunc_page(uva);
                   2211:        error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw,
                   2212:                            genfs_dio_iodone);
                   2213:
                   2214:        /*
                   2215:         * Tear down the kernel mapping.
                   2216:         */
                   2217:
                   2218:        pmap_remove(kpm, kva, kva + klen);
                   2219:        pmap_update(kpm);
                   2220:        uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY);
                   2221:
                   2222:        /*
                   2223:         * Unwire the user pages.
                   2224:         */
                   2225:
                   2226:        uvm_vsunlock(vs, (void *)uva, len);
                   2227:        return error;
                   2228: }
1.2       ad       2229:

CVSweb <webmaster@jp.NetBSD.org>