Annotation of src/sys/miscfs/genfs/genfs_io.c, Revision 1.36.2.46
1.36.2.31 uebayasi 1: /* $NetBSD$ */
1.1 pooka 2:
3: /*
4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. Neither the name of the University nor the names of its contributors
16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: *
31: */
32:
33: #include <sys/cdefs.h>
1.36.2.31 uebayasi 34: __KERNEL_RCSID(0, "$NetBSD$");
1.1 pooka 35:
1.36.2.1 uebayasi 36: #include "opt_xip.h"
37:
1.1 pooka 38: #include <sys/param.h>
39: #include <sys/systm.h>
40: #include <sys/proc.h>
41: #include <sys/kernel.h>
42: #include <sys/mount.h>
43: #include <sys/namei.h>
44: #include <sys/vnode.h>
45: #include <sys/fcntl.h>
46: #include <sys/kmem.h>
47: #include <sys/poll.h>
48: #include <sys/mman.h>
49: #include <sys/file.h>
50: #include <sys/kauth.h>
51: #include <sys/fstrans.h>
1.15 pooka 52: #include <sys/buf.h>
1.36.2.11 uebayasi 53: #include <sys/once.h>
1.1 pooka 54:
55: #include <miscfs/genfs/genfs.h>
56: #include <miscfs/genfs/genfs_node.h>
57: #include <miscfs/specfs/specdev.h>
58:
59: #include <uvm/uvm.h>
60: #include <uvm/uvm_pager.h>
61:
1.36.2.4 uebayasi 62: #ifdef XIP
1.36.2.35 uebayasi 63: static int genfs_do_getpages_xip_io(struct vnode *, voff_t, struct vm_page **,
1.36.2.42 uebayasi 64: int *, int, vm_prot_t, int, int, const int);
1.36.2.44 uebayasi 65: static int genfs_do_getpages_xip_io_done(struct vnode *, voff_t, struct vm_page **,
66: int *, int, vm_prot_t, int, int, const int);
1.36.2.24 uebayasi 67: static int genfs_do_putpages_xip(struct vnode *, off_t, off_t, int,
68: struct vm_page **);
1.36.2.4 uebayasi 69: #endif
1.1 pooka 70: static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *,
71: off_t, enum uio_rw);
72: static void genfs_dio_iodone(struct buf *);
73:
74: static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw,
75: void (*)(struct buf *));
1.36.2.21 uebayasi 76: static void genfs_rel_pages(struct vm_page **, int);
77: static void genfs_markdirty(struct vnode *);
1.1 pooka 78:
79: int genfs_maxdio = MAXPHYS;
80:
1.36.2.21 uebayasi 81: static void
1.1 pooka 82: genfs_rel_pages(struct vm_page **pgs, int npages)
83: {
84: int i;
85:
86: for (i = 0; i < npages; i++) {
87: struct vm_page *pg = pgs[i];
88:
89: if (pg == NULL || pg == PGO_DONTCARE)
90: continue;
91: if (pg->flags & PG_FAKE) {
92: pg->flags |= PG_RELEASED;
93: }
94: }
1.2 ad 95: mutex_enter(&uvm_pageqlock);
1.1 pooka 96: uvm_page_unbusy(pgs, npages);
1.2 ad 97: mutex_exit(&uvm_pageqlock);
1.1 pooka 98: }
99:
1.36.2.21 uebayasi 100: static void
101: genfs_markdirty(struct vnode *vp)
102: {
103: struct genfs_node * const gp = VTOG(vp);
104:
105: KASSERT(mutex_owned(&vp->v_interlock));
106: gp->g_dirtygen++;
107: if ((vp->v_iflag & VI_ONWORKLST) == 0) {
108: vn_syncer_add_to_worklist(vp, filedelay);
109: }
110: if ((vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP) {
111: vp->v_iflag |= VI_WRMAPDIRTY;
112: }
113: }
114:
1.1 pooka 115: /*
116: * generic VM getpages routine.
117: * Return PG_BUSY pages for the given range,
118: * reading from backing store if necessary.
119: */
120:
/*
 * NOTE(review): this revision carries work-in-progress scaffolding.  The
 * "#if 0" regions sketch a planned decomposition of this routine into
 * genfs_getpages_mem()/genfs_getpages_io()/... helpers, and the "#if 1"
 * regions divert XIP (execute-in-place) vnodes to genfs_do_getpages_xip_io().
 * All code tokens below are kept byte-for-byte; only comments were added.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ * const ap = v;

	off_t diskeof, memeof;
	int i, error, npages;
	const int flags = ap->a_flags;
	struct vnode * const vp = ap->a_vp;
	struct uvm_object * const uobj = &vp->v_uobj;
	kauth_cred_t const cred = curlwp->l_cred;		/* XXXUBC curlwp */
	/* async: caller did not request synchronous I/O (PGO_SYNCIO clear) */
	const bool async = (flags & PGO_SYNCIO) == 0;
	/* memwrite: fault is for a write access to the mapping */
	const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
	bool has_trans = false;
	const bool overwrite = (flags & PGO_OVERWRITE) != 0;
	/* blockalloc: writing and allowed to allocate backing blocks */
	const bool blockalloc = memwrite && (flags & PGO_NOBLOCKALLOC) == 0;
	/* glocked: caller already holds the genfs node lock (g_glock) */
	const bool glocked = (flags & PGO_GLOCKHELD) != 0;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
	    vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);

	KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
	    vp->v_type == VLNK || vp->v_type == VBLK);

	/*
	 * Re-entry point when a racing truncate shrinks v_size after we
	 * sampled it below; all per-call snapshots are re-taken.
	 */
startover:
	error = 0;
	const voff_t origvsize = vp->v_size;
	const off_t origoffset = ap->a_offset;
	const int orignpages = *ap->a_count;

	GOP_SIZE(vp, origvsize, &diskeof, 0);
	if (flags & PGO_PASTEOF) {
		off_t newsize;
#if defined(DIAGNOSTIC)
		off_t writeeof;
#endif /* defined(DIAGNOSTIC) */

		newsize = MAX(origvsize,
		    origoffset + (orignpages << PAGE_SHIFT));
		GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
#if defined(DIAGNOSTIC)
		GOP_SIZE(vp, vp->v_writesize, &writeeof, GOP_SIZE_MEM);
		if (newsize > round_page(writeeof)) {
			panic("%s: past eof: %" PRId64 " vs. %" PRId64,
			    __func__, newsize, round_page(writeeof));
		}
#endif /* defined(DIAGNOSTIC) */
	} else {
		GOP_SIZE(vp, origvsize, &memeof, GOP_SIZE_MEM);
	}
	/*
	 * NOTE(review): this assertion is vacuous — with "||" it holds for
	 * every int value of a_centeridx; "&&" was almost certainly intended.
	 */
	KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages);
	KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
	KASSERT(orignpages > 0);

	/*
	 * Bounds-check the request.
	 */

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
		if ((flags & PGO_LOCKED) == 0) {
			mutex_exit(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, memeof,0);
		error = EINVAL;
		goto out_err;
	}

	/* uobj is locked */

	if ((flags & PGO_NOTIMESTAMP) == 0 &&
	    (vp->v_type != VBLK ||
	    (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
		int updflags = 0;

		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
			updflags = GOP_UPDATE_ACCESSED;
		}
		if (memwrite) {
			updflags |= GOP_UPDATE_MODIFIED;
		}
		if (updflags != 0) {
			GOP_MARKUPDATE(vp, updflags);
		}
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
#if 0
		genfs_getpages_mem();
	} else {
		genfs_getpages_io();
	}
}

int
genfs_getpages_mem()
{
#endif
		int nfound;
		struct vm_page *pg;

#ifdef XIP
		/* XIP vnodes have no resident pages to hand back here. */
		if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
			*ap->a_count = 0;
			return 0;
		}
#endif

		KASSERT(!glocked);
		npages = *ap->a_count;
#if defined(DEBUG)
		for (i = 0; i < npages; i++) {
			pg = ap->a_m[i];
			KASSERT(pg == NULL || pg == PGO_DONTCARE);
		}
#endif /* defined(DEBUG) */
		nfound = uvn_findpages(uobj, origoffset, &npages,
		    ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
		KASSERT(npages == *ap->a_count);
		if (nfound == 0) {
			error = EBUSY;
			goto out_err;
		}
		if (!genfs_node_rdtrylock(vp)) {
			genfs_rel_pages(ap->a_m, npages);

			/*
			 * restore the array.
			 */

			for (i = 0; i < npages; i++) {
				pg = ap->a_m[i];

				if (pg != NULL && pg != PGO_DONTCARE) {
					ap->a_m[i] = NULL;
				}
				/*
				 * NOTE(review): this asserts the *saved*
				 * pointer, which the branch above just
				 * established may be a real page — it looks
				 * like it should assert ap->a_m[i] instead;
				 * as written it fires whenever an entry was
				 * just cleared.  Confirm against mainline.
				 */
				KASSERT(pg == NULL || pg == PGO_DONTCARE);
			}
		} else {
			genfs_node_unlock(vp);
		}
		error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
		if (error == 0 && memwrite) {
			genfs_markdirty(vp);
		}
		goto out_err;
	}
	mutex_exit(&uobj->vmobjlock);
#if 0
}

int
genfs_getpages_io()
{
#endif
	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

#define vp2fs_bshift(vp) \
	(((vp)->v_type != VBLK) ? (vp)->v_mount->mnt_fs_bshift : DEV_BSHIFT)
#define vp2dev_bshift(vp) \
	(((vp)->v_type != VBLK) ? (vp)->v_mount->mnt_dev_bshift : DEV_BSHIFT)

	const int fs_bshift = vp2fs_bshift(vp);
	const int dev_bshift = vp2dev_bshift(vp);
	const int fs_bsize = 1 << fs_bshift;
	/* Block-granularity rounding helpers; expand the local fs_bsize. */
#define blk_mask	(fs_bsize - 1)
#define trunc_blk(x)	((x) & ~blk_mask)
#define round_blk(x)	(((x) + blk_mask) & ~blk_mask)

	/* Number of requested pages that lie within the in-memory EOF. */
	const int orignmempages = MIN(orignpages,
	    round_page(memeof - origoffset) >> PAGE_SHIFT);
	npages = orignmempages;
	/* Expand the range to whole fs blocks, clamped at EOF. */
	const off_t startoffset = trunc_blk(origoffset);
	const off_t endoffset = MIN(
	    round_page(round_blk(origoffset + (npages << PAGE_SHIFT))),
	    round_page(memeof));
	/* Index of the first originally-requested page within pgs[]. */
	const int ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	const int pgs_size = sizeof(struct vm_page *) *
	    ((endoffset - startoffset) >> PAGE_SHIFT);
	/* Small requests use the on-stack array; larger ones allocate. */
	struct vm_page **pgs, *pgs_onstack[UBC_MAX_PAGES];

	if (pgs_size > sizeof(pgs_onstack)) {
		pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP);
		if (pgs == NULL) {
			pgs = pgs_onstack;
			error = ENOMEM;
			goto out_err;
		}
	} else {
		pgs = pgs_onstack;
		(void)memset(pgs, 0, pgs_size);
	}

	UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
	    ridx, npages, startoffset, endoffset);
#if 0
}

int
genfs_getpages_io_relock()
{
#endif
	if (!has_trans) {
		fstrans_start(vp->v_mount, FSTRANS_SHARED);
		has_trans = true;
	}

	/*
	 * hold g_glock to prevent a race with truncate.
	 *
	 * check if our idea of v_size is still valid.
	 */

	KASSERT(!glocked || genfs_node_wrlocked(vp));
	if (!glocked) {
		if (blockalloc) {
			genfs_node_wrlock(vp);
		} else {
			genfs_node_rdlock(vp);
		}
	}
	mutex_enter(&uobj->vmobjlock);
	if (vp->v_size < origvsize) {
		/* File was truncated under us: redo from the top. */
		if (!glocked) {
			genfs_node_unlock(vp);
		}
		if (pgs != pgs_onstack)
			kmem_free(pgs, pgs_size);
		goto startover;
	}
#if 0
}

int
genfs_getpages_io_findpages()
{
#endif
#ifdef XIP
	/*
	 * NOTE(review): this goto's target label does not exist — the label
	 * below is spelled "genfs_getpages_io_read_allocpages_done".  With
	 * XIP defined this is an undefined-label compile error; the two
	 * names need to be reconciled.
	 */
	if ((ap->a_vp->v_vflag & VV_XIP) != 0)
		goto genfs_getpages_allocpages_done;
#endif

	if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
	    async ? UFP_NOWAIT : UFP_ALL) != orignmempages) {
		if (!glocked) {
			genfs_node_unlock(vp);
		}
		KASSERT(async != 0);
		genfs_rel_pages(&pgs[ridx], orignmempages);
		mutex_exit(&uobj->vmobjlock);
		error = EBUSY;
		goto out_err_free;
	}

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (blockalloc && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		if (!glocked) {
			genfs_node_unlock(vp);
		}
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		npages += ridx;
		goto out;
	}

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 */

	if (overwrite) {
#if 0
		genfs_getpages_io_overwrite();
	} else {
		genfs_getpages_io_read();
	}
}

int
genfs_getpages_io_overwrite()
{
	{
#endif
		if (!glocked) {
			genfs_node_unlock(vp);
		}
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			pg->flags &= ~(PG_RDONLY|PG_CLEAN);
		}
		npages += ridx;
		goto out;
	}
#if 0
}

int
genfs_getpages_io_read()
{
#endif
	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */
#if 0
}

int
genfs_getpages_io_read_allocpages()
{
#endif
	npages = (endoffset - startoffset) >> PAGE_SHIFT;
	if (startoffset != origoffset || npages != orignmempages) {
		int npgs;

		/*
		 * we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked.  unlock them all and start over.
		 */

		genfs_rel_pages(&pgs[ridx], orignmempages);
		memset(pgs, 0, pgs_size);

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npgs = npages;
		if (uvn_findpages(uobj, startoffset, &npgs, pgs,
		    async ? UFP_NOWAIT : UFP_ALL) != npages) {
			if (!glocked) {
				genfs_node_unlock(vp);
			}
			KASSERT(async != 0);
			genfs_rel_pages(pgs, npages);
			mutex_exit(&uobj->vmobjlock);
			error = EBUSY;
			goto out_err_free;
		}
	}
#ifdef XIP
genfs_getpages_io_read_allocpages_done:
#endif
#if 0
}

int
genfs_getpages_io_read_bio()
{
#endif
	mutex_exit(&uobj->vmobjlock);

	/* Scope block for the buffer-I/O state (kva, mbp, etc.). */
	{
	size_t bytes, iobytes, tailstart, tailbytes, totalbytes, skipbytes;
	vaddr_t kva = 0;
	struct buf *bp = NULL, *mbp = NULL;
	bool sawhole = false;

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

#if 1
	/* XIP vnodes do no buffer I/O: skip pager mapping and iobuf setup. */
	if ((ap->a_vp->v_vflag & VV_XIP) != 0)
		goto genfs_getpages_bio_prepare_done;
#endif
#if 0
}

int
genfs_getpages_io_read_bio_prepare()
{
#endif
	kva = uvm_pagermapin(pgs, npages,
	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);

	mbp = getiobuf(vp, true);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_cflags = BC_BUSY;
	if (async) {
		mbp->b_flags = B_READ | B_ASYNC;
		mbp->b_iodone = uvm_aio_biodone;
	} else {
		mbp->b_flags = B_READ;
		mbp->b_iodone = NULL;
	}
	if (async)
		BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
	else
		BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
#if 0
}

#endif
#if 1
genfs_getpages_bio_prepare_done:
#endif

	/*
	 * if EOF is in the middle of the range, zero the part past EOF.
	 * skip over pages which are not PG_FAKE since in that case they have
	 * valid data that we need to preserve.
	 *
	 * NOTE(review): on the XIP path kva is still 0 here, so if
	 * tailbytes > 0 the memset below would write at address `tailstart`
	 * — presumably XIP requests never extend past diskeof; confirm.
	 */

	tailstart = bytes;
	while (tailbytes > 0) {
		const int len = PAGE_SIZE - (tailstart & PAGE_MASK);

		KASSERT(len <= tailbytes);
		if ((pgs[tailstart >> PAGE_SHIFT]->flags & PG_FAKE) != 0) {
			memset((void *)(kva + tailstart), 0, len);
			UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
			    kva, tailstart, len, 0);
		}
		tailstart += len;
		tailbytes -= len;
	}

#if 1
	if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
		error = genfs_do_getpages_xip_io(
			ap->a_vp,
			ap->a_offset,
			ap->a_m,
			ap->a_count,
			ap->a_centeridx,
			ap->a_access_type,
			ap->a_advice,
			ap->a_flags,
			orignmempages);
		goto loopdone;
	}
#endif
#if 0
}

int
genfs_getpages_io_read_bio_loop()
{
#endif
	/*
	 * now loop over the pages, reading as needed.
	 */

	bp = NULL;
	off_t offset;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		int run;
		daddr_t lbn, blkno;
		int pidx;
		struct vnode *devvp;

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
			size_t b;

			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
			if ((pgs[pidx]->flags & PG_RDONLY)) {
				sawhole = true;
			}
			b = MIN(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn,error,0,0);
			skipbytes += bytes;
			bytes = 0;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (offset + iobytes > round_page(offset)) {
			int pcount;

			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of
		 * reading it.  unless we are going to allocate blocks,
		 * mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno == (daddr_t)-1) {
			int holepages = (round_page(offset + iobytes) -
			    trunc_page(offset)) >> PAGE_SHIFT;
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = true;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			for (i = 0; i < holepages; i++) {
				if (memwrite) {
					pgs[pidx + i]->flags &= ~PG_CLEAN;
				}
				if (!blockalloc) {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			bp = getiobuf(vp, true);
			nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
		}
		bp->b_lblkno = 0;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist,
		    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, bp->b_bcount, bp->b_blkno);

		VOP_STRATEGY(devvp, bp);
	}

loopdone:
#if 1
	/* XIP: no iobufs were created, so skip completion and KVA teardown. */
	if ((ap->a_vp->v_vflag & VV_XIP) != 0)
		goto genfs_getpages_biodone_done;
#endif
#if 0

int
genfs_getpages_biodone()
{
#endif
	nestiobuf_done(mbp, skipbytes, error);
	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
		if (!glocked) {
			genfs_node_unlock(vp);
		}
		error = 0;
		goto out_err_free;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}

	/* Remove the mapping (make KVA available as soon as possible) */
	uvm_pagermapout(kva, npages);

	/*
	 * if this we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (!error && sawhole && blockalloc) {
		/*
		 * XXX: This assumes that we come here only via
		 * the mmio path
		 */
		if (vp->v_mount->mnt_wapbl) {
			error = WAPBL_BEGIN(vp->v_mount);
		}

		if (!error) {
			error = GOP_ALLOC(vp, startoffset,
			    npages << PAGE_SHIFT, 0, cred);
			if (vp->v_mount->mnt_wapbl) {
				WAPBL_END(vp->v_mount);
			}
		}

		UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
		    startoffset, npages << PAGE_SHIFT, error,0);
		if (!error) {
			for (i = 0; i < npages; i++) {
				struct vm_page *pg = pgs[i];

				if (pg == NULL) {
					continue;
				}
				pg->flags &= ~(PG_CLEAN|PG_RDONLY);
				UVMHIST_LOG(ubchist, "mark dirty pg %p",
				    pg,0,0,0);
			}
		}
	}

	putiobuf(mbp);
#if 0
}

#endif
#if 1
genfs_getpages_biodone_done:
	{}
#endif
	}

	if (!glocked) {
		genfs_node_unlock(vp);
	}

#if 1
	if ((ap->a_vp->v_vflag & VV_XIP) != 0) {
		error = genfs_do_getpages_xip_io_done(
			ap->a_vp,
			ap->a_offset,
			ap->a_m,
			ap->a_count,
			ap->a_centeridx,
			ap->a_access_type,
			ap->a_advice,
			ap->a_flags,
			orignmempages);
		goto genfs_getpages_generic_io_done_done;
	}
#endif
#if 0
	else {
		error = genfs_getpages_generic_io_done();
	}
}

int
genfs_getpages_generic_io_done()
{
#endif

	mutex_enter(&uobj->vmobjlock);

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

	if (error) {
		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[i];

			if (pg == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pg, pg->flags, 0,0);
			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		mutex_enter(&uvm_pageqlock);
		uvm_page_unbusy(pgs, npages);
		mutex_exit(&uvm_pageqlock);
		mutex_exit(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		goto out_err_free;
	}

out:
	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	error = 0;
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[i];
		if (pg == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pg, pg->flags, 0,0);
		if (pg->flags & PG_FAKE && !overwrite) {
			pg->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
		}
		KASSERT(!memwrite || !blockalloc || (pg->flags & PG_RDONLY) == 0);
		if (i < ridx || i >= ridx + orignmempages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pg, pg->offset,0,0);
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_FAKE) {
				KASSERT(overwrite);
				uvm_pagezero(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
				continue;
			}
			uvm_pageenqueue(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
	mutex_exit(&uvm_pageqlock);

	if (memwrite) {
		genfs_markdirty(vp);
	}
	mutex_exit(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignmempages * sizeof(struct vm_page *));
	}
#if 0
}

#endif
#if 1
genfs_getpages_generic_io_done_done:
	{}
#endif

out_err_free:
	if (pgs != NULL && pgs != pgs_onstack)
		kmem_free(pgs, pgs_size);
out_err:
	if (has_trans)
		fstrans_done(vp->v_mount);
	return error;
}
927:
1.36.2.1 uebayasi 928: #ifdef XIP
/*
 * genfs_do_getpages_xip_io
 *	Return "direct pages" of XIP vnode.  The block addresses of XIP
 *	vnode pages are returned back to the VM fault handler as the
 *	actually mapped physical addresses.
 *
 *	For each of the orignmempages pages, VOP_BMAP() translates the
 *	file offset to a device block, and uvn_findpage_xip() looks up the
 *	device page backing that block's byte offset.  Always returns 0;
 *	per-page state fixup is done later by genfs_do_getpages_xip_io_done().
 *	The centeridx/access_type/advice parameters are currently unused.
 *
 *	Uses the vp2fs_bshift()/trunc_blk()/round_blk() macros defined in
 *	genfs_getpages() above (they expand the local fs_bsize).
 */
static int
genfs_do_getpages_xip_io(
	struct vnode *vp,
	voff_t origoffset,
	struct vm_page **pps,
	int *npagesp,
	int centeridx,
	vm_prot_t access_type,
	int advice,
	int flags,
	const int orignmempages)
{
	const int fs_bshift = vp2fs_bshift(vp);
	const int dev_bshift = vp2dev_bshift(vp);
	const int fs_bsize = 1 << fs_bshift;

	int error;
	off_t off;
	int i;

	UVMHIST_FUNC("genfs_do_getpages_xip_io"); UVMHIST_CALLED(ubchist);

	/* Caller must hold the genfs node lock (or have claimed to). */
	KASSERT(((flags & PGO_GLOCKHELD) != 0) || genfs_node_rdlocked(vp));

#ifdef UVMHIST
	const off_t startoffset = trunc_blk(origoffset);
	const off_t endoffset = round_blk(origoffset + PAGE_SIZE * orignmempages);
#endif

	UVMHIST_LOG(ubchist, "xip npages=%d startoffset=%lx endoffset=%lx",
	    orignmempages, (long)startoffset, (long)endoffset, 0);

	off = origoffset;
	for (i = 0; i < orignmempages; i++) {
		daddr_t lbn, blkno;
		int run;
		struct vnode *devvp;

		/* Logical block containing this page's offset. */
		lbn = (off & ~(fs_bsize - 1)) >> fs_bshift;

		/*
		 * NOTE(review): error is only consumed by the KASSERT, so a
		 * VOP_BMAP failure is ignored on non-DIAGNOSTIC kernels.
		 */
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		KASSERT(error == 0);
		UVMHIST_LOG(ubchist, "xip VOP_BMAP: lbn=%ld blkno=%ld run=%d",
		    (long)lbn, (long)blkno, run, 0);

		/*
		 * XIP page metadata assignment
		 * - Unallocated block is redirected to the dedicated zero'ed
		 *   page.
		 *
		 * NOTE(review): the comment above describes the intended
		 * behavior, but the code below panics on holes instead.
		 */
		if (blkno < 0) {
			panic("XIP hole is not supported yet!");
		} else {
			daddr_t blk_off, fs_off;

			/* Device byte offset of the block, plus the
			 * page's byte offset within that fs block. */
			blk_off = blkno << dev_bshift;
			fs_off = off - (lbn << fs_bshift);

			pps[i] = uvn_findpage_xip(devvp, &vp->v_uobj,
			    blk_off + fs_off);
			KASSERT(pps[i] != NULL);
		}

		UVMHIST_LOG(ubchist, "xip pgs %d => phys_addr=0x%lx (%p)",
		    i,
		    (long)pps[i]->phys_addr,
		    pps[i],
		    0);

		off += PAGE_SIZE;
	}

	return 0;
}
1009:
1010: int
1011: genfs_do_getpages_xip_io_done(
1012: struct vnode *vp,
1013: voff_t origoffset,
1014: struct vm_page **pps,
1015: int *npagesp,
1016: int centeridx,
1017: vm_prot_t access_type,
1018: int advice,
1019: int flags,
1020: const int orignmempages)
1021: {
1022: struct uvm_object * const uobj = &vp->v_uobj;
1023: int i;
1024:
1.36.2.26 uebayasi 1025: mutex_enter(&uobj->vmobjlock);
1.36.2.13 uebayasi 1026:
1.36.2.42 uebayasi 1027: for (i = 0; i < orignmempages; i++) {
1.36.2.13 uebayasi 1028: struct vm_page *pg = pps[i];
1029:
1.36.2.31 uebayasi 1030: KASSERT((pg->flags & PG_RDONLY) != 0);
1.36.2.33 uebayasi 1031: KASSERT((pg->flags & PG_BUSY) == 0);
1032: KASSERT((pg->flags & PG_CLEAN) != 0);
1033: KASSERT((pg->flags & PG_DEVICE) != 0);
1034: pg->flags |= PG_BUSY;
1035: pg->flags &= ~PG_FAKE;
1036: pg->uobject = &vp->v_uobj;
1.36.2.13 uebayasi 1037: }
1038:
1.36.2.36 uebayasi 1039: mutex_exit(&uobj->vmobjlock);
1.36.2.13 uebayasi 1040:
1.36.2.42 uebayasi 1041: *npagesp = orignmempages;
1.36.2.1 uebayasi 1042:
1043: return 0;
1044: }
1045: #endif
1046:
1.1 pooka 1047: /*
1048: * generic VM putpages routine.
1049: * Write the given range of pages to backing store.
1050: *
1051: * => "offhi == 0" means flush all pages at or after "offlo".
1052: * => object should be locked by caller. we return with the
1053: * object unlocked.
1054: * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
1055: * thus, a caller might want to unlock higher level resources
1056: * (e.g. vm_map) before calling flush.
1057: * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, we will not block
1058: * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
1059: * => NOTE: we rely on the fact that the object's memq is a TAILQ and
1060: * that new pages are inserted on the tail end of the list. thus,
1061: * we can make a complete pass through the object in one go by starting
1062: * at the head and working towards the tail (new pages are put in
1063: * front of us).
1064: * => NOTE: we are allowed to lock the page queues, so the caller
1065: * must not be holding the page queue lock.
1066: *
1067: * note on "cleaning" object and PG_BUSY pages:
1068: * this routine is holding the lock on the object. the only time
1069: * that it can run into a PG_BUSY page that it does not own is if
1070: * some other process has started I/O on the page (e.g. either
1071: * a pagein, or a pageout). if the PG_BUSY page is being paged
1072: * in, then it can not be dirty (!PG_CLEAN) because no one has
1073: * had a chance to modify it yet. if the PG_BUSY page is being
1074: * paged out then it means that someone else has already started
1075: * cleaning the page for us (how nice!). in this case, if we
1076: * have syncio specified, then after we make our pass through the
1077: * object we need to wait for the other PG_BUSY pages to clear
1078: * off (i.e. we need to do an iosync). also note that once a
1079: * page is PG_BUSY it must stay in its object until it is un-busyed.
1080: *
1081: * note on page traversal:
1082: * we can traverse the pages in an object either by going down the
1083: * linked list in "uobj->memq", or we can go over the address range
1084: * by page doing hash table lookups for each address. depending
1085: * on how many pages are in the object it may be cheaper to do one
1086: * or the other. we set "by_list" to true if we are using memq.
1087: * if the cost of a hash lookup was equal to the cost of the list
1088: * traversal we could compare the number of pages in the start->stop
1089: * range to the total number of pages in the object. however, it
1090: * seems that a hash table lookup is more expensive than the linked
1091: * list traversal, so we multiply the number of pages in the
1092: * range by an estimate of the relatively higher cost of the hash lookup.
1093: */
1094:
1095: int
1096: genfs_putpages(void *v)
1097: {
1098: struct vop_putpages_args /* {
1099: struct vnode *a_vp;
1100: voff_t a_offlo;
1101: voff_t a_offhi;
1102: int a_flags;
1.22 uebayasi 1103: } */ * const ap = v;
1.1 pooka 1104:
1.36.2.24 uebayasi 1105: #ifdef XIP
1106: if ((ap->a_vp->v_vflag & VV_XIP) != 0)
1107: return genfs_do_putpages_xip(ap->a_vp, ap->a_offlo, ap->a_offhi,
1108: ap->a_flags, NULL);
1109: else
1110: #endif
1.1 pooka 1111: return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
1112: ap->a_flags, NULL);
1113: }
1114:
int
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff,
    int origflags, struct vm_page **busypg)
{
	struct uvm_object * const uobj = &vp->v_uobj;
	kmutex_t * const slock = &uobj->vmobjlock;
	off_t off;
	/* Even for strange MAXPHYS, the shift rounds down to a page */
#define maxpages (MAXPHYS >> PAGE_SHIFT)
	int i, error, npages, nback;
	int freeflag;
	/* curmp/endmp are PG_MARKER placeholders linked into uobj->memq. */
	struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
	bool wasclean, by_list, needs_clean, yld;
	bool async = (origflags & PGO_SYNCIO) == 0;
	bool pagedaemon = curlwp == uvm.pagedaemon_lwp;
	struct lwp * const l = curlwp ? curlwp : &lwp0;
	struct genfs_node * const gp = VTOG(vp);
	int flags;
	int dirtygen;
	bool modified;
	bool need_wapbl;
	bool has_trans;
	bool cleanall;
	bool onworklst;

	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	KASSERT(origflags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
	KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
	KASSERT(startoff < endoff || endoff == 0);

	UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
	    vp, uobj->uo_npages, startoff, endoff - startoff);

	has_trans = false;
	need_wapbl = (!pagedaemon && vp->v_mount && vp->v_mount->mnt_wapbl &&
	    (origflags & PGO_JOURNALLOCKED) == 0);

	/*
	 * we restart from here after starting the fstrans/WAPBL
	 * transaction (which drops slock) and for PGO_RECLAIM.
	 */
retry:
	modified = false;
	flags = origflags;
	KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 ||
	    (vp->v_iflag & VI_WRMAPDIRTY) == 0);
	if (uobj->uo_npages == 0) {
		if (vp->v_iflag & VI_ONWORKLST) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
				vn_syncer_remove_from_worklist(vp);
		}
		if (has_trans) {
			if (need_wapbl)
				WAPBL_END(vp->v_mount);
			fstrans_done(vp->v_mount);
		}
		mutex_exit(slock);
		return (0);
	}

	/*
	 * the vnode has pages, set up to process the request.
	 */

	if (!has_trans && (flags & PGO_CLEANIT) != 0) {
		mutex_exit(slock);
		if (pagedaemon) {
			/* the pagedaemon must never block here */
			error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY);
			if (error)
				return error;
		} else
			fstrans_start(vp->v_mount, FSTRANS_LAZY);
		if (need_wapbl) {
			error = WAPBL_BEGIN(vp->v_mount);
			if (error) {
				fstrans_done(vp->v_mount);
				return error;
			}
		}
		has_trans = true;
		mutex_enter(slock);
		goto retry;
	}

	error = 0;
	wasclean = (vp->v_numoutput == 0);
	off = startoff;
	if (endoff == 0 || flags & PGO_ALLPAGES) {
		endoff = trunc_page(LLONG_MAX);
	}
	by_list = (uobj->uo_npages <=
	    ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);

#if !defined(DEBUG)
	/*
	 * if this vnode is known not to have dirty pages,
	 * don't bother to clean it out.
	 */

	if ((vp->v_iflag & VI_ONWORKLST) == 0) {
		if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
			goto skip_scan;
		}
		flags &= ~PGO_CLEANIT;
	}
#endif /* !defined(DEBUG) */

	/*
	 * start the loop.  when scanning by list, hold the last page
	 * in the list before we start.  pages allocated after we start
	 * will be added to the end of the list, so we can stop at the
	 * current last page.
	 */

	cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
	    startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
	    (vp->v_iflag & VI_ONWORKLST) != 0;
	/* snapshot the dirty generation to detect concurrent dirtying */
	dirtygen = gp->g_dirtygen;
	/* the pagedaemon frees via PG_PAGEOUT so completions are accounted */
	freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
	if (by_list) {
		curmp.flags = PG_MARKER;
		endmp.flags = PG_MARKER;
		pg = TAILQ_FIRST(&uobj->memq);
		TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
	} else {
		pg = uvm_pagelookup(uobj, off);
	}
	nextpg = NULL;
	while (by_list || off < endoff) {

		/*
		 * if the current page is not interesting, move on to the next.
		 */

		KASSERT(pg == NULL || pg->uobject == uobj ||
		    (pg->flags & PG_MARKER) != 0);
		KASSERT(pg == NULL ||
		    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
		    (pg->flags & (PG_BUSY|PG_MARKER)) != 0);
		if (by_list) {
			if (pg == &endmp) {
				break;
			}
			if (pg->flags & PG_MARKER) {
				pg = TAILQ_NEXT(pg, listq.queue);
				continue;
			}
			if (pg->offset < startoff || pg->offset >= endoff ||
			    pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
				if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
					wasclean = false;
				}
				pg = TAILQ_NEXT(pg, listq.queue);
				continue;
			}
			off = pg->offset;
		} else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
			if (pg != NULL) {
				wasclean = false;
			}
			off += PAGE_SIZE;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if the current page needs to be cleaned and it's busy,
		 * wait for it to become unbusy.
		 */

		yld = (l->l_cpu->ci_schedstate.spc_flags &
		    SPCF_SHOULDYIELD) && !pagedaemon;
		if (pg->flags & PG_BUSY || yld) {
			UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
			if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
				UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
				error = EDEADLK;
				if (busypg != NULL)
					*busypg = pg;
				break;
			}
			if (pagedaemon) {
				/*
				 * someone has taken the page while we
				 * dropped the lock for fstrans_start.
				 */
				break;
			}
			if (by_list) {
				/* hold our place while we sleep */
				TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
				UVMHIST_LOG(ubchist, "curmp next %p",
				    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
			}
			if (yld) {
				mutex_exit(slock);
				preempt();
				mutex_enter(slock);
			} else {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
				mutex_enter(slock);
			}
			if (by_list) {
				UVMHIST_LOG(ubchist, "after next %p",
				    TAILQ_NEXT(&curmp, listq.queue), 0,0,0);
				pg = TAILQ_NEXT(&curmp, listq.queue);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
			} else {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if we're freeing, remove all mappings of the page now.
		 * if we're cleaning, check if the page needs to be cleaned.
		 */

		if (flags & PGO_FREE) {
			pmap_page_protect(pg, VM_PROT_NONE);
		} else if (flags & PGO_CLEANIT) {

			/*
			 * if we still have some hope to pull this vnode off
			 * from the syncer queue, write-protect the page.
			 */

			if (cleanall && wasclean &&
			    gp->g_dirtygen == dirtygen) {

				/*
				 * uobj pages get wired only by uvm_fault
				 * where uobj is locked.
				 */

				if (pg->wire_count == 0) {
					pmap_page_protect(pg,
					    VM_PROT_READ|VM_PROT_EXECUTE);
				} else {
					cleanall = false;
				}
			}
		}

		if (flags & PGO_CLEANIT) {
			needs_clean = pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0;
			pg->flags |= PG_CLEAN;
		} else {
			needs_clean = false;
		}

		/*
		 * if we're cleaning, build a cluster.
		 * the cluster will consist of pages which are currently dirty,
		 * but they will be returned to us marked clean.
		 * if not cleaning, just operate on the one page.
		 */

		if (needs_clean) {
			KDASSERT((vp->v_iflag & VI_ONWORKLST));
			wasclean = false;
			memset(pgs, 0, sizeof(pgs));
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "genfs_putpages");

			/*
			 * first look backward.
			 */

			npages = MIN(maxpages >> 1, off >> PAGE_SHIFT);
			nback = npages;
			uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
			if (nback) {
				memmove(&pgs[0], &pgs[npages - nback],
				    nback * sizeof(pgs[0]));
				if (npages - nback < nback)
					memset(&pgs[nback], 0,
					    (npages - nback) * sizeof(pgs[0]));
				else
					memset(&pgs[npages - nback], 0,
					    nback * sizeof(pgs[0]));
			}

			/*
			 * then plug in our page of interest.
			 */

			pgs[nback] = pg;

			/*
			 * then look forward to fill in the remaining space in
			 * the array of pages.
			 */

			npages = maxpages - nback - 1;
			uvn_findpages(uobj, off + PAGE_SIZE, &npages,
			    &pgs[nback + 1],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
			npages += nback + 1;
		} else {
			pgs[0] = pg;
			npages = 1;
			nback = 0;
		}

		/*
		 * apply FREE or DEACTIVATE options if requested.
		 */

		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			mutex_enter(&uvm_pageqlock);
		}
		for (i = 0; i < npages; i++) {
			tpg = pgs[i];
			KASSERT(tpg->uobject == uobj);
			if (by_list && tpg == TAILQ_NEXT(pg, listq.queue))
				pg = tpg;
			if (tpg->offset < startoff || tpg->offset >= endoff)
				continue;
			if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
				uvm_pagedeactivate(tpg);
			} else if (flags & PGO_FREE) {
				pmap_page_protect(tpg, VM_PROT_NONE);
				if (tpg->flags & PG_BUSY) {
					tpg->flags |= freeflag;
					if (pagedaemon) {
						uvm_pageout_start(1);
						uvm_pagedequeue(tpg);
					}
				} else {

					/*
					 * ``page is not busy''
					 * implies that npages is 1
					 * and needs_clean is false.
					 */

					nextpg = TAILQ_NEXT(tpg, listq.queue);
					uvm_pagefree(tpg);
					if (pagedaemon)
						uvmexp.pdfreed++;
				}
			}
		}
		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			mutex_exit(&uvm_pageqlock);
		}
		if (needs_clean) {
			modified = true;

			/*
			 * start the i/o.  if we're traversing by list,
			 * keep our place in the list with a marker page.
			 */

			if (by_list) {
				TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
				    listq.queue);
			}
			mutex_exit(slock);
			error = GOP_WRITE(vp, pgs, npages, flags);
			mutex_enter(slock);
			if (by_list) {
				pg = TAILQ_NEXT(&curmp, listq.queue);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq.queue);
			}
			if (error) {
				break;
			}
			if (by_list) {
				continue;
			}
		}

		/*
		 * find the next page and continue if there was no error.
		 */

		if (by_list) {
			if (nextpg) {
				pg = nextpg;
				nextpg = NULL;
			} else {
				pg = TAILQ_NEXT(pg, listq.queue);
			}
		} else {
			off += (npages - nback) << PAGE_SHIFT;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
		}
	}
	if (by_list) {
		TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
	}

	if (modified && (vp->v_iflag & VI_WRMAPDIRTY) != 0 &&
	    (vp->v_type != VBLK ||
	    (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
		GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
	}

	/*
	 * if we're cleaning and there was nothing to clean,
	 * take us off the syncer list.  if we started any i/o
	 * and we're doing sync i/o, wait for all writes to finish.
	 */

	if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
	    (vp->v_iflag & VI_ONWORKLST) != 0) {
#if defined(DEBUG)
		TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
			if ((pg->flags & PG_MARKER) != 0) {
				continue;
			}
			if ((pg->flags & PG_CLEAN) == 0) {
				printf("%s: %p: !CLEAN\n", __func__, pg);
			}
			if (pmap_is_modified(pg)) {
				printf("%s: %p: modified\n", __func__, pg);
			}
		}
#endif /* defined(DEBUG) */
		vp->v_iflag &= ~VI_WRMAPDIRTY;
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
			vn_syncer_remove_from_worklist(vp);
	}

#if !defined(DEBUG)
skip_scan:
#endif /* !defined(DEBUG) */

	/* Wait for output to complete. */
	if (!wasclean && !async && vp->v_numoutput != 0) {
		while (vp->v_numoutput != 0)
			cv_wait(&vp->v_cv, slock);
	}
	onworklst = (vp->v_iflag & VI_ONWORKLST) != 0;
	mutex_exit(slock);

	if ((flags & PGO_RECLAIM) != 0 && onworklst) {
		/*
		 * in the case of PGO_RECLAIM, ensure to make the vnode clean.
		 * retrying is not a big deal because, in many cases,
		 * uobj->uo_npages is already 0 here.
		 */
		mutex_enter(slock);
		goto retry;
	}

	if (has_trans) {
		if (need_wapbl)
			WAPBL_END(vp->v_mount);
		fstrans_done(vp->v_mount);
	}

	return (error);
}
1575:
1.36.2.24 uebayasi 1576: #ifdef XIP
/*
 * genfs_do_putpages_xip: putpages for execute-in-place vnodes.
 *
 * Called with uobj->vmobjlock held; returns with it released (like
 * genfs_do_putpages).  Only PGO_FREE has any effect: we re-locate the
 * XIP device pages via the getpages path and strip their pmap mappings.
 * busypg is accepted for interface symmetry but never written.
 */
int
genfs_do_putpages_xip(struct vnode *vp, off_t startoff, off_t endoff,
    int flags, struct vm_page **busypg)
{
	struct uvm_object *uobj = &vp->v_uobj;
#ifdef DIAGNOSTIC
	struct genfs_node * const gp = VTOG(vp);
#endif

	UVMHIST_FUNC("genfs_do_putpages_xip"); UVMHIST_CALLED(ubchist);

	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
	KASSERT(vp->v_numoutput == 0);
	KASSERT(gp->g_dirtygen == 0);

	UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
	    vp, uobj->uo_npages, startoff, endoff - startoff);

	/*
	 * XIP pages are read-only, and never become dirty.  They're also never
	 * queued.  PGO_DEACTIVATE and PGO_CLEANIT are meaningless for XIP
	 * pages, so we ignore them.
	 */
	if ((flags & PGO_FREE) == 0)
		goto done;

	/*
	 * For PGO_FREE (or (PGO_CLEANIT | PGO_FREE)), we invalidate MMU
	 * mappings of both XIP pages and XIP zero pages.
	 *
	 * Zero page is freed when one of its mapped offset is freed, even if
	 * one file (vnode) has many holes and mapping its zero page to all
	 * of those hole pages.
	 *
	 * We don't know which pages are currently mapped in the given vnode,
	 * because XIP pages are not added to vnode.  What we can do is to
	 * locate pages by querying the filesystem as done in getpages.  Call
	 * genfs_do_getpages_xip_io().
	 */

	off_t off, eof;

	off = trunc_page(startoff);
	if (endoff == 0 || (flags & PGO_ALLPAGES))
		GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_MEM);
	else
		eof = endoff;

	while (off < eof) {
		int npages, orignpages, error, i;
		/* maxpages comes from the #define in genfs_do_putpages */
		struct vm_page *pgs[maxpages], *pg;

		npages = round_page(eof - off) >> PAGE_SHIFT;
		if (npages > maxpages)
			npages = maxpages;

		orignpages = npages;
		KASSERT(mutex_owned(&uobj->vmobjlock));
		/* drop the object lock across the getpages lookup */
		mutex_exit(&uobj->vmobjlock);
		error = genfs_do_getpages_xip_io(vp, off, pgs, &npages, 0,
		    VM_PROT_ALL, 0, PGO_GLOCKHELD, orignpages);
		KASSERT(error == 0);
		KASSERT(npages == orignpages);
		mutex_enter(&uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pg = pgs[i];
			if (pg == NULL || pg == PGO_DONTCARE)
				continue;
			/*
			 * Freeing normal XIP pages; nothing to do.
			 */
			pmap_page_protect(pg, VM_PROT_NONE);
			KASSERT((pg->flags & PG_RDONLY) != 0);
			KASSERT((pg->flags & PG_CLEAN) != 0);
			KASSERT((pg->flags & PG_FAKE) == 0);
			KASSERT((pg->flags & PG_DEVICE) != 0);
			pg->flags &= ~PG_BUSY;
		}
		off += npages << PAGE_SHIFT;
	}

	KASSERT(uobj->uo_npages == 0);

done:
	KASSERT(mutex_owned(&uobj->vmobjlock));
	mutex_exit(&uobj->vmobjlock);
	return 0;
}
1666: #endif
1667:
1.1 pooka 1668: int
1669: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1670: {
1671: off_t off;
1672: vaddr_t kva;
1673: size_t len;
1674: int error;
1675: UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1676:
1677: UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1678: vp, pgs, npages, flags);
1679:
1680: off = pgs[0]->offset;
1681: kva = uvm_pagermapin(pgs, npages,
1682: UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1683: len = npages << PAGE_SHIFT;
1684:
1685: error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
1686: uvm_aio_biodone);
1687:
1688: return error;
1689: }
1690:
1.7 reinoud 1691: int
1692: genfs_gop_write_rwmap(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1693: {
1694: off_t off;
1695: vaddr_t kva;
1696: size_t len;
1697: int error;
1698: UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1699:
1700: UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1701: vp, pgs, npages, flags);
1702:
1703: off = pgs[0]->offset;
1704: kva = uvm_pagermapin(pgs, npages,
1705: UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1706: len = npages << PAGE_SHIFT;
1707:
1708: error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
1709: uvm_aio_biodone);
1710:
1711: return error;
1712: }
1713:
1.1 pooka 1714: /*
1715: * Backend routine for doing I/O to vnode pages. Pages are already locked
1716: * and mapped into kernel memory. Here we just look up the underlying
1717: * device block addresses and call the strategy routine.
1718: */
1719:
static int
genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags,
    enum uio_rw rw, void (*iodone)(struct buf *))
{
	int s, error;
	int fs_bshift, dev_bshift;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	struct buf *mbp, *bp;
	const bool async = (flags & PGO_SYNCIO) == 0;
	const bool iowrite = rw == UIO_WRITE;
	const int brw = iowrite ? B_WRITE : B_READ;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
	    vp, kva, len, flags);

	KASSERT(vp->v_size <= vp->v_writesize);
	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
	if (vp->v_type != VBLK) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		/* block devices use the raw device block size */
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	error = 0;
	startoffset = off;
	/* never issue I/O past the (write) end of file */
	bytes = MIN(len, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	if (iowrite) {
		/*
		 * NOTE(review): +2 covers mbp and the nested-buf chain;
		 * presumably balanced in nestiobuf_done/the iodone path --
		 * confirm against uvm_pager.
		 */
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput += 2;
		mutex_exit(&vp->v_interlock);
	}
	mbp = getiobuf(vp, true);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	mbp->b_bufsize = len;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_cflags = BC_BUSY | BC_AGE;
	if (async) {
		/* async: biodone will invoke the caller's iodone callback */
		mbp->b_flags = brw | B_ASYNC;
		mbp->b_iodone = iodone;
	} else {
		/* sync: we biowait() and call iodone ourselves below */
		mbp->b_flags = brw;
		mbp->b_iodone = NULL;
	}
	if (curlwp == uvm.pagedaemon_lwp)
		BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
	else if (async)
		BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
	else
		BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		int run;
		daddr_t lbn, blkno;
		struct vnode *devvp;

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn,error,0,0);
			skipbytes += bytes;
			bytes = 0;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);

		/*
		 * if this block isn't allocated, zero it instead of
		 * reading it.  unless we are going to allocate blocks,
		 * mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno == (daddr_t)-1) {
			if (!iowrite) {
				memset((char *)kva + (offset - startoffset), 0,
				    iobytes);
			}
			skipbytes += iobytes;
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			bp = getiobuf(vp, true);
			nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
		}
		bp->b_lblkno = 0;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist,
		    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, bp->b_bcount, bp->b_blkno);

		VOP_STRATEGY(devvp, bp);
	}

loopdone:
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
	}
	/* account the skipped bytes so the master buf can complete */
	nestiobuf_done(mbp, skipbytes, error);
	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
		return (0);
	}
	UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
	error = biowait(mbp);
	s = splbio();
	(*iodone)(mbp);
	splx(s);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
	return (error);
}
1870:
/*
 * genfs_compat_getpages: getpages for filesystems without a native
 * implementation, built on top of VOP_READ.  Pages still marked PG_FAKE
 * after uvn_findpages() are filled one page at a time with a system-space
 * uio; on error those PG_FAKE pages are released.
 */
int
genfs_compat_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t origoffset;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, **pgs;
	vaddr_t kva;
	int i, error, orignpages, npages;
	struct iovec iov;
	struct uio uio;
	kauth_cred_t cred = curlwp->l_cred;
	const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;

	error = 0;
	origoffset = ap->a_offset;
	orignpages = *ap->a_count;
	pgs = ap->a_m;

	if (ap->a_flags & PGO_LOCKED) {
		/* locked case: look up resident pages only, never sleep */
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC| (memwrite ? UFP_NORDONLY : 0));

		error = ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
		if (error == 0 && memwrite) {
			genfs_markdirty(vp);
		}
		return error;
	}
	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
		mutex_exit(&uobj->vmobjlock);
		return EINVAL;
	}
	if ((ap->a_flags & PGO_SYNCIO) == 0) {
		/* async read-ahead is not supported here; report no pages */
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}
	npages = orignpages;
	uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
	mutex_exit(&uobj->vmobjlock);
	kva = uvm_pagermapin(pgs, npages,
	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		/* only PG_FAKE pages lack valid data and need reading */
		if ((pg->flags & PG_FAKE) == 0) {
			continue;
		}
		iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
		iov.iov_len = PAGE_SIZE;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = origoffset + (i << PAGE_SHIFT);
		uio.uio_rw = UIO_READ;
		uio.uio_resid = PAGE_SIZE;
		UIO_SETUP_SYSSPACE(&uio);
		/* XXX vn_lock */
		error = VOP_READ(vp, &uio, 0, cred);
		if (error) {
			break;
		}
		if (uio.uio_resid) {
			/* short read (e.g. past EOF): zero the remainder */
			memset(iov.iov_base, 0, uio.uio_resid);
		}
	}
	uvm_pagermapout(kva, npages);
	mutex_enter(&uobj->vmobjlock);
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (error && (pg->flags & PG_FAKE) != 0) {
			/* page was never filled; drop it when unbusied */
			pg->flags |= PG_RELEASED;
		} else {
			pmap_clear_modify(pg);
			uvm_pageactivate(pg);
		}
	}
	if (error) {
		uvm_page_unbusy(pgs, npages);
	}
	mutex_exit(&uvm_pageqlock);
	if (error == 0 && memwrite) {
		genfs_markdirty(vp);
	}
	mutex_exit(&uobj->vmobjlock);
	return error;
}
1968:
/*
 * genfs_compat_gop_write: GOP_WRITE for filesystems without a native
 * implementation, built on top of VOP_WRITE.  The pages are written with
 * a single system-space uio, then a buf is faked up (carrying any error
 * in b_error) and completed through uvm_aio_aiodone() so the normal
 * pageout completion path unbusies the pages and drops v_numoutput.
 */
int
genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
    int flags)
{
	off_t offset;
	struct iovec iov;
	struct uio uio;
	kauth_cred_t cred = curlwp->l_cred;
	struct buf *bp;
	vaddr_t kva;
	int error;

	offset = pgs[0]->offset;
	kva = uvm_pagermapin(pgs, npages,
	    UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);

	iov.iov_base = (void *)kva;
	iov.iov_len = npages << PAGE_SHIFT;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = offset;
	uio.uio_rw = UIO_WRITE;
	uio.uio_resid = npages << PAGE_SHIFT;
	UIO_SETUP_SYSSPACE(&uio);
	/* XXX vn_lock */
	error = VOP_WRITE(vp, &uio, 0, cred);

	/* account one output; uvm_aio_aiodone's completion will drop it */
	mutex_enter(&vp->v_interlock);
	vp->v_numoutput++;
	mutex_exit(&vp->v_interlock);

	/* fake a completed buf so the aio completion path runs normally */
	bp = getiobuf(vp, true);
	bp->b_cflags = BC_BUSY | BC_AGE;
	bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
	bp->b_data = (char *)kva;
	bp->b_bcount = npages << PAGE_SHIFT;
	bp->b_bufsize = npages << PAGE_SHIFT;
	bp->b_resid = 0;
	bp->b_error = error;
	uvm_aio_aiodone(bp);
	return (error);
}
2011:
2012: /*
2013: * Process a uio using direct I/O. If we reach a part of the request
2014: * which cannot be processed in this fashion for some reason, just return.
2015: * The caller must handle some additional part of the request using
2016: * buffered I/O before trying direct I/O again.
2017: */
2018:
void
genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct vmspace *vs;
	struct iovec *iov;
	vaddr_t va;
	size_t len;
	const int mask = DEV_BSIZE - 1;	/* sector alignment mask */
	int error;
	bool need_wapbl = (vp->v_mount && vp->v_mount->mnt_wapbl &&
	    (ioflag & IO_JOURNALLOCKED) == 0);

	/*
	 * We only support direct I/O to user space for now.
	 */

	if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
		return;
	}

	/*
	 * If the vnode is mapped, we would need to get the getpages lock
	 * to stabilize the bmap, but then we would get into trouble while
	 * locking the pages if the pages belong to this same vnode (or a
	 * multi-vnode cascade to the same effect).  Just fall back to
	 * buffered I/O if the vnode is mapped to avoid this mess.
	 */

	if (vp->v_vflag & VV_MAPPED) {
		return;
	}

	if (need_wapbl) {
		error = WAPBL_BEGIN(vp->v_mount);
		if (error)
			return;
	}

	/*
	 * Do as much of the uio as possible with direct I/O.
	 */

	vs = uio->uio_vmspace;
	while (uio->uio_resid) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			/* empty iovec: advance to the next one */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		va = (vaddr_t)iov->iov_base;
		/* cap each chunk at genfs_maxdio, rounded to sectors */
		len = MIN(iov->iov_len, genfs_maxdio);
		len &= ~mask;

		/*
		 * If the next chunk is smaller than DEV_BSIZE or extends past
		 * the current EOF, then fall back to buffered I/O.
		 */

		if (len == 0 || uio->uio_offset + len > vp->v_size) {
			break;
		}

		/*
		 * Check alignment.  The file offset must be at least
		 * sector-aligned.  The exact constraint on memory alignment
		 * is very hardware-dependent, but requiring sector-aligned
		 * addresses there too is safe.
		 */

		if (uio->uio_offset & mask || va & mask) {
			break;
		}
		error = genfs_do_directio(vs, va, len, vp, uio->uio_offset,
		    uio->uio_rw);
		if (error) {
			break;
		}
		/* advance the uio past the chunk we just transferred */
		iov->iov_base = (char *)iov->iov_base + len;
		iov->iov_len -= len;
		uio->uio_offset += len;
		uio->uio_resid -= len;
	}

	if (need_wapbl)
		WAPBL_END(vp->v_mount);
}
2106:
2107: /*
2108: * Iodone routine for direct I/O. We don't do much here since the request is
2109: * always synchronous, so the caller will do most of the work after biowait().
2110: */
2111:
2112: static void
2113: genfs_dio_iodone(struct buf *bp)
2114: {
2115:
2116: KASSERT((bp->b_flags & B_ASYNC) == 0);
1.2 ad 2117: if ((bp->b_flags & B_READ) == 0 && (bp->b_cflags & BC_AGE) != 0) {
2118: mutex_enter(bp->b_objlock);
1.1 pooka 2119: vwakeup(bp);
1.2 ad 2120: mutex_exit(bp->b_objlock);
1.1 pooka 2121: }
2122: putiobuf(bp);
2123: }
2124:
/*
 * Process one chunk of a direct I/O request: flush/invalidate the
 * affected range of the page cache, wire the user buffer and alias it
 * into kernel virtual space, perform the transfer synchronously via
 * genfs_do_io(), then tear the mapping down and unwire the pages.
 *
 * Returns 0 on success or an errno; on a write, ENOSPC is returned if
 * any block in the range is unallocated so the caller falls back to
 * buffered I/O.
 */

static int
genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp,
    off_t off, enum uio_rw rw)
{
	struct vm_map *map;
	struct pmap *upm, *kpm;
	/* Kernel mapping covers the user range rounded out to page bounds. */
	size_t klen = round_page(uva + len) - trunc_page(uva);
	off_t spoff, epoff;
	vaddr_t kva, puva;
	paddr_t pa;
	vm_prot_t prot;
	int error, rv, poff, koff;
	/* For a write, also free the cached pages after cleaning them. */
	const int pgoflags = PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED |
		(rw == UIO_WRITE ? PGO_FREE : 0);

	/*
	 * For writes, verify that this range of the file already has fully
	 * allocated backing store.  If there are any holes, just punt and
	 * make the caller take the buffered write path.
	 */

	if (rw == UIO_WRITE) {
		daddr_t lbn, elbn, blkno;
		int bsize, bshift, run;

		bshift = vp->v_mount->mnt_fs_bshift;
		bsize = 1 << bshift;
		lbn = off >> bshift;
		elbn = (off + len + bsize - 1) >> bshift;
		while (lbn < elbn) {
			error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
			if (error) {
				return error;
			}
			if (blkno == (daddr_t)-1) {
				/* Hole: no backing store allocated. */
				return ENOSPC;
			}
			/* Skip the whole contiguous run just mapped. */
			lbn += 1 + run;
		}
	}

	/*
	 * Flush any cached pages for parts of the file that we're about to
	 * access.  If we're writing, invalidate pages as well.
	 */

	spoff = trunc_page(off);
	epoff = round_page(off + len);
	/* VOP_PUTPAGES is entered with v_interlock held and releases it. */
	mutex_enter(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags);
	if (error) {
		return error;
	}

	/*
	 * Wire the user pages and remap them into kernel memory.
	 *
	 * Note the protection is from the perspective of the user buffer:
	 * a file read stores into the buffer, so it needs write access too.
	 */

	prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
	error = uvm_vslock(vs, (void *)uva, len, prot);
	if (error) {
		return error;
	}

	map = &vs->vm_map;
	upm = vm_map_pmap(map);
	kpm = vm_map_pmap(kernel_map);
	kva = uvm_km_alloc(kernel_map, klen, 0,
			   UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	puva = trunc_page(uva);
	for (poff = 0; poff < klen; poff += PAGE_SIZE) {
		/* Pages are wired, so extraction cannot fail. */
		rv = pmap_extract(upm, puva + poff, &pa);
		KASSERT(rv);
		pmap_enter(kpm, kva + poff, pa, prot, prot | PMAP_WIRED);
	}
	pmap_update(kpm);

	/*
	 * Do the I/O.
	 */

	/* Offset of the user buffer within its first page. */
	koff = uva - trunc_page(uva);
	error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw,
			    genfs_dio_iodone);

	/*
	 * Tear down the kernel mapping.
	 */

	pmap_remove(kpm, kva, kva + klen);
	pmap_update(kpm);
	uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY);

	/*
	 * Unwire the user pages.
	 */

	uvm_vsunlock(vs, (void *)uva, len);
	return error;
}
1.2 ad 2229:
CVSweb <webmaster@jp.NetBSD.org>