Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.151
1.151 ! perseant 1: /* $NetBSD: genfs_vnops.c,v 1.150 2007/03/04 06:03:14 christos Exp $ */
1.6 fvdl 2:
3: /*
4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.81 agc 15: * 3. Neither the name of the University nor the names of its contributors
1.6 fvdl 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: *
31: */
1.40 lukem 32:
33: #include <sys/cdefs.h>
1.151 ! perseant 34: __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.150 2007/03/04 06:03:14 christos Exp $");
1.8 thorpej 35:
1.1 mycroft 36: #include <sys/param.h>
37: #include <sys/systm.h>
1.6 fvdl 38: #include <sys/proc.h>
1.1 mycroft 39: #include <sys/kernel.h>
40: #include <sys/mount.h>
41: #include <sys/namei.h>
42: #include <sys/vnode.h>
1.13 wrstuden 43: #include <sys/fcntl.h>
1.135 yamt 44: #include <sys/kmem.h>
1.3 mycroft 45: #include <sys/poll.h>
1.37 chs 46: #include <sys/mman.h>
1.66 jdolecek 47: #include <sys/file.h>
1.125 elad 48: #include <sys/kauth.h>
1.143 hannken 49: #include <sys/fstrans.h>
1.1 mycroft 50:
51: #include <miscfs/genfs/genfs.h>
1.37 chs 52: #include <miscfs/genfs/genfs_node.h>
1.6 fvdl 53: #include <miscfs/specfs/specdev.h>
1.1 mycroft 54:
1.21 chs 55: #include <uvm/uvm.h>
56: #include <uvm/uvm_pager.h>
57:
1.130 chs 58: static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *,
59: off_t, enum uio_rw);
60: static void genfs_dio_iodone(struct buf *);
61:
62: static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw,
63: void (*)(struct buf *));
1.118 perry 64: static inline void genfs_rel_pages(struct vm_page **, int);
1.70 christos 65: static void filt_genfsdetach(struct knote *);
66: static int filt_genfsread(struct knote *, long);
67: static int filt_genfsvnode(struct knote *, long);
68:
1.110 yamt 69: #define MAX_READ_PAGES 16 /* XXXUBC 16 */
1.41 christos 70:
1.130 chs 71: int genfs_maxdio = MAXPHYS;
72:
1.1 mycroft 73: int
1.53 enami 74: genfs_poll(void *v)
1.1 mycroft 75: {
1.3 mycroft 76: struct vop_poll_args /* {
1.1 mycroft 77: struct vnode *a_vp;
1.3 mycroft 78: int a_events;
1.116 christos 79: struct lwp *a_l;
1.1 mycroft 80: } */ *ap = v;
81:
1.3 mycroft 82: return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1 mycroft 83: }
84:
85: int
1.53 enami 86: genfs_seek(void *v)
1.4 kleink 87: {
88: struct vop_seek_args /* {
89: struct vnode *a_vp;
90: off_t a_oldoff;
91: off_t a_newoff;
1.125 elad 92: kauth_cred_t cred;
1.4 kleink 93: } */ *ap = v;
94:
95: if (ap->a_newoff < 0)
96: return (EINVAL);
97:
98: return (0);
99: }
100:
101: int
1.53 enami 102: genfs_abortop(void *v)
1.1 mycroft 103: {
104: struct vop_abortop_args /* {
105: struct vnode *a_dvp;
106: struct componentname *a_cnp;
107: } */ *ap = v;
1.53 enami 108:
1.1 mycroft 109: if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19 thorpej 110: PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1 mycroft 111: return (0);
1.13 wrstuden 112: }
113:
114: int
1.53 enami 115: genfs_fcntl(void *v)
1.13 wrstuden 116: {
117: struct vop_fcntl_args /* {
118: struct vnode *a_vp;
119: u_int a_command;
1.150 christos 120: void *a_data;
1.13 wrstuden 121: int a_fflag;
1.125 elad 122: kauth_cred_t a_cred;
1.116 christos 123: struct lwp *a_l;
1.13 wrstuden 124: } */ *ap = v;
125:
126: if (ap->a_command == F_SETFL)
127: return (0);
128: else
129: return (EOPNOTSUPP);
1.1 mycroft 130: }
131:
132: /*ARGSUSED*/
133: int
1.138 christos 134: genfs_badop(void *v)
1.1 mycroft 135: {
136:
137: panic("genfs: bad op");
138: }
139:
140: /*ARGSUSED*/
141: int
1.138 christos 142: genfs_nullop(void *v)
1.1 mycroft 143: {
144:
145: return (0);
1.10 kleink 146: }
147:
148: /*ARGSUSED*/
149: int
1.138 christos 150: genfs_einval(void *v)
1.10 kleink 151: {
152:
153: return (EINVAL);
1.1 mycroft 154: }
155:
1.12 wrstuden 156: /*
1.74 jdolecek 157: * Called when an fs doesn't support a particular vop.
                   158:  * This takes care of vrele'ing, vput'ing, or unlocking the passed-in vnodes.
1.12 wrstuden 159: */
160: int
1.75 jdolecek 161: genfs_eopnotsupp(void *v)
1.12 wrstuden 162: {
163: struct vop_generic_args /*
164: struct vnodeop_desc *a_desc;
1.53 enami 165: / * other random data follows, presumably * /
1.12 wrstuden 166: } */ *ap = v;
167: struct vnodeop_desc *desc = ap->a_desc;
1.74 jdolecek 168: struct vnode *vp, *vp_last = NULL;
1.12 wrstuden 169: int flags, i, j, offset;
170:
171: flags = desc->vdesc_flags;
172: for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
173: if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
174: break; /* stop at end of list */
175: if ((j = flags & VDESC_VP0_WILLPUT)) {
1.53 enami 176: vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
1.74 jdolecek 177:
178: /* Skip if NULL */
179: if (!vp)
180: continue;
181:
1.12 wrstuden 182: switch (j) {
183: case VDESC_VP0_WILLPUT:
1.74 jdolecek 184: /* Check for dvp == vp cases */
185: if (vp == vp_last)
186: vrele(vp);
187: else {
188: vput(vp);
189: vp_last = vp;
190: }
1.12 wrstuden 191: break;
192: case VDESC_VP0_WILLUNLOCK:
193: VOP_UNLOCK(vp, 0);
194: break;
195: case VDESC_VP0_WILLRELE:
196: vrele(vp);
197: break;
198: }
199: }
200: }
201:
202: return (EOPNOTSUPP);
203: }
204:
1.1 mycroft 205: /*ARGSUSED*/
206: int
1.138 christos 207: genfs_ebadf(void *v)
1.1 mycroft 208: {
209:
210: return (EBADF);
1.9 matthias 211: }
212:
213: /* ARGSUSED */
214: int
1.138 christos 215: genfs_enoioctl(void *v)
1.9 matthias 216: {
217:
1.51 atatat 218: return (EPASSTHROUGH);
1.6 fvdl 219: }
220:
221:
222: /*
1.15 fvdl 223: * Eliminate all activity associated with the requested vnode
1.6 fvdl 224: * and with all vnodes aliased to the requested vnode.
225: */
226: int
1.53 enami 227: genfs_revoke(void *v)
1.6 fvdl 228: {
229: struct vop_revoke_args /* {
230: struct vnode *a_vp;
231: int a_flags;
232: } */ *ap = v;
233: struct vnode *vp, *vq;
1.116 christos 234: struct lwp *l = curlwp; /* XXX */
1.6 fvdl 235:
236: #ifdef DIAGNOSTIC
237: if ((ap->a_flags & REVOKEALL) == 0)
238: panic("genfs_revoke: not revokeall");
239: #endif
240:
241: vp = ap->a_vp;
242: simple_lock(&vp->v_interlock);
243:
244: if (vp->v_flag & VALIASED) {
245: /*
246: * If a vgone (or vclean) is already in progress,
247: * wait until it is done and return.
248: */
249: if (vp->v_flag & VXLOCK) {
250: vp->v_flag |= VXWANT;
1.83 pk 251: ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0,
252: &vp->v_interlock);
1.6 fvdl 253: return (0);
254: }
255: /*
256: * Ensure that vp will not be vgone'd while we
257: * are eliminating its aliases.
258: */
259: vp->v_flag |= VXLOCK;
260: simple_unlock(&vp->v_interlock);
261: while (vp->v_flag & VALIASED) {
262: simple_lock(&spechash_slock);
263: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
264: if (vq->v_rdev != vp->v_rdev ||
265: vq->v_type != vp->v_type || vp == vq)
266: continue;
267: simple_unlock(&spechash_slock);
268: vgone(vq);
269: break;
270: }
271: if (vq == NULLVP)
272: simple_unlock(&spechash_slock);
273: }
274: /*
275: * Remove the lock so that vgone below will
276: * really eliminate the vnode after which time
277: * vgone will awaken any sleepers.
278: */
279: simple_lock(&vp->v_interlock);
280: vp->v_flag &= ~VXLOCK;
281: }
1.116 christos 282: vgonel(vp, l);
1.6 fvdl 283: return (0);
284: }
285:
286: /*
1.12 wrstuden 287: * Lock the node.
1.6 fvdl 288: */
289: int
1.53 enami 290: genfs_lock(void *v)
1.6 fvdl 291: {
292: struct vop_lock_args /* {
293: struct vnode *a_vp;
294: int a_flags;
295: } */ *ap = v;
296: struct vnode *vp = ap->a_vp;
297:
1.86 hannken 298: return (lockmgr(vp->v_vnlock, ap->a_flags, &vp->v_interlock));
1.6 fvdl 299: }
300:
301: /*
1.12 wrstuden 302: * Unlock the node.
1.6 fvdl 303: */
304: int
1.53 enami 305: genfs_unlock(void *v)
1.6 fvdl 306: {
307: struct vop_unlock_args /* {
308: struct vnode *a_vp;
309: int a_flags;
310: } */ *ap = v;
311: struct vnode *vp = ap->a_vp;
312:
1.86 hannken 313: return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE,
1.53 enami 314: &vp->v_interlock));
1.6 fvdl 315: }
316:
317: /*
1.12 wrstuden 318: * Return whether or not the node is locked.
1.6 fvdl 319: */
320: int
1.53 enami 321: genfs_islocked(void *v)
1.6 fvdl 322: {
323: struct vop_islocked_args /* {
324: struct vnode *a_vp;
325: } */ *ap = v;
326: struct vnode *vp = ap->a_vp;
327:
1.86 hannken 328: return (lockstatus(vp->v_vnlock));
1.12 wrstuden 329: }
330:
331: /*
332: * Stubs to use when there is no locking to be done on the underlying object.
333: */
334: int
1.53 enami 335: genfs_nolock(void *v)
1.12 wrstuden 336: {
337: struct vop_lock_args /* {
338: struct vnode *a_vp;
339: int a_flags;
1.116 christos 340: struct lwp *a_l;
1.12 wrstuden 341: } */ *ap = v;
342:
343: /*
344: * Since we are not using the lock manager, we must clear
345: * the interlock here.
346: */
347: if (ap->a_flags & LK_INTERLOCK)
348: simple_unlock(&ap->a_vp->v_interlock);
349: return (0);
350: }
351:
352: int
1.138 christos 353: genfs_nounlock(void *v)
1.12 wrstuden 354: {
1.53 enami 355:
1.12 wrstuden 356: return (0);
357: }
358:
359: int
1.138 christos 360: genfs_noislocked(void *v)
1.12 wrstuden 361: {
1.53 enami 362:
1.12 wrstuden 363: return (0);
1.8 thorpej 364: }
365:
366: /*
1.142 yamt 367: * Local lease check.
1.8 thorpej 368: */
369: int
1.53 enami 370: genfs_lease_check(void *v)
1.8 thorpej 371: {
372:
373: return (0);
1.34 chs 374: }
375:
376: int
1.138 christos 377: genfs_mmap(void *v)
1.34 chs 378: {
1.53 enami 379:
380: return (0);
1.21 chs 381: }
382:
1.118 perry 383: static inline void
1.63 enami 384: genfs_rel_pages(struct vm_page **pgs, int npages)
385: {
386: int i;
387:
388: for (i = 0; i < npages; i++) {
389: struct vm_page *pg = pgs[i];
390:
1.127 yamt 391: if (pg == NULL || pg == PGO_DONTCARE)
1.63 enami 392: continue;
393: if (pg->flags & PG_FAKE) {
394: pg->flags |= PG_RELEASED;
395: }
396: }
1.64 enami 397: uvm_lock_pageq();
1.63 enami 398: uvm_page_unbusy(pgs, npages);
1.64 enami 399: uvm_unlock_pageq();
1.63 enami 400: }
401:
1.21 chs 402: /*
403: * generic VM getpages routine.
404: * Return PG_BUSY pages for the given range,
405: * reading from backing store if necessary.
406: */
407:
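/*
 * In outline: bounds-check the request against the in-memory EOF;
 * for PGO_LOCKED callers, return only what is already resident;
 * otherwise find the pages (expanding the range to whole fs blocks
 * and restarting if a truncate raced us), VOP_BMAP each block and
 * read it, zero-filling holes; finally, for writes over holes,
 * GOP_ALLOC the backing store before the pages are unbusied.
 */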
408: int
1.53 enami 409: genfs_getpages(void *v)
1.21 chs 410: {
411: struct vop_getpages_args /* {
412: struct vnode *a_vp;
413: voff_t a_offset;
1.33 chs 414: struct vm_page **a_m;
1.21 chs 415: int *a_count;
416: int a_centeridx;
417: vm_prot_t a_access_type;
418: int a_advice;
419: int a_flags;
420: } */ *ap = v;
421:
1.30 chs 422: off_t newsize, diskeof, memeof;
1.124 yamt 423: off_t offset, origoffset, startoffset, endoffset;
1.21 chs 424: daddr_t lbn, blkno;
1.120 yamt 425: int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
1.37 chs 426: int fs_bshift, fs_bsize, dev_bshift;
1.21 chs 427: int flags = ap->a_flags;
428: size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
429: vaddr_t kva;
430: struct buf *bp, *mbp;
431: struct vnode *vp = ap->a_vp;
1.36 chs 432: struct vnode *devvp;
1.37 chs 433: struct genfs_node *gp = VTOG(vp);
434: struct uvm_object *uobj = &vp->v_uobj;
1.110 yamt 435: struct vm_page *pg, **pgs, *pgs_onstack[MAX_READ_PAGES];
1.77 yamt 436: int pgs_size;
1.128 ad 437: kauth_cred_t cred = curlwp->l_cred; /* XXXUBC curlwp */
1.148 thorpej 438: bool async = (flags & PGO_SYNCIO) == 0;
439: bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.149 thorpej 440: bool sawhole = false;
441: bool has_trans = false;
1.148 thorpej 442: bool overwrite = (flags & PGO_OVERWRITE) != 0;
443: bool blockalloc = write && (flags & PGO_NOBLOCKALLOC) == 0;
1.126 yamt 444: voff_t origvsize;
1.21 chs 445: UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
446:
1.30 chs 447: UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
1.53 enami 448: vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
1.30 chs 449:
1.121 reinoud 450: KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
451: vp->v_type == VLNK || vp->v_type == VBLK);
1.109 yamt 452:
1.21 chs 453: /* XXXUBC temp limit */
1.110 yamt 454: if (*ap->a_count > MAX_READ_PAGES) {
1.37 chs 455: panic("genfs_getpages: too many pages");
1.21 chs 456: }
457:
1.143 hannken 458: pgs = pgs_onstack;
459: pgs_size = sizeof(pgs_onstack);
460:
1.126 yamt 461: startover:
1.26 chs 462: error = 0;
1.126 yamt 463: origvsize = vp->v_size;
1.26 chs 464: origoffset = ap->a_offset;
465: orignpages = *ap->a_count;
1.123 yamt 466: GOP_SIZE(vp, vp->v_size, &diskeof, 0);
1.26 chs 467: if (flags & PGO_PASTEOF) {
1.37 chs 468: newsize = MAX(vp->v_size,
1.53 enami 469: origoffset + (orignpages << PAGE_SHIFT));
1.123 yamt 470: GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
1.26 chs 471: } else {
1.123 yamt 472: GOP_SIZE(vp, vp->v_size, &memeof, GOP_SIZE_MEM);
1.21 chs 473: }
1.30      chs       474: 	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
475: KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
476: KASSERT(orignpages > 0);
1.95 chs 477:
478: /*
479: * Bounds-check the request.
480: */
481:
482: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
483: if ((flags & PGO_LOCKED) == 0) {
484: simple_unlock(&uobj->vmobjlock);
485: }
486: UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
487: origoffset, *ap->a_count, memeof,0);
1.143 hannken 488: error = EINVAL;
489: goto out_err;
1.95 chs 490: }
1.21 chs 491:
1.99 yamt 492: /* uobj is locked */
493:
1.103 yamt 494: if ((flags & PGO_NOTIMESTAMP) == 0 &&
1.121 reinoud 495: (vp->v_type != VBLK ||
1.103 yamt 496: (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
497: int updflags = 0;
498:
499: if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
500: updflags = GOP_UPDATE_ACCESSED;
501: }
502: if (write) {
503: updflags |= GOP_UPDATE_MODIFIED;
504: }
505: if (updflags != 0) {
506: GOP_MARKUPDATE(vp, updflags);
507: }
508: }
509:
1.101 yamt 510: if (write) {
511: gp->g_dirtygen++;
512: if ((vp->v_flag & VONWORKLST) == 0) {
513: vn_syncer_add_to_worklist(vp, filedelay);
514: }
1.103 yamt 515: if ((vp->v_flag & (VWRITEMAP|VWRITEMAPDIRTY)) == VWRITEMAP) {
516: vp->v_flag |= VWRITEMAPDIRTY;
517: }
1.99 yamt 518: }
519:
1.21 chs 520: /*
521: * For PGO_LOCKED requests, just return whatever's in memory.
522: */
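	/*
	 * A PGO_LOCKED caller (e.g. uvm_fault() with the object already
	 * locked) may not sleep, which is why the lookup below uses
	 * UFP_NOWAIT|UFP_NOALLOC and reports EBUSY instead of waiting.
	 */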
523:
524: if (flags & PGO_LOCKED) {
1.127 yamt 525: int nfound;
526:
527: npages = *ap->a_count;
528: #if defined(DEBUG)
529: for (i = 0; i < npages; i++) {
530: pg = ap->a_m[i];
531: KASSERT(pg == NULL || pg == PGO_DONTCARE);
532: }
533: #endif /* defined(DEBUG) */
534: nfound = uvn_findpages(uobj, origoffset, &npages,
535: ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(write ? UFP_NORDONLY : 0));
536: KASSERT(npages == *ap->a_count);
537: if (nfound == 0) {
1.143 hannken 538: error = EBUSY;
539: goto out_err;
1.127 yamt 540: }
1.146 ad 541: if (!rw_tryenter(&gp->g_glock, RW_READER)) {
1.127 yamt 542: genfs_rel_pages(ap->a_m, npages);
543:
544: /*
545: * restore the array.
546: */
547:
548: for (i = 0; i < npages; i++) {
549: pg = ap->a_m[i];
1.21 chs 550:
1.127     yamt      551: 			if (pg != NULL && pg != PGO_DONTCARE) {
552: ap->a_m[i] = NULL;
553: }
554: }
555: } else {
1.146 ad 556: rw_exit(&gp->g_glock);
1.127 yamt 557: }
1.143 hannken 558: error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
559: goto out_err;
1.21 chs 560: }
1.126 yamt 561: simple_unlock(&uobj->vmobjlock);
1.21 chs 562:
563: /*
564: * find the requested pages and make some simple checks.
565: * leave space in the page array for a whole block.
566: */
567:
1.121 reinoud 568: if (vp->v_type != VBLK) {
1.36 chs 569: fs_bshift = vp->v_mount->mnt_fs_bshift;
570: dev_bshift = vp->v_mount->mnt_dev_bshift;
571: } else {
572: fs_bshift = DEV_BSHIFT;
573: dev_bshift = DEV_BSHIFT;
574: }
1.21 chs 575: fs_bsize = 1 << fs_bshift;
576:
1.30 chs 577: orignpages = MIN(orignpages,
578: round_page(memeof - origoffset) >> PAGE_SHIFT);
1.21 chs 579: npages = orignpages;
580: startoffset = origoffset & ~(fs_bsize - 1);
1.53 enami 581: endoffset = round_page((origoffset + (npages << PAGE_SHIFT) +
582: fs_bsize - 1) & ~(fs_bsize - 1));
1.30 chs 583: endoffset = MIN(endoffset, round_page(memeof));
1.21 chs 584: ridx = (origoffset - startoffset) >> PAGE_SHIFT;
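	/*
	 * For example, with 8k fs blocks and 4k pages, a one-page request
	 * at offset 0x3000 yields startoffset 0x2000, endoffset 0x4000
	 * and ridx 1: the range grows to cover the whole fs block.
	 */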
585:
1.77 yamt 586: pgs_size = sizeof(struct vm_page *) *
587: ((endoffset - startoffset) >> PAGE_SHIFT);
588: if (pgs_size > sizeof(pgs_onstack)) {
1.135 yamt 589: pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP);
1.78 simonb 590: if (pgs == NULL) {
1.143 hannken 591: pgs = pgs_onstack;
592: error = ENOMEM;
593: goto out_err;
1.78 simonb 594: }
1.77 yamt 595: } else {
1.143 hannken 596: /* pgs == pgs_onstack */
1.77 yamt 597: memset(pgs, 0, pgs_size);
598: }
1.63 enami 599: UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
600: ridx, npages, startoffset, endoffset);
1.126 yamt 601:
1.143 hannken 602: if (!has_trans &&
1.144 hannken 603: (error = fstrans_start(vp->v_mount, FSTRANS_SHARED)) != 0) {
1.143 hannken 604: goto out_err;
605: }
1.149 thorpej 606: has_trans = true;
1.143 hannken 607:
1.126 yamt 608: /*
609: * hold g_glock to prevent a race with truncate.
610: *
611: * check if our idea of v_size is still valid.
612: */
613:
614: if (blockalloc) {
1.146 ad 615: rw_enter(&gp->g_glock, RW_WRITER);
1.126 yamt 616: } else {
1.146 ad 617: rw_enter(&gp->g_glock, RW_READER);
1.126 yamt 618: }
619: simple_lock(&uobj->vmobjlock);
620: if (vp->v_size < origvsize) {
1.146 ad 621: rw_exit(&gp->g_glock);
1.126 yamt 622: if (pgs != pgs_onstack)
1.135 yamt 623: kmem_free(pgs, pgs_size);
1.126 yamt 624: goto startover;
625: }
626:
1.63 enami 627: if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
628: async ? UFP_NOWAIT : UFP_ALL) != orignpages) {
1.146 ad 629: rw_exit(&gp->g_glock);
1.63 enami 630: KASSERT(async != 0);
631: genfs_rel_pages(&pgs[ridx], orignpages);
632: simple_unlock(&uobj->vmobjlock);
1.143 hannken 633: error = EBUSY;
634: goto out_err;
1.63 enami 635: }
1.21 chs 636:
637: /*
638: * if the pages are already resident, just return them.
639: */
640:
641: for (i = 0; i < npages; i++) {
1.97 christos 642: struct vm_page *pg1 = pgs[ridx + i];
1.21 chs 643:
1.97 christos 644: if ((pg1->flags & PG_FAKE) ||
1.100 yamt 645: (blockalloc && (pg1->flags & PG_RDONLY))) {
1.21 chs 646: break;
647: }
648: }
649: if (i == npages) {
1.146 ad 650: rw_exit(&gp->g_glock);
1.21 chs 651: UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
1.26 chs 652: npages += ridx;
1.110 yamt 653: goto out;
1.21 chs 654: }
655:
656: /*
1.37 chs 657: * if PGO_OVERWRITE is set, don't bother reading the pages.
658: */
659:
1.124 yamt 660: if (overwrite) {
1.146 ad 661: rw_exit(&gp->g_glock);
1.37 chs 662: UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
663:
664: for (i = 0; i < npages; i++) {
1.97 christos 665: struct vm_page *pg1 = pgs[ridx + i];
1.37 chs 666:
1.97 christos 667: pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
1.37 chs 668: }
669: npages += ridx;
670: goto out;
671: }
672:
673: /*
1.21 chs 674: * the page wasn't resident and we're not overwriting,
675: * so we're going to have to do some i/o.
676: * find any additional pages needed to cover the expanded range.
677: */
678:
1.35 chs 679: npages = (endoffset - startoffset) >> PAGE_SHIFT;
680: if (startoffset != origoffset || npages != orignpages) {
1.21 chs 681:
682: /*
1.37 chs 683: * we need to avoid deadlocks caused by locking
1.21 chs 684: * additional pages at lower offsets than pages we
1.37 chs 685: * already have locked. unlock them all and start over.
1.21 chs 686: */
687:
1.63 enami 688: genfs_rel_pages(&pgs[ridx], orignpages);
1.77 yamt 689: memset(pgs, 0, pgs_size);
1.21 chs 690:
691: UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
1.53 enami 692: startoffset, endoffset, 0,0);
1.21 chs 693: npgs = npages;
1.63 enami 694: if (uvn_findpages(uobj, startoffset, &npgs, pgs,
695: async ? UFP_NOWAIT : UFP_ALL) != npages) {
1.146 ad 696: rw_exit(&gp->g_glock);
1.63 enami 697: KASSERT(async != 0);
698: genfs_rel_pages(pgs, npages);
699: simple_unlock(&uobj->vmobjlock);
1.143 hannken 700: error = EBUSY;
701: goto out_err;
1.63 enami 702: }
1.21 chs 703: }
704: simple_unlock(&uobj->vmobjlock);
705:
706: /*
707: * read the desired page(s).
708: */
709:
710: totalbytes = npages << PAGE_SHIFT;
1.30 chs 711: bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
1.21 chs 712: tailbytes = totalbytes - bytes;
713: skipbytes = 0;
714:
1.53 enami 715: kva = uvm_pagermapin(pgs, npages,
716: UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.21 chs 717:
1.119 yamt 718: mbp = getiobuf();
1.21 chs 719: mbp->b_bufsize = totalbytes;
720: mbp->b_data = (void *)kva;
721: mbp->b_resid = mbp->b_bcount = bytes;
1.65 fvdl 722: mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
1.37      chs       723: 	mbp->b_iodone = (async ? uvm_aio_biodone : NULL);
1.21 chs 724: mbp->b_vp = vp;
1.120 yamt 725: if (async)
726: BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
727: else
728: BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
1.21 chs 729:
730: /*
1.31 chs 731: * if EOF is in the middle of the range, zero the part past EOF.
1.38 chs 732: * if the page including EOF is not PG_FAKE, skip over it since
733: * in that case it has valid data that we need to preserve.
1.21 chs 734: */
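	/*
	 * For example, with 4k blocks and pages and diskeof at 5000,
	 * reading the page at offset 4096 gives bytes = 904 and
	 * tailbytes = 3192; those trailing bytes are zeroed here unless
	 * the EOF page already holds valid data (i.e. is not PG_FAKE).
	 */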
735:
1.31 chs 736: if (tailbytes > 0) {
1.38 chs 737: size_t tailstart = bytes;
738:
739: if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
740: tailstart = round_page(tailstart);
741: tailbytes -= tailstart - bytes;
742: }
1.37 chs 743: UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
1.53 enami 744: kva, tailstart, tailbytes,0);
1.38 chs 745: memset((void *)(kva + tailstart), 0, tailbytes);
1.21 chs 746: }
747:
748: /*
749: * now loop over the pages, reading as needed.
750: */
751:
752: bp = NULL;
753: for (offset = startoffset;
1.53 enami 754: bytes > 0;
755: offset += iobytes, bytes -= iobytes) {
1.21 chs 756:
757: /*
758: * skip pages which don't need to be read.
759: */
760:
761: pidx = (offset - startoffset) >> PAGE_SHIFT;
1.100 yamt 762: while ((pgs[pidx]->flags & PG_FAKE) == 0) {
1.21 chs 763: size_t b;
764:
1.24 chs 765: KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.100 yamt 766: if ((pgs[pidx]->flags & PG_RDONLY)) {
1.149 thorpej 767: sawhole = true;
1.100 yamt 768: }
1.26 chs 769: b = MIN(PAGE_SIZE, bytes);
1.21 chs 770: offset += b;
771: bytes -= b;
772: skipbytes += b;
773: pidx++;
774: UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
1.53 enami 775: offset, 0,0,0);
1.21 chs 776: if (bytes == 0) {
777: goto loopdone;
778: }
779: }
780:
781: /*
782: * bmap the file to find out the blkno to read from and
783: * how much we can read in one i/o. if bmap returns an error,
784: * skip the rest of the top-level i/o.
785: */
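		/*
		 * VOP_BMAP maps the logical block lbn to a device vnode
		 * and device block number, with "run" additional
		 * contiguous blocks following it; blkno == -1 denotes
		 * a hole, which is zero-filled below instead of read.
		 */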
786:
787: lbn = offset >> fs_bshift;
1.36 chs 788: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 789: if (error) {
790: UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
1.53 enami 791: lbn, error,0,0);
1.21 chs 792: skipbytes += bytes;
793: goto loopdone;
794: }
795:
796: /*
797: * see how many pages can be read with this i/o.
798: * reduce the i/o size if necessary to avoid
799: * overwriting pages with valid data.
800: */
801:
1.26 chs 802: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
803: bytes);
1.21 chs 804: if (offset + iobytes > round_page(offset)) {
805: pcount = 1;
806: while (pidx + pcount < npages &&
1.53 enami 807: pgs[pidx + pcount]->flags & PG_FAKE) {
1.21 chs 808: pcount++;
809: }
1.26 chs 810: iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.53 enami 811: (offset - trunc_page(offset)));
1.21 chs 812: }
813:
814: /*
1.53 enami 815: * if this block isn't allocated, zero it instead of
1.100 yamt 816: * reading it. unless we are going to allocate blocks,
817: * mark the pages we zeroed PG_RDONLY.
1.21 chs 818: */
819:
820: if (blkno < 0) {
1.53 enami 821: int holepages = (round_page(offset + iobytes) -
822: trunc_page(offset)) >> PAGE_SHIFT;
1.21 chs 823: UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
824:
1.149 thorpej 825: sawhole = true;
1.21 chs 826: memset((char *)kva + (offset - startoffset), 0,
1.53 enami 827: iobytes);
1.21 chs 828: skipbytes += iobytes;
829:
1.35 chs 830: for (i = 0; i < holepages; i++) {
831: if (write) {
832: pgs[pidx + i]->flags &= ~PG_CLEAN;
1.100 yamt 833: }
834: if (!blockalloc) {
1.21 chs 835: pgs[pidx + i]->flags |= PG_RDONLY;
836: }
837: }
838: continue;
839: }
840:
841: /*
842: * allocate a sub-buf for this piece of the i/o
843: * (or just use mbp if there's only 1 piece),
844: * and start it going.
845: */
846:
847: if (offset == startoffset && iobytes == bytes) {
848: bp = mbp;
849: } else {
1.119 yamt 850: bp = getiobuf();
1.120 yamt 851: nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
1.21 chs 852: }
1.112 yamt 853: bp->b_lblkno = 0;
1.21 chs 854:
855: /* adjust physical blkno for partial blocks */
1.25 fvdl 856: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.53 enami 857: dev_bshift);
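		/*
		 * For example, with 8k fs blocks and 512-byte device
		 * blocks, an offset 0x1000 into the fs block advances
		 * b_blkno by 0x1000 >> 9 == 8 sectors.
		 */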
1.21 chs 858:
1.53 enami 859: UVMHIST_LOG(ubchist,
860: "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
861: bp, offset, iobytes, bp->b_blkno);
1.21 chs 862:
1.109 yamt 863: VOP_STRATEGY(devvp, bp);
1.21 chs 864: }
865:
866: loopdone:
1.120 yamt 867: nestiobuf_done(mbp, skipbytes, error);
1.21 chs 868: if (async) {
1.32 chs 869: UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.146 ad 870: rw_exit(&gp->g_glock);
1.143 hannken 871: error = 0;
872: goto out_err;
1.21 chs 873: }
874: if (bp != NULL) {
875: error = biowait(mbp);
876: }
1.119 yamt 877: putiobuf(mbp);
1.21 chs 878: uvm_pagermapout(kva, npages);
879:
880: /*
881: * if this we encountered a hole then we have to do a little more work.
882: * for read faults, we marked the page PG_RDONLY so that future
883: * write accesses to the page will fault again.
884: * for write faults, we must make sure that the backing store for
885: * the page is completely allocated while the pages are locked.
886: */
887:
1.100 yamt 888: if (!error && sawhole && blockalloc) {
1.37 chs 889: error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
1.53 enami 890: cred);
1.37 chs 891: UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
892: startoffset, npages << PAGE_SHIFT, error,0);
1.100 yamt 893: if (!error) {
894: for (i = 0; i < npages; i++) {
895: if (pgs[i] == NULL) {
896: continue;
897: }
898: pgs[i]->flags &= ~(PG_CLEAN|PG_RDONLY);
899: UVMHIST_LOG(ubchist, "mark dirty pg %p",
900: pgs[i],0,0,0);
901: }
902: }
1.21 chs 903: }
1.146 ad 904: rw_exit(&gp->g_glock);
1.21 chs 905: simple_lock(&uobj->vmobjlock);
906:
907: /*
908: * we're almost done! release the pages...
909: * for errors, we free the pages.
910: * otherwise we activate them and mark them as valid and clean.
911: * also, unbusy pages that were not actually requested.
912: */
913:
914: if (error) {
915: for (i = 0; i < npages; i++) {
916: if (pgs[i] == NULL) {
917: continue;
918: }
919: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.53 enami 920: pgs[i], pgs[i]->flags, 0,0);
1.26 chs 921: if (pgs[i]->flags & PG_FAKE) {
1.37 chs 922: pgs[i]->flags |= PG_RELEASED;
1.21 chs 923: }
924: }
1.37 chs 925: uvm_lock_pageq();
926: uvm_page_unbusy(pgs, npages);
1.21 chs 927: uvm_unlock_pageq();
928: simple_unlock(&uobj->vmobjlock);
929: UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
1.143 hannken 930: goto out_err;
1.21 chs 931: }
932:
1.37 chs 933: out:
1.21 chs 934: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.143 hannken 935: error = 0;
1.26 chs 936: uvm_lock_pageq();
1.21 chs 937: for (i = 0; i < npages; i++) {
1.37 chs 938: pg = pgs[i];
939: if (pg == NULL) {
1.21 chs 940: continue;
941: }
942: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.53 enami 943: pg, pg->flags, 0,0);
1.37 chs 944: if (pg->flags & PG_FAKE && !overwrite) {
945: pg->flags &= ~(PG_FAKE);
1.21 chs 946: pmap_clear_modify(pgs[i]);
947: }
1.100 yamt 948: KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
1.21 chs 949: if (i < ridx || i >= ridx + orignpages || async) {
950: UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
1.53 enami 951: pg, pg->offset,0,0);
1.37 chs 952: if (pg->flags & PG_WANTED) {
953: wakeup(pg);
954: }
955: if (pg->flags & PG_FAKE) {
956: KASSERT(overwrite);
957: uvm_pagezero(pg);
958: }
959: if (pg->flags & PG_RELEASED) {
960: uvm_pagefree(pg);
1.26 chs 961: continue;
1.21 chs 962: }
1.129 yamt 963: uvm_pageenqueue(pg);
1.37 chs 964: pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
965: UVM_PAGE_OWN(pg, NULL);
1.21 chs 966: }
967: }
1.26 chs 968: uvm_unlock_pageq();
1.21 chs 969: simple_unlock(&uobj->vmobjlock);
970: if (ap->a_m != NULL) {
971: memcpy(ap->a_m, &pgs[ridx],
1.53 enami 972: orignpages * sizeof(struct vm_page *));
1.21 chs 973: }
1.143 hannken 974:
975: out_err:
1.77 yamt 976: if (pgs != pgs_onstack)
1.135 yamt 977: kmem_free(pgs, pgs_size);
1.143 hannken 978: if (has_trans)
979: fstrans_done(vp->v_mount);
980: return (error);
1.21 chs 981: }
982:
983: /*
984: * generic VM putpages routine.
985: * Write the given range of pages to backing store.
1.37 chs 986: *
987: * => "offhi == 0" means flush all pages at or after "offlo".
1.140 pooka 988: * => object should be locked by caller. we return with the
989: * object unlocked.
1.37 chs 990: * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
991: * thus, a caller might want to unlock higher level resources
992: * (e.g. vm_map) before calling flush.
1.140 pooka 993: * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, we will not block
1.37 chs 994: * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
995: * => NOTE: we rely on the fact that the object's memq is a TAILQ and
996: * that new pages are inserted on the tail end of the list. thus,
997: * we can make a complete pass through the object in one go by starting
998: * at the head and working towards the tail (new pages are put in
999: * front of us).
1000: * => NOTE: we are allowed to lock the page queues, so the caller
1001: * must not be holding the page queue lock.
1002: *
1003: * note on "cleaning" object and PG_BUSY pages:
1004: * this routine is holding the lock on the object. the only time
1005: * that it can run into a PG_BUSY page that it does not own is if
1006: * some other process has started I/O on the page (e.g. either
1007: * a pagein, or a pageout). if the PG_BUSY page is being paged
1008: * in, then it can not be dirty (!PG_CLEAN) because no one has
1009: * had a chance to modify it yet. if the PG_BUSY page is being
1010: * paged out then it means that someone else has already started
1.53 enami 1011: * cleaning the page for us (how nice!). in this case, if we
1.37 chs 1012: * have syncio specified, then after we make our pass through the
1.53 enami 1013: * object we need to wait for the other PG_BUSY pages to clear
1.37 chs 1014: * off (i.e. we need to do an iosync). also note that once a
1015: * page is PG_BUSY it must stay in its object until it is un-busyed.
1016: *
1017: * note on page traversal:
1018: * we can traverse the pages in an object either by going down the
1019: * linked list in "uobj->memq", or we can go over the address range
1020: * by page doing hash table lookups for each address. depending
1.53 enami 1021: * on how many pages are in the object it may be cheaper to do one
1.37 chs 1022: * or the other. we set "by_list" to true if we are using memq.
1023: * if the cost of a hash lookup was equal to the cost of the list
1024: * traversal we could compare the number of pages in the start->stop
1025: * range to the total number of pages in the object. however, it
1026: * seems that a hash table lookup is more expensive than the linked
1.53 enami 1027: * list traversal, so we multiply the number of pages in the
1.37 chs 1028: * range by an estimate of the relatively higher cost of the hash lookup.
1.21 chs 1029: */
1030:
1031: int
1.53 enami 1032: genfs_putpages(void *v)
1.21 chs 1033: {
1034: struct vop_putpages_args /* {
1035: struct vnode *a_vp;
1.37 chs 1036: voff_t a_offlo;
1037: voff_t a_offhi;
1.21 chs 1038: int a_flags;
1039: } */ *ap = v;
1.151 ! perseant 1040:
! 1041: return genfs_do_putpages(ap->a_vp, ap->a_offlo, ap->a_offhi,
! 1042: ap->a_flags, NULL);
! 1043: }
! 1044:
! 1045: int
! 1046: genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
! 1047: struct vm_page **busypg)
! 1048: {
1.37 chs 1049: struct uvm_object *uobj = &vp->v_uobj;
1.46 chs 1050: struct simplelock *slock = &uobj->vmobjlock;
1.37 chs 1051: off_t off;
1.76 tls 1052: /* Even for strange MAXPHYS, the shift rounds down to a page */
1.139 christos 1053: #define maxpages (MAXPHYS >> PAGE_SHIFT)
1.37 chs 1054: int i, s, error, npages, nback;
1055: int freeflag;
1.60 enami 1056: struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
1.148 thorpej 1057: bool wasclean, by_list, needs_clean, yld;
1058: bool async = (flags & PGO_SYNCIO) == 0;
1059: bool pagedaemon = curproc == uvm.pagedaemon_proc;
1.70 christos 1060: struct lwp *l = curlwp ? curlwp : &lwp0;
1.101 yamt 1061: struct genfs_node *gp = VTOG(vp);
1062: int dirtygen;
1.149 thorpej 1063: bool modified = false;
1064: bool has_trans = false;
1.148 thorpej 1065: bool cleanall;
1.70 christos 1066:
1.37 chs 1067: UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
1068:
1069: KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1070: KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
1071: KASSERT(startoff < endoff || endoff == 0);
1072:
1073: UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
1074: vp, uobj->uo_npages, startoff, endoff - startoff);
1.103 yamt 1075:
1076: KASSERT((vp->v_flag & VONWORKLST) != 0 ||
1077: (vp->v_flag & VWRITEMAPDIRTY) == 0);
1.37 chs 1078: if (uobj->uo_npages == 0) {
1.62 perseant 1079: s = splbio();
1.103 yamt 1080: if (vp->v_flag & VONWORKLST) {
1081: vp->v_flag &= ~VWRITEMAPDIRTY;
1.137 reinoud 1082: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
1083: vn_syncer_remove_from_worklist(vp);
1.37 chs 1084: }
1.62 perseant 1085: splx(s);
1.46 chs 1086: simple_unlock(slock);
1.53 enami 1087: return (0);
1.37 chs 1088: }
1089:
1090: /*
1091: * the vnode has pages, set up to process the request.
1092: */
1093:
1.143 hannken 1094: if ((flags & PGO_CLEANIT) != 0) {
1095: simple_unlock(slock);
1096: if (pagedaemon)
1.144 hannken 1097: error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY);
1.143 hannken 1098: else
1.144 hannken 1099: error = fstrans_start(vp->v_mount, FSTRANS_LAZY);
1.143 hannken 1100: if (error)
1101: return error;
1.149 thorpej 1102: has_trans = true;
1.143 hannken 1103: simple_lock(slock);
1104: }
1105:
1.37 chs 1106: error = 0;
1.44 chs 1107: s = splbio();
1.71 pk 1108: simple_lock(&global_v_numoutput_slock);
1.44 chs 1109: wasclean = (vp->v_numoutput == 0);
1.71 pk 1110: simple_unlock(&global_v_numoutput_slock);
1.44 chs 1111: splx(s);
1.37 chs 1112: off = startoff;
1113: if (endoff == 0 || flags & PGO_ALLPAGES) {
1114: endoff = trunc_page(LLONG_MAX);
1115: }
1116: by_list = (uobj->uo_npages <=
1117: ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
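	/*
	 * For example, assuming UVM_PAGE_HASH_PENALTY is 4, a scan of a
	 * 16-page range walks the memq list only if the object holds at
	 * most 64 pages; otherwise per-page hash lookups are cheaper.
	 */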
1118:
1.102 yamt 1119: #if !defined(DEBUG)
1120: /*
1121: * if this vnode is known not to have dirty pages,
1122: * don't bother to clean it out.
1123: */
1124:
1125: if ((vp->v_flag & VONWORKLST) == 0) {
1126: if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
1127: goto skip_scan;
1128: }
1129: flags &= ~PGO_CLEANIT;
1130: }
1131: #endif /* !defined(DEBUG) */
1132:
1.37 chs 1133: /*
1134: * start the loop. when scanning by list, hold the last page
1135: * in the list before we start. pages allocated after we start
1136: * will be added to the end of the list, so we can stop at the
1137: * current last page.
1138: */
1139:
1.104 yamt 1140: cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
1141: startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
1142: (vp->v_flag & VONWORKLST) != 0;
1.101 yamt 1143: dirtygen = gp->g_dirtygen;
1.56 enami 1144: freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
1.37 chs 1145: if (by_list) {
1.113 yamt 1146: curmp.uobject = uobj;
1147: curmp.offset = (voff_t)-1;
1148: curmp.flags = PG_BUSY;
1149: endmp.uobject = uobj;
1150: endmp.offset = (voff_t)-1;
1151: endmp.flags = PG_BUSY;
1.37 chs 1152: pg = TAILQ_FIRST(&uobj->memq);
1153: TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1.70 christos 1154: PHOLD(l);
1.37 chs 1155: } else {
1156: pg = uvm_pagelookup(uobj, off);
1157: }
1158: nextpg = NULL;
1159: while (by_list || off < endoff) {
1160:
1161: /*
1162: * if the current page is not interesting, move on to the next.
1163: */
1164:
1165: KASSERT(pg == NULL || pg->uobject == uobj);
1166: KASSERT(pg == NULL ||
1.53 enami 1167: (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1168: (pg->flags & PG_BUSY) != 0);
1.37 chs 1169: if (by_list) {
1170: if (pg == &endmp) {
1171: break;
1172: }
1173: if (pg->offset < startoff || pg->offset >= endoff ||
1174: pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1.101 yamt 1175: if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1.149 thorpej 1176: wasclean = false;
1.101 yamt 1177: }
1.37 chs 1178: pg = TAILQ_NEXT(pg, listq);
1179: continue;
1180: }
1181: off = pg->offset;
1.101 yamt 1182: } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1183: if (pg != NULL) {
1.149 thorpej 1184: wasclean = false;
1.101 yamt 1185: }
1.37 chs 1186: off += PAGE_SIZE;
1187: if (off < endoff) {
1188: pg = uvm_pagelookup(uobj, off);
1189: }
1190: continue;
1191: }
1.21 chs 1192:
1.37 chs 1193: /*
1194: * if the current page needs to be cleaned and it's busy,
1195: * wait for it to become unbusy.
1196: */
1197:
1.97 christos 1198: yld = (l->l_cpu->ci_schedstate.spc_flags &
1.56 enami 1199: SPCF_SHOULDYIELD) && !pagedaemon;
1.97 christos 1200: if (pg->flags & PG_BUSY || yld) {
1.72 perseant 1201: UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
1202: if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
1203: UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
1204: error = EDEADLK;
1.151 ! perseant 1205: if (busypg != NULL)
! 1206: *busypg = pg;
1.72 perseant 1207: break;
1208: }
1.56 enami 1209: KASSERT(!pagedaemon);
1.37 chs 1210: if (by_list) {
1211: TAILQ_INSERT_BEFORE(pg, &curmp, listq);
1212: UVMHIST_LOG(ubchist, "curmp next %p",
1.53 enami 1213: TAILQ_NEXT(&curmp, listq), 0,0,0);
1.37 chs 1214: }
1.97 christos 1215: if (yld) {
1.49 chs 1216: simple_unlock(slock);
1.145 ad 1217: preempt();
1.49 chs 1218: simple_lock(slock);
1219: } else {
1220: pg->flags |= PG_WANTED;
1221: UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1222: simple_lock(slock);
1223: }
1.37 chs 1224: if (by_list) {
1225: UVMHIST_LOG(ubchist, "after next %p",
1.53 enami 1226: TAILQ_NEXT(&curmp, listq), 0,0,0);
1.37 chs 1227: pg = TAILQ_NEXT(&curmp, listq);
1228: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1229: } else {
1230: pg = uvm_pagelookup(uobj, off);
1231: }
1232: continue;
1.49 chs 1233: }
1234:
1235: /*
1236: * if we're freeing, remove all mappings of the page now.
                  1237: 		 * if we're cleaning, check if the page needs to be cleaned.
1238: */
1239:
1240: if (flags & PGO_FREE) {
1241: pmap_page_protect(pg, VM_PROT_NONE);
1.101 yamt 1242: } else if (flags & PGO_CLEANIT) {
1243:
1244: /*
                  1245: 			 * if we still have some hope of pulling this vnode off
                  1246: 			 * the syncer queue, write-protect the page.
1247: */
1248:
1.104 yamt 1249: if (cleanall && wasclean &&
1250: gp->g_dirtygen == dirtygen) {
1251:
1252: /*
1253: * uobj pages get wired only by uvm_fault
1254: * where uobj is locked.
1255: */
1256:
1257: if (pg->wire_count == 0) {
1258: pmap_page_protect(pg,
1259: VM_PROT_READ|VM_PROT_EXECUTE);
1260: } else {
1.149 thorpej 1261: cleanall = false;
1.104 yamt 1262: }
1.101 yamt 1263: }
1.49 chs 1264: }
1.101 yamt 1265:
1.49 chs 1266: if (flags & PGO_CLEANIT) {
1267: needs_clean = pmap_clear_modify(pg) ||
1.53 enami 1268: (pg->flags & PG_CLEAN) == 0;
1.49 chs 1269: pg->flags |= PG_CLEAN;
1270: } else {
1.149 thorpej 1271: needs_clean = false;
1.37 chs 1272: }
1273:
1274: /*
1275: * if we're cleaning, build a cluster.
1276: * the cluster will consist of pages which are currently dirty,
1277: * but they will be returned to us marked clean.
1278: * if not cleaning, just operate on the one page.
1279: */
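		/*
		 * The cluster ends up laid out in pgs[] as: pgs[0 .. nback-1]
		 * hold the dirty pages found behind "off", pgs[nback] is our
		 * page of interest, and any dirty pages found in front of it
		 * follow, up to maxpages entries in total.
		 */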
1280:
1281: if (needs_clean) {
1.101 yamt 1282: KDASSERT((vp->v_flag & VONWORKLST));
1.149 thorpej 1283: wasclean = false;
1.37 chs 1284: memset(pgs, 0, sizeof(pgs));
1285: pg->flags |= PG_BUSY;
1286: UVM_PAGE_OWN(pg, "genfs_putpages");
1287:
1288: /*
1289: * first look backward.
1290: */
1291:
1.60 enami 1292: npages = MIN(maxpages >> 1, off >> PAGE_SHIFT);
1.37 chs 1293: nback = npages;
1294: uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
1295: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
1296: if (nback) {
1297: memmove(&pgs[0], &pgs[npages - nback],
1298: nback * sizeof(pgs[0]));
1.47 enami 1299: if (npages - nback < nback)
1300: memset(&pgs[nback], 0,
1301: (npages - nback) * sizeof(pgs[0]));
1302: else
1303: memset(&pgs[npages - nback], 0,
1304: nback * sizeof(pgs[0]));
1.37 chs 1305: }
1306:
1307: /*
1308: * then plug in our page of interest.
1309: */
1310:
1311: pgs[nback] = pg;
1312:
1313: /*
1314: * then look forward to fill in the remaining space in
1315: * the array of pages.
1316: */
1317:
1.60 enami 1318: npages = maxpages - nback - 1;
1.37 chs 1319: uvn_findpages(uobj, off + PAGE_SIZE, &npages,
1320: &pgs[nback + 1],
1321: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
1322: npages += nback + 1;
1323: } else {
1324: pgs[0] = pg;
1325: npages = 1;
1.61 enami 1326: nback = 0;
1.37 chs 1327: }
1328:
1329: /*
1330: * apply FREE or DEACTIVATE options if requested.
1331: */
1332:
1333: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1334: uvm_lock_pageq();
1335: }
1336: for (i = 0; i < npages; i++) {
1337: tpg = pgs[i];
1338: KASSERT(tpg->uobject == uobj);
1.59 enami 1339: if (by_list && tpg == TAILQ_NEXT(pg, listq))
1340: pg = tpg;
1.91 enami 1341: if (tpg->offset < startoff || tpg->offset >= endoff)
1342: continue;
1.141 yamt 1343: if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
1.37 chs 1344: (void) pmap_clear_reference(tpg);
1345: uvm_pagedeactivate(tpg);
1346: } else if (flags & PGO_FREE) {
1347: pmap_page_protect(tpg, VM_PROT_NONE);
1348: if (tpg->flags & PG_BUSY) {
1349: tpg->flags |= freeflag;
1.56 enami 1350: if (pagedaemon) {
1.37 chs 1351: uvmexp.paging++;
1352: uvm_pagedequeue(tpg);
1353: }
1354: } else {
1.59 enami 1355:
1356: /*
1357: * ``page is not busy''
1358: * implies that npages is 1
1359: * and needs_clean is false.
1360: */
1361:
1.37 chs 1362: nextpg = TAILQ_NEXT(tpg, listq);
1363: uvm_pagefree(tpg);
1.89 enami 1364: if (pagedaemon)
1365: uvmexp.pdfreed++;
1.37 chs 1366: }
1367: }
1368: }
1369: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1370: uvm_unlock_pageq();
1371: }
1372: if (needs_clean) {
1.149 thorpej 1373: modified = true;
1.37 chs 1374:
1375: /*
1376: * start the i/o. if we're traversing by list,
1377: * keep our place in the list with a marker page.
1378: */
1379:
1380: if (by_list) {
1381: TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1382: listq);
1383: }
1.46 chs 1384: simple_unlock(slock);
1.37 chs 1385: error = GOP_WRITE(vp, pgs, npages, flags);
1.46 chs 1386: simple_lock(slock);
1.37 chs 1387: if (by_list) {
1388: pg = TAILQ_NEXT(&curmp, listq);
1389: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1390: }
1391: if (error) {
1392: break;
1393: }
1394: if (by_list) {
1395: continue;
1396: }
1397: }
1398:
1399: /*
1400: * find the next page and continue if there was no error.
1401: */
1402:
1403: if (by_list) {
1404: if (nextpg) {
1405: pg = nextpg;
1406: nextpg = NULL;
1407: } else {
1408: pg = TAILQ_NEXT(pg, listq);
1409: }
1410: } else {
1.61 enami 1411: off += (npages - nback) << PAGE_SHIFT;
1.37 chs 1412: if (off < endoff) {
1413: pg = uvm_pagelookup(uobj, off);
1414: }
1415: }
1416: }
1417: if (by_list) {
1418: TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1.70 christos 1419: PRELE(l);
1.37 chs 1420: }
1421:
1.103 yamt 1422: if (modified && (vp->v_flag & VWRITEMAPDIRTY) != 0 &&
1.121 reinoud 1423: (vp->v_type != VBLK ||
1.103 yamt 1424: (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
1425: GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
1426: }
1427:
1.37 chs 1428: /*
1429: * if we're cleaning and there was nothing to clean,
1430: * take us off the syncer list. if we started any i/o
1431: * and we're doing sync i/o, wait for all writes to finish.
1432: */
1433:
1.62 perseant 1434: s = splbio();
1.104 yamt 1435: if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
1436: (vp->v_flag & VONWORKLST) != 0) {
1.103 yamt 1437: vp->v_flag &= ~VWRITEMAPDIRTY;
1.137 reinoud 1438: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
1439: vn_syncer_remove_from_worklist(vp);
1.37 chs 1440: }
1.62 perseant 1441: splx(s);
1.102 yamt 1442:
1443: #if !defined(DEBUG)
1444: skip_scan:
1445: #endif /* !defined(DEBUG) */
1.37 chs 1446: if (!wasclean && !async) {
1447: s = splbio();
1.71 pk 1448: /*
1449: * XXX - we want simple_unlock(&global_v_numoutput_slock);
1450: * but the slot in ltsleep() is taken!
1451: * XXX - try to recover from missed wakeups with a timeout..
1452: * must think of something better.
1453: */
1.37 chs 1454: while (vp->v_numoutput != 0) {
1455: vp->v_flag |= VBWAIT;
1.149 thorpej 1456: UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, false,
1.71 pk 1457: "genput2", hz);
1.46 chs 1458: simple_lock(slock);
1.37 chs 1459: }
1460: splx(s);
1461: }
1.140 pooka 1462: simple_unlock(slock);
1.143 hannken 1463:
1464: if (has_trans)
1465: fstrans_done(vp->v_mount);
1466:
1.53 enami 1467: return (error);
1.37 chs 1468: }
1469:
1470: int
1471: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1472: {
1.130 chs 1473: off_t off;
1474: vaddr_t kva;
1475: size_t len;
1476: int error;
1477: UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1478:
1479: UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1480: vp, pgs, npages, flags);
1481:
1482: off = pgs[0]->offset;
1483: kva = uvm_pagermapin(pgs, npages,
1484: UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1485: len = npages << PAGE_SHIFT;
1486:
1487: error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
1488: uvm_aio_biodone);
1489:
1490: return error;
1491: }
1492:
1493: /*
1494: * Backend routine for doing I/O to vnode pages. Pages are already locked
1495: * and mapped into kernel memory. Here we just look up the underlying
1496: * device block addresses and call the strategy routine.
1497: */
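/*
 * The transfer is issued as one "master" buf covering the whole mapped
 * range; each physically contiguous extent found by VOP_BMAP becomes a
 * nested buf (nestiobuf_setup()), and bytes that need no I/O, such as
 * holes, are accounted for with nestiobuf_done().
 */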
1498:
1499: static int
1500: genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags,
1501: enum uio_rw rw, void (*iodone)(struct buf *))
1502: {
1.37 chs 1503: int s, error, run;
1504: int fs_bshift, dev_bshift;
1.21 chs 1505: off_t eof, offset, startoffset;
1506: size_t bytes, iobytes, skipbytes;
1507: daddr_t lbn, blkno;
1508: struct buf *mbp, *bp;
1.36 chs 1509: struct vnode *devvp;
1.148 thorpej 1510: bool async = (flags & PGO_SYNCIO) == 0;
1511: bool write = rw == UIO_WRITE;
1.130 chs 1512: int brw = write ? B_WRITE : B_READ;
1513: UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1.21 chs 1514:
1.130 chs 1515: UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
1516: vp, kva, len, flags);
1.21 chs 1517:
1.123 yamt 1518: GOP_SIZE(vp, vp->v_size, &eof, 0);
1.121 reinoud 1519: if (vp->v_type != VBLK) {
1.36 chs 1520: fs_bshift = vp->v_mount->mnt_fs_bshift;
1521: dev_bshift = vp->v_mount->mnt_dev_bshift;
1522: } else {
1523: fs_bshift = DEV_BSHIFT;
1524: dev_bshift = DEV_BSHIFT;
1525: }
1.37 chs 1526: error = 0;
1.130 chs 1527: startoffset = off;
1528: bytes = MIN(len, eof - startoffset);
1.21 chs 1529: skipbytes = 0;
1530: KASSERT(bytes != 0);
1531:
1.130 chs 1532: if (write) {
1533: s = splbio();
1534: simple_lock(&global_v_numoutput_slock);
1535: vp->v_numoutput += 2;
1536: simple_unlock(&global_v_numoutput_slock);
1537: splx(s);
1538: }
1.119 yamt 1539: mbp = getiobuf();
1.21 chs 1540: UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1.53 enami 1541: vp, mbp, vp->v_numoutput, bytes);
1.130 chs 1542: mbp->b_bufsize = len;
1.21 chs 1543: mbp->b_data = (void *)kva;
1544: mbp->b_resid = mbp->b_bcount = bytes;
1.130 chs 1545: mbp->b_flags = B_BUSY | brw | B_AGE | (async ? (B_CALL | B_ASYNC) : 0);
1546: mbp->b_iodone = iodone;
1.21 chs 1547: mbp->b_vp = vp;
1.120 yamt 1548: if (curproc == uvm.pagedaemon_proc)
1549: BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
1550: else if (async)
1551: BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
1552: else
1553: BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
1.21 chs 1554:
1555: bp = NULL;
1556: for (offset = startoffset;
1.53 enami 1557: bytes > 0;
1558: offset += iobytes, bytes -= iobytes) {
1.21 chs 1559: lbn = offset >> fs_bshift;
1.36 chs 1560: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 1561: if (error) {
1562: UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1563: skipbytes += bytes;
1564: bytes = 0;
1565: break;
1566: }
1567:
1.26 chs 1568: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1569: bytes);
1.21 chs 1570: if (blkno == (daddr_t)-1) {
1.130 chs 1571: if (!write) {
1572: memset((char *)kva + (offset - startoffset), 0,
1573: iobytes);
1574: }
1.21 chs 1575: skipbytes += iobytes;
1576: continue;
1577: }
1578:
1579: /* if it's really one i/o, don't make a second buf */
1580: if (offset == startoffset && iobytes == bytes) {
1581: bp = mbp;
1582: } else {
1583: UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1.53 enami 1584: vp, bp, vp->v_numoutput, 0);
1.120 yamt 1585: bp = getiobuf();
1.130 chs 1586: nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
1.21 chs 1587: }
1588: bp->b_lblkno = 0;
1589:
1590: /* adjust physical blkno for partial blocks */
1.25 fvdl 1591: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.53 enami 1592: dev_bshift);
1593: UVMHIST_LOG(ubchist,
1594: "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
1595: vp, offset, bp->b_bcount, bp->b_blkno);
1.114 yamt 1596:
1597: VOP_STRATEGY(devvp, bp);
1.21 chs 1598: }
1599: if (skipbytes) {
1.29 chs 1600: UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1.21 chs 1601: }
1.120 yamt 1602: nestiobuf_done(mbp, skipbytes, error);
1.21 chs 1603: if (async) {
1.32 chs 1604: UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1.53 enami 1605: return (0);
1.21 chs 1606: }
1.37 chs 1607: UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1608: error = biowait(mbp);
1.134 yamt 1609: s = splbio();
1.130 chs 1610: (*iodone)(mbp);
1.134 yamt 1611: splx(s);
1.21 chs 1612: UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1.53 enami 1613: return (error);
1.42 chs 1614: }
1615:
1616: /*
1617: * VOP_PUTPAGES() for vnodes which never have pages.
1618: */
1619:
1620: int
1621: genfs_null_putpages(void *v)
1622: {
1623: struct vop_putpages_args /* {
1624: struct vnode *a_vp;
1625: voff_t a_offlo;
1626: voff_t a_offhi;
1627: int a_flags;
1628: } */ *ap = v;
1629: struct vnode *vp = ap->a_vp;
1630:
1631: KASSERT(vp->v_uobj.uo_npages == 0);
1632: simple_unlock(&vp->v_interlock);
1633: return (0);
1.21 chs 1634: }
1635:
1.37 chs 1636: void
1.98 yamt 1637: genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
1.37 chs 1638: {
1639: struct genfs_node *gp = VTOG(vp);
1640:
1.146 ad 1641: rw_init(&gp->g_glock);
1.37 chs 1642: gp->g_op = ops;
1643: }
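/*
 * Typical usage, as a sketch ("xxx" stands for a particular fs;
 * xxx_gop_alloc is a made-up name for its block-allocation hook):
 *
 *	static const struct genfs_ops xxx_genfsops = {
 *		.gop_size = genfs_size,
 *		.gop_alloc = xxx_gop_alloc,
 *		.gop_write = genfs_gop_write,
 *	};
 *	...
 *	genfs_node_init(vp, &xxx_genfsops);
 */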
1644:
1645: void
1.147 ad 1646: genfs_node_destroy(struct vnode *vp)
1647: {
1648: struct genfs_node *gp = VTOG(vp);
1649:
1650: rw_destroy(&gp->g_glock);
1651: }
1652:
1653: void
1.138 christos 1654: genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
1.21 chs 1655: {
1656: int bsize;
1657:
1.37 chs 1658: bsize = 1 << vp->v_mount->mnt_fs_bshift;
1659: *eobp = (size + bsize - 1) & ~(bsize - 1);
1.43 chs 1660: }
1661:
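/*
 * Compatibility getpages, for filesystems whose read path has not been
 * adapted to the page cache: resident pages are looked up as usual and
 * any PG_FAKE pages are filled in one at a time with plain VOP_READ
 * instead of going through VOP_BMAP and the strategy routine.
 */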
1662: int
1663: genfs_compat_getpages(void *v)
1664: {
1665: struct vop_getpages_args /* {
1666: struct vnode *a_vp;
1667: voff_t a_offset;
1668: struct vm_page **a_m;
1669: int *a_count;
1670: int a_centeridx;
1671: vm_prot_t a_access_type;
1672: int a_advice;
1673: int a_flags;
1674: } */ *ap = v;
1675:
1676: off_t origoffset;
1677: struct vnode *vp = ap->a_vp;
1678: struct uvm_object *uobj = &vp->v_uobj;
1679: struct vm_page *pg, **pgs;
1680: vaddr_t kva;
1681: int i, error, orignpages, npages;
1682: struct iovec iov;
1683: struct uio uio;
1.128 ad 1684: kauth_cred_t cred = curlwp->l_cred;
1.148 thorpej 1685: bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1.43 chs 1686:
1687: error = 0;
1688: origoffset = ap->a_offset;
1689: orignpages = *ap->a_count;
1690: pgs = ap->a_m;
1691:
1692: if (write && (vp->v_flag & VONWORKLST) == 0) {
1693: vn_syncer_add_to_worklist(vp, filedelay);
1694: }
1695: if (ap->a_flags & PGO_LOCKED) {
1696: uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.54 enami 1697: UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0));
1.43 chs 1698:
1.53 enami 1699: return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.43 chs 1700: }
1701: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1702: simple_unlock(&uobj->vmobjlock);
1.53 enami 1703: return (EINVAL);
1.43 chs 1704: }
1.115 yamt 1705: if ((ap->a_flags & PGO_SYNCIO) == 0) {
1.117 yamt 1706: simple_unlock(&uobj->vmobjlock);
1.115 yamt 1707: return 0;
1708: }
1.43 chs 1709: npages = orignpages;
1710: uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1711: simple_unlock(&uobj->vmobjlock);
1.53 enami 1712: kva = uvm_pagermapin(pgs, npages,
1713: UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.43 chs 1714: for (i = 0; i < npages; i++) {
1715: pg = pgs[i];
1716: if ((pg->flags & PG_FAKE) == 0) {
1717: continue;
1718: }
1719: iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
1720: iov.iov_len = PAGE_SIZE;
1721: uio.uio_iov = &iov;
1722: uio.uio_iovcnt = 1;
1723: uio.uio_offset = origoffset + (i << PAGE_SHIFT);
1724: uio.uio_rw = UIO_READ;
1725: uio.uio_resid = PAGE_SIZE;
1.122 yamt 1726: UIO_SETUP_SYSSPACE(&uio);
1.87 yamt 1727: /* XXX vn_lock */
1.43 chs 1728: error = VOP_READ(vp, &uio, 0, cred);
1729: if (error) {
1730: break;
1.52 chs 1731: }
1732: if (uio.uio_resid) {
1733: memset(iov.iov_base, 0, uio.uio_resid);
1.43 chs 1734: }
1735: }
1736: uvm_pagermapout(kva, npages);
1737: simple_lock(&uobj->vmobjlock);
1738: uvm_lock_pageq();
1739: for (i = 0; i < npages; i++) {
1740: pg = pgs[i];
1741: if (error && (pg->flags & PG_FAKE) != 0) {
1742: pg->flags |= PG_RELEASED;
1743: } else {
1744: pmap_clear_modify(pg);
1745: uvm_pageactivate(pg);
1746: }
1747: }
1748: if (error) {
1749: uvm_page_unbusy(pgs, npages);
1750: }
1751: uvm_unlock_pageq();
1752: simple_unlock(&uobj->vmobjlock);
1.53 enami 1753: return (error);
1.43 chs 1754: }
1755:
1756: int
1757: genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
1.138 christos 1758: int flags)
1.43 chs 1759: {
1760: off_t offset;
1761: struct iovec iov;
1762: struct uio uio;
1.128 ad 1763: kauth_cred_t cred = curlwp->l_cred;
1.43 chs 1764: struct buf *bp;
1765: vaddr_t kva;
1766: int s, error;
1767:
1768: offset = pgs[0]->offset;
1.53 enami 1769: kva = uvm_pagermapin(pgs, npages,
1770: UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1.43 chs 1771:
1772: iov.iov_base = (void *)kva;
1773: iov.iov_len = npages << PAGE_SHIFT;
1774: uio.uio_iov = &iov;
1.68 yamt 1775: uio.uio_iovcnt = 1;
1.43 chs 1776: uio.uio_offset = offset;
1777: uio.uio_rw = UIO_WRITE;
1778: uio.uio_resid = npages << PAGE_SHIFT;
1.122 yamt 1779: UIO_SETUP_SYSSPACE(&uio);
1.87 yamt 1780: /* XXX vn_lock */
1.43 chs 1781: error = VOP_WRITE(vp, &uio, 0, cred);
1782:
1783: s = splbio();
1.71 pk 1784: V_INCR_NUMOUTPUT(vp);
1.43 chs 1785: splx(s);
1786:
1.119 yamt 1787: bp = getiobuf();
1.43 chs 1788: bp->b_flags = B_BUSY | B_WRITE | B_AGE;
1789: bp->b_vp = vp;
1790: bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
1791: bp->b_data = (char *)kva;
1792: bp->b_bcount = npages << PAGE_SHIFT;
1793: bp->b_bufsize = npages << PAGE_SHIFT;
1794: bp->b_resid = 0;
1795: if (error) {
1796: bp->b_flags |= B_ERROR;
1797: bp->b_error = error;
1798: }
1799: uvm_aio_aiodone(bp);
1.53 enami 1800: return (error);
1.66 jdolecek 1801: }
1802:
1.130 chs 1803: /*
          1804:  * Process a uio using direct I/O.  If we reach a part of the request
          1805:  * that cannot be processed this way (misaligned, past EOF, or backed
          1806:  * by a hole), just return.  The caller must then handle some part of
          1807:  * the remainder using buffered I/O before trying direct I/O again.
          1808:  */
1809:
1810: void
1.138 christos 1811: genfs_directio(struct vnode *vp, struct uio *uio, int ioflag)
1.130 chs 1812: {
1813: struct vmspace *vs;
1814: struct iovec *iov;
1815: vaddr_t va;
1816: size_t len;
1817: const int mask = DEV_BSIZE - 1;
1818: int error;
1819:
1820: /*
1821: * We only support direct I/O to user space for now.
1822: */
1823:
1824: if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) {
1825: return;
1826: }
1827:
1828: /*
1829: * If the vnode is mapped, we would need to get the getpages lock
          1830:  * to stabilize the bmap, but then we would get into trouble while
1831: * locking the pages if the pages belong to this same vnode (or a
1832: * multi-vnode cascade to the same effect). Just fall back to
1833: * buffered I/O if the vnode is mapped to avoid this mess.
1834: */
1835:
1836: if (vp->v_flag & VMAPPED) {
1837: return;
1838: }
1839:
1840: /*
1841: * Do as much of the uio as possible with direct I/O.
1842: */
1843:
1844: vs = uio->uio_vmspace;
1845: while (uio->uio_resid) {
1846: iov = uio->uio_iov;
1847: if (iov->iov_len == 0) {
1848: uio->uio_iov++;
1849: uio->uio_iovcnt--;
1850: continue;
1851: }
1852: va = (vaddr_t)iov->iov_base;
1853: len = MIN(iov->iov_len, genfs_maxdio);
1854: len &= ~mask;
1855:
1856: /*
1857: * If the next chunk is smaller than DEV_BSIZE or extends past
1858: * the current EOF, then fall back to buffered I/O.
1859: */
1860:
1861: if (len == 0 || uio->uio_offset + len > vp->v_size) {
1862: return;
1863: }
1864:
1865: /*
1866: * Check alignment. The file offset must be at least
1867: * sector-aligned. The exact constraint on memory alignment
1868: * is very hardware-dependent, but requiring sector-aligned
1869: * addresses there too is safe.
1870: */
1871:
1872: if (uio->uio_offset & mask || va & mask) {
1873: return;
1874: }
1875: error = genfs_do_directio(vs, va, len, vp, uio->uio_offset,
1876: uio->uio_rw);
1877: if (error) {
1878: break;
1879: }
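		/* Advance the uio past the chunk we just transferred. */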
1.150 christos 1880: iov->iov_base = (char *)iov->iov_base + len;
1.130 chs 1881: iov->iov_len -= len;
1882: uio->uio_offset += len;
1883: uio->uio_resid -= len;
1884: }
1885: }
1886:
1887: /*
1888: * Iodone routine for direct I/O. We don't do much here since the request is
1889: * always synchronous, so the caller will do most of the work after biowait().
1890: */
1891:
1892: static void
1893: genfs_dio_iodone(struct buf *bp)
1894: {
1895: int s;
1896:
1897: KASSERT((bp->b_flags & B_ASYNC) == 0);
1898: s = splbio();
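	/*
	 * For a completed write (B_AGE set, B_READ clear), let
	 * vwakeup() account for the finished vnode output.
	 */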
1899: if ((bp->b_flags & (B_READ | B_AGE)) == B_AGE) {
1900: vwakeup(bp);
1901: }
1902: putiobuf(bp);
1903: splx(s);
1904: }
1905:
1906: /*
1907: * Process one chunk of a direct I/O request.
1908: */
1909:
1910: static int
1911: genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp,
1912: off_t off, enum uio_rw rw)
1913: {
1914: struct vm_map *map;
1915: struct pmap *upm, *kpm;
1916: size_t klen = round_page(uva + len) - trunc_page(uva);
1917: off_t spoff, epoff;
1918: vaddr_t kva, puva;
1919: paddr_t pa;
1920: vm_prot_t prot;
1921: int error, rv, poff, koff;
1922: const int pgoflags = PGO_CLEANIT | PGO_SYNCIO |
1923: (rw == UIO_WRITE ? PGO_FREE : 0);
1924:
1925: /*
1926: * For writes, verify that this range of the file already has fully
1927: * allocated backing store. If there are any holes, just punt and
1928: * make the caller take the buffered write path.
1929: */
1930:
1931: if (rw == UIO_WRITE) {
1932: daddr_t lbn, elbn, blkno;
1933: int bsize, bshift, run;
1934:
1935: bshift = vp->v_mount->mnt_fs_bshift;
1936: bsize = 1 << bshift;
1937: lbn = off >> bshift;
1938: elbn = (off + len + bsize - 1) >> bshift;
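		/*
		 * VOP_BMAP returns a block number of -1 for a hole;
		 * "run" counts additional contiguous blocks, letting
		 * us step through the range quickly.
		 */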
1939: while (lbn < elbn) {
1940: error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
1941: if (error) {
1942: return error;
1943: }
1944: if (blkno == (daddr_t)-1) {
1945: return ENOSPC;
1946: }
1947: lbn += 1 + run;
1948: }
1949: }
1950:
1951: /*
1952: * Flush any cached pages for parts of the file that we're about to
1953: * access. If we're writing, invalidate pages as well.
1954: */
1955:
1956: spoff = trunc_page(off);
1957: epoff = round_page(off + len);
1958: simple_lock(&vp->v_interlock);
1959: error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags);
1960: if (error) {
1961: return error;
1962: }
1963:
1964: /*
1965: * Wire the user pages and remap them into kernel memory.
1966: */
1967:
1968: prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
1969: error = uvm_vslock(vs, (void *)uva, len, prot);
1970: if (error) {
1971: return error;
1972: }
1973:
1974: map = &vs->vm_map;
1975: upm = vm_map_pmap(map);
1976: kpm = vm_map_pmap(kernel_map);
1977: kva = uvm_km_alloc(kernel_map, klen, 0,
1978: UVM_KMF_VAONLY | UVM_KMF_WAITVA);
1979: puva = trunc_page(uva);
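	/*
	 * The pages were wired by uvm_vslock() above, so pmap_extract()
	 * cannot fail; wire each physical page into the kernel map.
	 */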
1980: for (poff = 0; poff < klen; poff += PAGE_SIZE) {
1981: rv = pmap_extract(upm, puva + poff, &pa);
1982: KASSERT(rv);
1983: pmap_enter(kpm, kva + poff, pa, prot, prot | PMAP_WIRED);
1984: }
1985: pmap_update(kpm);
1986:
1987: /*
1988: * Do the I/O.
1989: */
1990:
1991: koff = uva - trunc_page(uva);
1992: error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw,
1993: genfs_dio_iodone);
1994:
1995: /*
1996: * Tear down the kernel mapping.
1997: */
1998:
1999: pmap_remove(kpm, kva, kva + klen);
2000: pmap_update(kpm);
2001: uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY);
2002:
2003: /*
2004: * Unwire the user pages.
2005: */
2006:
2007: uvm_vsunlock(vs, (void *)uva, len);
2008: return error;
2009: }
2010:
2011:
1.66 jdolecek 2012: static void
2013: filt_genfsdetach(struct knote *kn)
2014: {
2015: struct vnode *vp = (struct vnode *)kn->kn_hook;
2016:
2017: /* XXXLUKEM lock the struct? */
2018: SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
2019: }
2020:
2021: static int
2022: filt_genfsread(struct knote *kn, long hint)
2023: {
2024: struct vnode *vp = (struct vnode *)kn->kn_hook;
2025:
2026: /*
          2027:  * Filesystem is gone, so set the EOF flag and schedule
2028: * the knote for deletion.
2029: */
2030: if (hint == NOTE_REVOKE) {
2031: kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2032: return (1);
2033: }
2034:
2035: /* XXXLUKEM lock the struct? */
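	/* Readable data is the file size less the current offset. */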
2036: kn->kn_data = vp->v_size - kn->kn_fp->f_offset;
2037: return (kn->kn_data != 0);
2038: }
2039:
2040: static int
2041: filt_genfsvnode(struct knote *kn, long hint)
2042: {
2043:
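	/*
	 * Latch any events the caller subscribed to and report the
	 * knote as active once at least one of them has fired.
	 */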
2044: if (kn->kn_sfflags & hint)
2045: kn->kn_fflags |= hint;
2046: if (hint == NOTE_REVOKE) {
2047: kn->kn_flags |= EV_EOF;
2048: return (1);
2049: }
2050: return (kn->kn_fflags != 0);
2051: }
2052:
1.96 perry 2053: static const struct filterops genfsread_filtops =
1.66 jdolecek 2054: { 1, NULL, filt_genfsdetach, filt_genfsread };
1.96 perry 2055: static const struct filterops genfsvnode_filtops =
1.66 jdolecek 2056: { 1, NULL, filt_genfsdetach, filt_genfsvnode };
2057:
2058: int
2059: genfs_kqfilter(void *v)
2060: {
2061: struct vop_kqfilter_args /* {
2062: struct vnode *a_vp;
2063: struct knote *a_kn;
2064: } */ *ap = v;
2065: struct vnode *vp;
2066: struct knote *kn;
2067:
2068: vp = ap->a_vp;
2069: kn = ap->a_kn;
2070: switch (kn->kn_filter) {
2071: case EVFILT_READ:
2072: kn->kn_fop = &genfsread_filtops;
2073: break;
2074: case EVFILT_VNODE:
2075: kn->kn_fop = &genfsvnode_filtops;
2076: break;
2077: default:
2078: return (1);
2079: }
2080:
2081: kn->kn_hook = vp;
2082:
2083: /* XXXLUKEM lock the struct? */
2084: SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
2085:
2086: return (0);
1.1 mycroft 2087: }
1.136 yamt 2088:
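/*
 * Reader/writer wrappers for the genfs node's g_glock, which
 * filesystems use to serialize getpages against operations that
 * change a file's block mapping (e.g. truncation and allocation).
 */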
2089: void
2090: genfs_node_wrlock(struct vnode *vp)
2091: {
2092: struct genfs_node *gp = VTOG(vp);
2093:
1.146 ad 2094: rw_enter(&gp->g_glock, RW_WRITER);
1.136 yamt 2095: }
2096:
2097: void
2098: genfs_node_rdlock(struct vnode *vp)
2099: {
2100: struct genfs_node *gp = VTOG(vp);
2101:
1.146 ad 2102: rw_enter(&gp->g_glock, RW_READER);
1.136 yamt 2103: }
2104:
2105: void
2106: genfs_node_unlock(struct vnode *vp)
2107: {
2108: struct genfs_node *gp = VTOG(vp);
2109:
1.146 ad 2110: rw_exit(&gp->g_glock);
1.136 yamt 2111: }