Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.31.2.13
1.31.2.12 nathanw 1: /* $NetBSD$ */
1.6 fvdl 2:
3: /*
4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. All advertising materials mentioning features or use of this software
16: * must display the following acknowledgement:
17: * This product includes software developed by the University of
18: * California, Berkeley and its contributors.
19: * 4. Neither the name of the University nor the names of its contributors
20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: */
1.31.2.7 nathanw 36:
37: #include <sys/cdefs.h>
1.31.2.12 nathanw 38: __KERNEL_RCSID(0, "$NetBSD$");
1.5 perry 39:
1.8 thorpej 40: #include "opt_nfsserver.h"
41:
1.1 mycroft 42: #include <sys/param.h>
43: #include <sys/systm.h>
1.31.2.1 nathanw 44: #include <sys/lwp.h>
1.6 fvdl 45: #include <sys/proc.h>
1.1 mycroft 46: #include <sys/kernel.h>
47: #include <sys/mount.h>
48: #include <sys/namei.h>
49: #include <sys/vnode.h>
1.13 wrstuden 50: #include <sys/fcntl.h>
1.1 mycroft 51: #include <sys/malloc.h>
1.3 mycroft 52: #include <sys/poll.h>
1.31.2.5 nathanw 53: #include <sys/mman.h>
1.1 mycroft 54:
55: #include <miscfs/genfs/genfs.h>
1.31.2.5 nathanw 56: #include <miscfs/genfs/genfs_node.h>
1.6 fvdl 57: #include <miscfs/specfs/specdev.h>
1.1 mycroft 58:
1.21 chs 59: #include <uvm/uvm.h>
60: #include <uvm/uvm_pager.h>
61:
1.8 thorpej 62: #ifdef NFSSERVER
63: #include <nfs/rpcv2.h>
64: #include <nfs/nfsproto.h>
65: #include <nfs/nfs.h>
66: #include <nfs/nqnfs.h>
67: #include <nfs/nfs_var.h>
68: #endif
69:
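/*
 * Upper bound on the number of pages handled per genfs_getpages()
 * call; it sizes the on-stack page array and caps the XXXUBC
 * read-ahead below.
 */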
1.31.2.8 nathanw 70: #define MAX_READ_AHEAD 16 /* XXXUBC 16 */
71:
1.1 mycroft 72: int
1.31.2.13! nathanw 73: genfs_poll(void *v)
1.1 mycroft 74: {
1.3 mycroft 75: struct vop_poll_args /* {
1.1 mycroft 76: struct vnode *a_vp;
1.3 mycroft 77: int a_events;
1.1 mycroft 78: struct proc *a_p;
79: } */ *ap = v;
80:
1.3 mycroft 81: return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1 mycroft 82: }
83:
84: int
1.31.2.13! nathanw 85: genfs_fsync(void *v)
1.1 mycroft 86: {
87: struct vop_fsync_args /* {
88: struct vnode *a_vp;
89: struct ucred *a_cred;
1.7 kleink 90: int a_flags;
1.20 fvdl 91: off_t offlo;
92: off_t offhi;
1.1 mycroft 93: struct proc *a_p;
94: } */ *ap = v;
1.16 augustss 95: struct vnode *vp = ap->a_vp;
1.11 mycroft 96: int wait;
1.1 mycroft 97:
1.11 mycroft 98: wait = (ap->a_flags & FSYNC_WAIT) != 0;
99: vflushbuf(vp, wait);
100: if ((ap->a_flags & FSYNC_DATAONLY) != 0)
1.7 kleink 101: return (0);
1.11 mycroft 102: else
1.18 mycroft 103: return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
1.1 mycroft 104: }
105:
106: int
1.31.2.13! nathanw 107: genfs_seek(void *v)
1.4 kleink 108: {
109: struct vop_seek_args /* {
110: struct vnode *a_vp;
111: off_t a_oldoff;
112: off_t a_newoff;
113: struct ucred *a_ucred;
114: } */ *ap = v;
115:
116: if (ap->a_newoff < 0)
117: return (EINVAL);
118:
119: return (0);
120: }
121:
122: int
1.31.2.13! nathanw 123: genfs_abortop(void *v)
1.1 mycroft 124: {
125: struct vop_abortop_args /* {
126: struct vnode *a_dvp;
127: struct componentname *a_cnp;
128: } */ *ap = v;
1.31.2.13! nathanw 129:
1.1 mycroft 130: if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19 thorpej 131: PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1 mycroft 132: return (0);
1.13 wrstuden 133: }
134:
135: int
1.31.2.13! nathanw 136: genfs_fcntl(void *v)
1.13 wrstuden 137: {
138: struct vop_fcntl_args /* {
139: struct vnode *a_vp;
140: u_int a_command;
141: caddr_t a_data;
142: int a_fflag;
143: struct ucred *a_cred;
144: struct proc *a_p;
145: } */ *ap = v;
146:
147: if (ap->a_command == F_SETFL)
148: return (0);
149: else
150: return (EOPNOTSUPP);
1.1 mycroft 151: }
152:
153: /*ARGSUSED*/
154: int
1.31.2.13! nathanw 155: genfs_badop(void *v)
1.1 mycroft 156: {
157:
158: panic("genfs: bad op");
159: }
160:
161: /*ARGSUSED*/
162: int
1.31.2.13! nathanw 163: genfs_nullop(void *v)
1.1 mycroft 164: {
165:
166: return (0);
1.10 kleink 167: }
168:
169: /*ARGSUSED*/
170: int
1.31.2.13! nathanw 171: genfs_einval(void *v)
1.10 kleink 172: {
173:
174: return (EINVAL);
1.1 mycroft 175: }
176:
177: /*ARGSUSED*/
178: int
1.31.2.13! nathanw 179: genfs_eopnotsupp(void *v)
1.1 mycroft 180: {
181:
182: return (EOPNOTSUPP);
183: }
184:
1.12 wrstuden 185: /*
186: * Called when an fs doesn't support a particular vop but the vop needs to
 187: * vrele, vput, or vunlock the passed-in vnodes.
188: */
189: int
1.31.2.13! nathanw 190: genfs_eopnotsupp_rele(void *v)
1.12 wrstuden 191: {
192: struct vop_generic_args /*
193: struct vnodeop_desc *a_desc;
1.31.2.13! nathanw 194: / * other random data follows, presumably * /
1.12 wrstuden 195: } */ *ap = v;
196: struct vnodeop_desc *desc = ap->a_desc;
197: struct vnode *vp;
198: int flags, i, j, offset;
199:
200: flags = desc->vdesc_flags;
201: for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
202: if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
203: break; /* stop at end of list */
204: if ((j = flags & VDESC_VP0_WILLPUT)) {
1.31.2.13! nathanw 205: vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
1.12 wrstuden 206: switch (j) {
207: case VDESC_VP0_WILLPUT:
208: vput(vp);
209: break;
210: case VDESC_VP0_WILLUNLOCK:
211: VOP_UNLOCK(vp, 0);
212: break;
213: case VDESC_VP0_WILLRELE:
214: vrele(vp);
215: break;
216: }
217: }
218: }
219:
220: return (EOPNOTSUPP);
221: }
222:
1.1 mycroft 223: /*ARGSUSED*/
224: int
1.31.2.13! nathanw 225: genfs_ebadf(void *v)
1.1 mycroft 226: {
227:
228: return (EBADF);
1.9 matthias 229: }
230:
231: /* ARGSUSED */
232: int
1.31.2.13! nathanw 233: genfs_enoioctl(void *v)
1.9 matthias 234: {
235:
1.31.2.12 nathanw 236: return (EPASSTHROUGH);
1.6 fvdl 237: }
238:
239:
240: /*
1.15 fvdl 241: * Eliminate all activity associated with the requested vnode
1.6 fvdl 242: * and with all vnodes aliased to the requested vnode.
243: */
244: int
1.31.2.13! nathanw 245: genfs_revoke(void *v)
1.6 fvdl 246: {
247: struct vop_revoke_args /* {
248: struct vnode *a_vp;
249: int a_flags;
250: } */ *ap = v;
251: struct vnode *vp, *vq;
1.31.2.1 nathanw 252: struct proc *p = curproc->l_proc; /* XXX */
1.6 fvdl 253:
254: #ifdef DIAGNOSTIC
255: if ((ap->a_flags & REVOKEALL) == 0)
256: panic("genfs_revoke: not revokeall");
257: #endif
258:
259: vp = ap->a_vp;
260: simple_lock(&vp->v_interlock);
261:
262: if (vp->v_flag & VALIASED) {
263: /*
264: * If a vgone (or vclean) is already in progress,
265: * wait until it is done and return.
266: */
267: if (vp->v_flag & VXLOCK) {
268: vp->v_flag |= VXWANT;
269: simple_unlock(&vp->v_interlock);
270: tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
271: return (0);
272: }
273: /*
274: * Ensure that vp will not be vgone'd while we
275: * are eliminating its aliases.
276: */
277: vp->v_flag |= VXLOCK;
278: simple_unlock(&vp->v_interlock);
279: while (vp->v_flag & VALIASED) {
280: simple_lock(&spechash_slock);
281: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
282: if (vq->v_rdev != vp->v_rdev ||
283: vq->v_type != vp->v_type || vp == vq)
284: continue;
285: simple_unlock(&spechash_slock);
286: vgone(vq);
287: break;
288: }
289: if (vq == NULLVP)
290: simple_unlock(&spechash_slock);
291: }
292: /*
293: * Remove the lock so that vgone below will
 294: * really eliminate the vnode, after which time
295: * vgone will awaken any sleepers.
296: */
297: simple_lock(&vp->v_interlock);
298: vp->v_flag &= ~VXLOCK;
299: }
300: vgonel(vp, p);
301: return (0);
302: }
303:
304: /*
1.12 wrstuden 305: * Lock the node.
1.6 fvdl 306: */
307: int
1.31.2.13! nathanw 308: genfs_lock(void *v)
1.6 fvdl 309: {
310: struct vop_lock_args /* {
311: struct vnode *a_vp;
312: int a_flags;
313: } */ *ap = v;
314: struct vnode *vp = ap->a_vp;
315:
1.12 wrstuden 316: return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
1.6 fvdl 317: }
318:
319: /*
1.12 wrstuden 320: * Unlock the node.
1.6 fvdl 321: */
322: int
1.31.2.13! nathanw 323: genfs_unlock(void *v)
1.6 fvdl 324: {
325: struct vop_unlock_args /* {
326: struct vnode *a_vp;
327: int a_flags;
328: } */ *ap = v;
329: struct vnode *vp = ap->a_vp;
330:
1.12 wrstuden 331: return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
1.31.2.13! nathanw 332: &vp->v_interlock));
1.6 fvdl 333: }
334:
335: /*
1.12 wrstuden 336: * Return whether or not the node is locked.
1.6 fvdl 337: */
338: int
1.31.2.13! nathanw 339: genfs_islocked(void *v)
1.6 fvdl 340: {
341: struct vop_islocked_args /* {
342: struct vnode *a_vp;
343: } */ *ap = v;
344: struct vnode *vp = ap->a_vp;
345:
1.12 wrstuden 346: return (lockstatus(&vp->v_lock));
347: }
348:
349: /*
350: * Stubs to use when there is no locking to be done on the underlying object.
351: */
352: int
1.31.2.13! nathanw 353: genfs_nolock(void *v)
1.12 wrstuden 354: {
355: struct vop_lock_args /* {
356: struct vnode *a_vp;
357: int a_flags;
358: struct proc *a_p;
359: } */ *ap = v;
360:
361: /*
362: * Since we are not using the lock manager, we must clear
363: * the interlock here.
364: */
365: if (ap->a_flags & LK_INTERLOCK)
366: simple_unlock(&ap->a_vp->v_interlock);
367: return (0);
368: }
369:
370: int
1.31.2.13! nathanw 371: genfs_nounlock(void *v)
1.12 wrstuden 372: {
1.31.2.13! nathanw 373:
1.12 wrstuden 374: return (0);
375: }
376:
377: int
1.31.2.13! nathanw 378: genfs_noislocked(void *v)
1.12 wrstuden 379: {
1.31.2.13! nathanw 380:
1.12 wrstuden 381: return (0);
1.8 thorpej 382: }
383:
384: /*
385: * Local lease check for NFS servers. Just set up args and let
386: * nqsrv_getlease() do the rest. If NFSSERVER is not in the kernel,
387: * this is a null operation.
388: */
389: int
1.31.2.13! nathanw 390: genfs_lease_check(void *v)
1.8 thorpej 391: {
392: #ifdef NFSSERVER
393: struct vop_lease_args /* {
394: struct vnode *a_vp;
395: struct proc *a_p;
396: struct ucred *a_cred;
397: int a_flag;
398: } */ *ap = v;
399: u_int32_t duration = 0;
400: int cache;
401: u_quad_t frev;
402:
403: (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
404: NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
405: return (0);
406: #else
407: return (0);
408: #endif /* NFSSERVER */
1.21 chs 409: }
410:
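/*
 * Generic mmap entry point: nothing to check here, so just succeed.
 */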
1.31.2.3 nathanw 411: int
1.31.2.13! nathanw 412: genfs_mmap(void *v)
1.31.2.3 nathanw 413: {
1.31.2.13! nathanw 414:
! 415: return (0);
1.31.2.3 nathanw 416: }
417:
1.21 chs 418: /*
419: * generic VM getpages routine.
420: * Return PG_BUSY pages for the given range,
421: * reading from backing store if necessary.
422: */
423:
424: int
1.31.2.13! nathanw 425: genfs_getpages(void *v)
1.21 chs 426: {
427: struct vop_getpages_args /* {
428: struct vnode *a_vp;
429: voff_t a_offset;
1.31.2.3 nathanw 430: struct vm_page **a_m;
1.21 chs 431: int *a_count;
432: int a_centeridx;
433: vm_prot_t a_access_type;
434: int a_advice;
435: int a_flags;
436: } */ *ap = v;
437:
1.30 chs 438: off_t newsize, diskeof, memeof;
1.26 chs 439: off_t offset, origoffset, startoffset, endoffset, raoffset;
1.21 chs 440: daddr_t lbn, blkno;
441: int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
1.31.2.5 nathanw 442: int fs_bshift, fs_bsize, dev_bshift;
1.21 chs 443: int flags = ap->a_flags;
444: size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
445: vaddr_t kva;
446: struct buf *bp, *mbp;
447: struct vnode *vp = ap->a_vp;
1.31.2.4 nathanw 448: struct vnode *devvp;
1.31.2.5 nathanw 449: struct genfs_node *gp = VTOG(vp);
450: struct uvm_object *uobj = &vp->v_uobj;
1.31.2.8 nathanw 451: struct vm_page *pg, *pgs[MAX_READ_AHEAD];
1.31.2.1 nathanw 452: struct ucred *cred = curproc->l_proc->p_ucred; /* XXXUBC curproc */
1.21 chs 453: boolean_t async = (flags & PGO_SYNCIO) == 0;
454: boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
455: boolean_t sawhole = FALSE;
1.31.2.5 nathanw 456: boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
1.21 chs 457: UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
458:
1.30 chs 459: UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
1.31.2.13! nathanw 460: vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
1.30 chs 461:
1.21 chs 462: /* XXXUBC temp limit */
1.31.2.8 nathanw 463: if (*ap->a_count > MAX_READ_AHEAD) {
1.31.2.5 nathanw 464: panic("genfs_getpages: too many pages");
1.21 chs 465: }
466:
1.26 chs 467: error = 0;
468: origoffset = ap->a_offset;
469: orignpages = *ap->a_count;
1.31.2.5 nathanw 470: GOP_SIZE(vp, vp->v_size, &diskeof);
1.26 chs 471: if (flags & PGO_PASTEOF) {
1.31.2.5 nathanw 472: newsize = MAX(vp->v_size,
1.31.2.13! nathanw 473: origoffset + (orignpages << PAGE_SHIFT));
1.31.2.5 nathanw 474: GOP_SIZE(vp, newsize, &memeof);
1.26 chs 475: } else {
1.30 chs 476: memeof = diskeof;
1.21 chs 477: }
1.30 chs 478: KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
479: KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
480: KASSERT(orignpages > 0);
1.21 chs 481:
482: /*
483: * Bounds-check the request.
484: */
485:
1.30 chs 486: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
1.21 chs 487: if ((flags & PGO_LOCKED) == 0) {
488: simple_unlock(&uobj->vmobjlock);
489: }
490: UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
1.31.2.13! nathanw 491: origoffset, *ap->a_count, memeof,0);
! 492: return (EINVAL);
1.21 chs 493: }
494:
495: /*
496: * For PGO_LOCKED requests, just return whatever's in memory.
497: */
498:
499: if (flags & PGO_LOCKED) {
500: uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.31.2.13! nathanw 501: UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0));
1.21 chs 502:
1.31.2.13! nathanw 503: return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.21 chs 504: }
505:
506: /* vnode is VOP_LOCKed, uobj is locked */
507:
508: if (write && (vp->v_flag & VONWORKLST) == 0) {
509: vn_syncer_add_to_worklist(vp, filedelay);
510: }
511:
512: /*
513: * find the requested pages and make some simple checks.
514: * leave space in the page array for a whole block.
515: */
516:
1.31.2.4 nathanw 517: if (vp->v_type == VREG) {
518: fs_bshift = vp->v_mount->mnt_fs_bshift;
519: dev_bshift = vp->v_mount->mnt_dev_bshift;
520: } else {
521: fs_bshift = DEV_BSHIFT;
522: dev_bshift = DEV_BSHIFT;
523: }
1.21 chs 524: fs_bsize = 1 << fs_bshift;
525:
1.30 chs 526: orignpages = MIN(orignpages,
527: round_page(memeof - origoffset) >> PAGE_SHIFT);
1.21 chs 528: npages = orignpages;
529: startoffset = origoffset & ~(fs_bsize - 1);
1.31.2.13! nathanw 530: endoffset = round_page((origoffset + (npages << PAGE_SHIFT) +
! 531: fs_bsize - 1) & ~(fs_bsize - 1));
1.30 chs 532: endoffset = MIN(endoffset, round_page(memeof));
1.21 chs 533: ridx = (origoffset - startoffset) >> PAGE_SHIFT;
534:
535: memset(pgs, 0, sizeof(pgs));
536: uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
537:
538: /*
539: * if the pages are already resident, just return them.
540: */
541:
542: for (i = 0; i < npages; i++) {
543: struct vm_page *pg = pgs[ridx + i];
544:
545: if ((pg->flags & PG_FAKE) ||
546: (write && (pg->flags & PG_RDONLY))) {
547: break;
548: }
549: }
550: if (i == npages) {
551: UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
552: raoffset = origoffset + (orignpages << PAGE_SHIFT);
1.26 chs 553: npages += ridx;
1.21 chs 554: goto raout;
555: }
556:
557: /*
1.31.2.5 nathanw 558: * if PGO_OVERWRITE is set, don't bother reading the pages.
559: */
560:
561: if (flags & PGO_OVERWRITE) {
562: UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
563:
564: for (i = 0; i < npages; i++) {
565: struct vm_page *pg = pgs[ridx + i];
566:
567: pg->flags &= ~(PG_RDONLY|PG_CLEAN);
568: }
569: npages += ridx;
570: goto out;
571: }
572:
573: /*
1.21 chs 574: * the page wasn't resident and we're not overwriting,
575: * so we're going to have to do some i/o.
576: * find any additional pages needed to cover the expanded range.
577: */
578:
1.31.2.3 nathanw 579: npages = (endoffset - startoffset) >> PAGE_SHIFT;
580: if (startoffset != origoffset || npages != orignpages) {
1.21 chs 581:
582: /*
1.31.2.5 nathanw 583: * we need to avoid deadlocks caused by locking
1.21 chs 584: * additional pages at lower offsets than pages we
1.31.2.5 nathanw 585: * already have locked. unlock them all and start over.
1.21 chs 586: */
587:
1.31.2.3 nathanw 588: for (i = 0; i < orignpages; i++) {
1.21 chs 589: struct vm_page *pg = pgs[ridx + i];
590:
591: if (pg->flags & PG_FAKE) {
592: pg->flags |= PG_RELEASED;
593: }
594: }
1.31.2.3 nathanw 595: uvm_page_unbusy(&pgs[ridx], orignpages);
1.21 chs 596: memset(pgs, 0, sizeof(pgs));
597:
598: UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
1.31.2.13! nathanw 599: startoffset, endoffset, 0,0);
1.21 chs 600: npgs = npages;
601: uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
602: }
603: simple_unlock(&uobj->vmobjlock);
604:
605: /*
606: * read the desired page(s).
607: */
608:
609: totalbytes = npages << PAGE_SHIFT;
1.30 chs 610: bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
1.21 chs 611: tailbytes = totalbytes - bytes;
612: skipbytes = 0;
613:
1.31.2.13! nathanw 614: kva = uvm_pagermapin(pgs, npages,
! 615: UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.21 chs 616:
617: s = splbio();
618: mbp = pool_get(&bufpool, PR_WAITOK);
619: splx(s);
620: mbp->b_bufsize = totalbytes;
621: mbp->b_data = (void *)kva;
622: mbp->b_resid = mbp->b_bcount = bytes;
623: mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
1.31.2.5 nathanw 624: mbp->b_iodone = (async ? uvm_aio_biodone : 0);
1.21 chs 625: mbp->b_vp = vp;
626: LIST_INIT(&mbp->b_dep);
627:
628: /*
1.31 chs 629: * if EOF is in the middle of the range, zero the part past EOF.
1.31.2.5 nathanw 630: * if the page including EOF is not PG_FAKE, skip over it since
631: * in that case it has valid data that we need to preserve.
1.21 chs 632: */
633:
1.31 chs 634: if (tailbytes > 0) {
1.31.2.5 nathanw 635: size_t tailstart = bytes;
636:
637: if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
638: tailstart = round_page(tailstart);
639: tailbytes -= tailstart - bytes;
640: }
641: UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
1.31.2.13! nathanw 642: kva, tailstart, tailbytes,0);
1.31.2.5 nathanw 643: memset((void *)(kva + tailstart), 0, tailbytes);
1.21 chs 644: }
645:
646: /*
647: * now loop over the pages, reading as needed.
648: */
649:
650: if (write) {
1.31.2.5 nathanw 651: lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
1.21 chs 652: } else {
1.31.2.5 nathanw 653: lockmgr(&gp->g_glock, LK_SHARED, NULL);
1.21 chs 654: }
655:
656: bp = NULL;
657: for (offset = startoffset;
1.31.2.13! nathanw 658: bytes > 0;
! 659: offset += iobytes, bytes -= iobytes) {
1.21 chs 660:
661: /*
662: * skip pages which don't need to be read.
663: */
664:
665: pidx = (offset - startoffset) >> PAGE_SHIFT;
1.31.2.3 nathanw 666: while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
1.21 chs 667: size_t b;
668:
1.24 chs 669: KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.26 chs 670: b = MIN(PAGE_SIZE, bytes);
1.21 chs 671: offset += b;
672: bytes -= b;
673: skipbytes += b;
674: pidx++;
675: UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
1.31.2.13! nathanw 676: offset, 0,0,0);
1.21 chs 677: if (bytes == 0) {
678: goto loopdone;
679: }
680: }
681:
682: /*
683: * bmap the file to find out the blkno to read from and
684: * how much we can read in one i/o. if bmap returns an error,
685: * skip the rest of the top-level i/o.
686: */
687:
688: lbn = offset >> fs_bshift;
1.31.2.4 nathanw 689: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 690: if (error) {
691: UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
1.31.2.13! nathanw 692: lbn, error,0,0);
1.21 chs 693: skipbytes += bytes;
694: goto loopdone;
695: }
696:
697: /*
698: * see how many pages can be read with this i/o.
699: * reduce the i/o size if necessary to avoid
700: * overwriting pages with valid data.
701: */
702:
1.26 chs 703: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
704: bytes);
1.21 chs 705: if (offset + iobytes > round_page(offset)) {
706: pcount = 1;
707: while (pidx + pcount < npages &&
1.31.2.13! nathanw 708: pgs[pidx + pcount]->flags & PG_FAKE) {
1.21 chs 709: pcount++;
710: }
1.26 chs 711: iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.31.2.13! nathanw 712: (offset - trunc_page(offset)));
1.21 chs 713: }
714:
715: /*
1.31.2.13! nathanw 716: * if this block isn't allocated, zero it instead of
! 717: * reading it. if this is a read access, mark the
! 718: * pages we zeroed PG_RDONLY.
1.21 chs 719: */
720:
721: if (blkno < 0) {
1.31.2.13! nathanw 722: int holepages = (round_page(offset + iobytes) -
! 723: trunc_page(offset)) >> PAGE_SHIFT;
1.21 chs 724: UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
725:
726: sawhole = TRUE;
727: memset((char *)kva + (offset - startoffset), 0,
1.31.2.13! nathanw 728: iobytes);
1.21 chs 729: skipbytes += iobytes;
730:
1.31.2.3 nathanw 731: for (i = 0; i < holepages; i++) {
732: if (write) {
733: pgs[pidx + i]->flags &= ~PG_CLEAN;
734: } else {
1.21 chs 735: pgs[pidx + i]->flags |= PG_RDONLY;
736: }
737: }
738: continue;
739: }
740:
741: /*
742: * allocate a sub-buf for this piece of the i/o
743: * (or just use mbp if there's only 1 piece),
744: * and start it going.
745: */
746:
747: if (offset == startoffset && iobytes == bytes) {
748: bp = mbp;
749: } else {
750: s = splbio();
751: bp = pool_get(&bufpool, PR_WAITOK);
752: splx(s);
753: bp->b_data = (char *)kva + offset - startoffset;
754: bp->b_resid = bp->b_bcount = iobytes;
755: bp->b_flags = B_BUSY|B_READ|B_CALL;
756: bp->b_iodone = uvm_aio_biodone1;
757: bp->b_vp = vp;
1.31.2.5 nathanw 758: bp->b_proc = NULL;
1.21 chs 759: LIST_INIT(&bp->b_dep);
760: }
761: bp->b_lblkno = 0;
762: bp->b_private = mbp;
1.31.2.5 nathanw 763: if (devvp->v_type == VBLK) {
764: bp->b_dev = devvp->v_rdev;
765: }
1.21 chs 766:
767: /* adjust physical blkno for partial blocks */
1.25 fvdl 768: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.31.2.13! nathanw 769: dev_bshift);
1.21 chs 770:
1.31.2.13! nathanw 771: UVMHIST_LOG(ubchist,
! 772: "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
! 773: bp, offset, iobytes, bp->b_blkno);
1.21 chs 774:
775: VOP_STRATEGY(bp);
776: }
777:
778: loopdone:
779: if (skipbytes) {
780: s = splbio();
781: if (error) {
782: mbp->b_flags |= B_ERROR;
783: mbp->b_error = error;
784: }
785: mbp->b_resid -= skipbytes;
786: if (mbp->b_resid == 0) {
787: biodone(mbp);
788: }
789: splx(s);
790: }
791:
792: if (async) {
1.31.2.2 nathanw 793: UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.31.2.5 nathanw 794: lockmgr(&gp->g_glock, LK_RELEASE, NULL);
1.31.2.13! nathanw 795: return (0);
1.21 chs 796: }
797: if (bp != NULL) {
798: error = biowait(mbp);
799: }
800: s = splbio();
801: pool_put(&bufpool, mbp);
802: splx(s);
803: uvm_pagermapout(kva, npages);
1.24 chs 804: raoffset = startoffset + totalbytes;
1.21 chs 805:
806: /*
 807: * if we encountered a hole then we have to do a little more work.
808: * for read faults, we marked the page PG_RDONLY so that future
809: * write accesses to the page will fault again.
810: * for write faults, we must make sure that the backing store for
811: * the page is completely allocated while the pages are locked.
812: */
813:
1.31.2.5 nathanw 814: if (!error && sawhole && write) {
815: for (i = 0; i < npages; i++) {
816: if (pgs[i] == NULL) {
817: continue;
818: }
819: pgs[i]->flags &= ~PG_CLEAN;
820: UVMHIST_LOG(ubchist, "mark dirty pg %p", pgs[i],0,0,0);
1.21 chs 821: }
1.31.2.5 nathanw 822: error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
1.31.2.13! nathanw 823: cred);
1.31.2.5 nathanw 824: UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
825: startoffset, npages << PAGE_SHIFT, error,0);
1.21 chs 826: }
1.31.2.5 nathanw 827: lockmgr(&gp->g_glock, LK_RELEASE, NULL);
1.21 chs 828: simple_lock(&uobj->vmobjlock);
829:
830: /*
831: * see if we want to start any readahead.
832: * XXXUBC for now, just read the next 128k on 64k boundaries.
833: * this is pretty nonsensical, but it is 50% faster than reading
834: * just the next 64k.
835: */
836:
837: raout:
1.24 chs 838: if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
1.21 chs 839: PAGE_SHIFT <= 16) {
1.31.2.8 nathanw 840: off_t rasize;
1.21 chs 841: int racount;
842:
1.31.2.8 nathanw 843: /* XXXUBC temp limit, from above */
844: racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
845: rasize = racount << PAGE_SHIFT;
1.21 chs 846: (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
1.31.2.13! nathanw 847: VM_PROT_READ, 0, 0);
1.21 chs 848: simple_lock(&uobj->vmobjlock);
849:
1.31.2.8 nathanw 850: /* XXXUBC temp limit, from above */
851: racount = MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD);
852: (void) VOP_GETPAGES(vp, raoffset + rasize, NULL, &racount, 0,
1.31.2.13! nathanw 853: VM_PROT_READ, 0, 0);
1.21 chs 854: simple_lock(&uobj->vmobjlock);
855: }
856:
857: /*
858: * we're almost done! release the pages...
859: * for errors, we free the pages.
860: * otherwise we activate them and mark them as valid and clean.
861: * also, unbusy pages that were not actually requested.
862: */
863:
864: if (error) {
865: for (i = 0; i < npages; i++) {
866: if (pgs[i] == NULL) {
867: continue;
868: }
869: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.31.2.13! nathanw 870: pgs[i], pgs[i]->flags, 0,0);
1.26 chs 871: if (pgs[i]->flags & PG_FAKE) {
1.31.2.5 nathanw 872: pgs[i]->flags |= PG_RELEASED;
1.21 chs 873: }
874: }
1.31.2.5 nathanw 875: uvm_lock_pageq();
876: uvm_page_unbusy(pgs, npages);
1.21 chs 877: uvm_unlock_pageq();
878: simple_unlock(&uobj->vmobjlock);
879: UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
1.31.2.13! nathanw 880: return (error);
1.21 chs 881: }
882:
1.31.2.5 nathanw 883: out:
1.21 chs 884: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.26 chs 885: uvm_lock_pageq();
1.21 chs 886: for (i = 0; i < npages; i++) {
1.31.2.5 nathanw 887: pg = pgs[i];
888: if (pg == NULL) {
1.21 chs 889: continue;
890: }
891: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.31.2.13! nathanw 892: pg, pg->flags, 0,0);
1.31.2.5 nathanw 893: if (pg->flags & PG_FAKE && !overwrite) {
894: pg->flags &= ~(PG_FAKE);
1.21 chs 895: pmap_clear_modify(pgs[i]);
896: }
897: if (write) {
1.31.2.5 nathanw 898: pg->flags &= ~(PG_RDONLY);
1.21 chs 899: }
900: if (i < ridx || i >= ridx + orignpages || async) {
901: UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
1.31.2.13! nathanw 902: pg, pg->offset,0,0);
1.31.2.5 nathanw 903: if (pg->flags & PG_WANTED) {
904: wakeup(pg);
905: }
906: if (pg->flags & PG_FAKE) {
907: KASSERT(overwrite);
908: uvm_pagezero(pg);
909: }
910: if (pg->flags & PG_RELEASED) {
911: uvm_pagefree(pg);
1.26 chs 912: continue;
1.21 chs 913: }
1.31.2.5 nathanw 914: uvm_pageactivate(pg);
915: pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
916: UVM_PAGE_OWN(pg, NULL);
1.21 chs 917: }
918: }
1.26 chs 919: uvm_unlock_pageq();
1.21 chs 920: simple_unlock(&uobj->vmobjlock);
921: if (ap->a_m != NULL) {
922: memcpy(ap->a_m, &pgs[ridx],
1.31.2.13! nathanw 923: orignpages * sizeof(struct vm_page *));
1.21 chs 924: }
1.31.2.13! nathanw 925: return (0);
1.21 chs 926: }
927:
928: /*
929: * generic VM putpages routine.
930: * Write the given range of pages to backing store.
1.31.2.5 nathanw 931: *
932: * => "offhi == 0" means flush all pages at or after "offlo".
933: * => object should be locked by caller. we may _unlock_ the object
934: * if (and only if) we need to clean a page (PGO_CLEANIT), or
935: * if PGO_SYNCIO is set and there are pages busy.
936: * we return with the object locked.
937: * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
938: * thus, a caller might want to unlock higher level resources
939: * (e.g. vm_map) before calling flush.
940: * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
941: * unlock the object nor block.
942: * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
943: * => NOTE: we rely on the fact that the object's memq is a TAILQ and
944: * that new pages are inserted on the tail end of the list. thus,
945: * we can make a complete pass through the object in one go by starting
946: * at the head and working towards the tail (new pages are put in
947: * front of us).
948: * => NOTE: we are allowed to lock the page queues, so the caller
949: * must not be holding the page queue lock.
950: *
951: * note on "cleaning" object and PG_BUSY pages:
952: * this routine is holding the lock on the object. the only time
953: * that it can run into a PG_BUSY page that it does not own is if
954: * some other process has started I/O on the page (e.g. either
955: * a pagein, or a pageout). if the PG_BUSY page is being paged
956: * in, then it can not be dirty (!PG_CLEAN) because no one has
957: * had a chance to modify it yet. if the PG_BUSY page is being
958: * paged out then it means that someone else has already started
1.31.2.13! nathanw 959: * cleaning the page for us (how nice!). in this case, if we
1.31.2.5 nathanw 960: * have syncio specified, then after we make our pass through the
1.31.2.13! nathanw 961: * object we need to wait for the other PG_BUSY pages to clear
1.31.2.5 nathanw 962: * off (i.e. we need to do an iosync). also note that once a
 963: * page is PG_BUSY it must stay in its object until it is un-busied.
964: *
965: * note on page traversal:
966: * we can traverse the pages in an object either by going down the
967: * linked list in "uobj->memq", or we can go over the address range
 968: * page by page, doing hash table lookups for each address. depending
1.31.2.13! nathanw 969: * on how many pages are in the object it may be cheaper to do one
1.31.2.5 nathanw 970: * or the other. we set "by_list" to true if we are using memq.
971: * if the cost of a hash lookup was equal to the cost of the list
972: * traversal we could compare the number of pages in the start->stop
973: * range to the total number of pages in the object. however, it
974: * seems that a hash table lookup is more expensive than the linked
1.31.2.13! nathanw 975: * list traversal, so we multiply the number of pages in the
1.31.2.5 nathanw 976: * range by an estimate of the relatively higher cost of the hash lookup.
1.21 chs 977: */
978:
979: int
1.31.2.13! nathanw 980: genfs_putpages(void *v)
1.21 chs 981: {
982: struct vop_putpages_args /* {
983: struct vnode *a_vp;
1.31.2.5 nathanw 984: voff_t a_offlo;
985: voff_t a_offhi;
1.21 chs 986: int a_flags;
987: } */ *ap = v;
1.31.2.5 nathanw 988: struct vnode *vp = ap->a_vp;
989: struct uvm_object *uobj = &vp->v_uobj;
1.31.2.10 nathanw 990: struct simplelock *slock = &uobj->vmobjlock;
1.31.2.5 nathanw 991: off_t startoff = ap->a_offlo;
992: off_t endoff = ap->a_offhi;
993: off_t off;
994: int flags = ap->a_flags;
995: int n = MAXBSIZE >> PAGE_SHIFT;
996: int i, s, error, npages, nback;
997: int freeflag;
998: struct vm_page *pgs[n], *pg, *nextpg, *tpg, curmp, endmp;
1.31.2.10 nathanw 999: boolean_t wasclean, by_list, needs_clean, yield;
1.31.2.5 nathanw 1000: boolean_t async = (flags & PGO_SYNCIO) == 0;
1001: UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
1002:
1003: KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1004: KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
1005: KASSERT(startoff < endoff || endoff == 0);
1006:
1007: UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
1008: vp, uobj->uo_npages, startoff, endoff - startoff);
1009: if (uobj->uo_npages == 0) {
1010: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1011: (vp->v_flag & VONWORKLST)) {
1012: vp->v_flag &= ~VONWORKLST;
1013: LIST_REMOVE(vp, v_synclist);
1014: }
1.31.2.10 nathanw 1015: simple_unlock(slock);
1.31.2.13! nathanw 1016: return (0);
1.31.2.5 nathanw 1017: }
1018:
1019: /*
1020: * the vnode has pages, set up to process the request.
1021: */
1022:
1023: error = 0;
1.31.2.8 nathanw 1024: s = splbio();
1025: wasclean = (vp->v_numoutput == 0);
1026: splx(s);
1.31.2.5 nathanw 1027: off = startoff;
1028: if (endoff == 0 || flags & PGO_ALLPAGES) {
1029: endoff = trunc_page(LLONG_MAX);
1030: }
1031: by_list = (uobj->uo_npages <=
1032: ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
1033:
1034: /*
1035: * start the loop. when scanning by list, hold the last page
1036: * in the list before we start. pages allocated after we start
1037: * will be added to the end of the list, so we can stop at the
1038: * current last page.
1039: */
1040:
1041: freeflag = (curproc == uvm.pagedaemon_proc) ? PG_PAGEOUT : PG_RELEASED;
1042: curmp.uobject = uobj;
1043: curmp.offset = (voff_t)-1;
1044: curmp.flags = PG_BUSY;
1045: endmp.uobject = uobj;
1046: endmp.offset = (voff_t)-1;
1047: endmp.flags = PG_BUSY;
1048: if (by_list) {
1049: pg = TAILQ_FIRST(&uobj->memq);
1050: TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1051: PHOLD(curproc);
1052: } else {
1053: pg = uvm_pagelookup(uobj, off);
1054: }
1055: nextpg = NULL;
1056: while (by_list || off < endoff) {
1057:
1058: /*
1059: * if the current page is not interesting, move on to the next.
1060: */
1061:
1062: KASSERT(pg == NULL || pg->uobject == uobj);
1063: KASSERT(pg == NULL ||
1.31.2.13! nathanw 1064: (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
! 1065: (pg->flags & PG_BUSY) != 0);
1.31.2.5 nathanw 1066: if (by_list) {
1067: if (pg == &endmp) {
1068: break;
1069: }
1070: if (pg->offset < startoff || pg->offset >= endoff ||
1071: pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1072: pg = TAILQ_NEXT(pg, listq);
1073: continue;
1074: }
1075: off = pg->offset;
1.31.2.13! nathanw 1076: } else if (pg == NULL ||
! 1077: pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1.31.2.5 nathanw 1078: off += PAGE_SIZE;
1079: if (off < endoff) {
1080: pg = uvm_pagelookup(uobj, off);
1081: }
1082: continue;
1083: }
1.21 chs 1084:
1.31.2.5 nathanw 1085: /*
1086: * if the current page needs to be cleaned and it's busy,
1087: * wait for it to become unbusy.
1088: */
1089:
1.31.2.12 nathanw 1090: yield = (curproc->l_cpu->ci_schedstate.spc_flags &
1091: SPCF_SHOULDYIELD) && curproc != uvm.pagedaemon_proc;
1.31.2.10 nathanw 1092: if (pg->flags & PG_BUSY || yield) {
1.31.2.5 nathanw 1093: KASSERT(curproc != uvm.pagedaemon_proc);
1094: UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
1095: if (by_list) {
1096: TAILQ_INSERT_BEFORE(pg, &curmp, listq);
1097: UVMHIST_LOG(ubchist, "curmp next %p",
1.31.2.13! nathanw 1098: TAILQ_NEXT(&curmp, listq), 0,0,0);
1.31.2.5 nathanw 1099: }
1.31.2.10 nathanw 1100: if (yield) {
1101: simple_unlock(slock);
1102: preempt(NULL);
1103: simple_lock(slock);
1104: } else {
1105: pg->flags |= PG_WANTED;
1106: UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1107: simple_lock(slock);
1108: }
1.31.2.5 nathanw 1109: if (by_list) {
1110: UVMHIST_LOG(ubchist, "after next %p",
1.31.2.13! nathanw 1111: TAILQ_NEXT(&curmp, listq), 0,0,0);
1.31.2.5 nathanw 1112: pg = TAILQ_NEXT(&curmp, listq);
1113: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1114: } else {
1115: pg = uvm_pagelookup(uobj, off);
1116: }
1117: continue;
1118: }
1119:
1120: /*
1.31.2.10 nathanw 1121: * if we're freeing, remove all mappings of the page now.
 1122: * if we're cleaning, check if the page needs to be cleaned.
1123: */
1124:
1125: if (flags & PGO_FREE) {
1126: pmap_page_protect(pg, VM_PROT_NONE);
1127: }
1128: if (flags & PGO_CLEANIT) {
1129: needs_clean = pmap_clear_modify(pg) ||
1.31.2.13! nathanw 1130: (pg->flags & PG_CLEAN) == 0;
1.31.2.10 nathanw 1131: pg->flags |= PG_CLEAN;
1132: } else {
1133: needs_clean = FALSE;
1134: }
1135:
1136: /*
1.31.2.5 nathanw 1137: * if we're cleaning, build a cluster.
1138: * the cluster will consist of pages which are currently dirty,
1139: * but they will be returned to us marked clean.
1140: * if not cleaning, just operate on the one page.
1141: */
1142:
1143: if (needs_clean) {
1144: wasclean = FALSE;
1145: memset(pgs, 0, sizeof(pgs));
1146: pg->flags |= PG_BUSY;
1147: UVM_PAGE_OWN(pg, "genfs_putpages");
1148:
1149: /*
1150: * first look backward.
1151: */
1152:
1153: npages = MIN(n >> 1, off >> PAGE_SHIFT);
1154: nback = npages;
1155: uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
1156: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
1157: if (nback) {
1158: memmove(&pgs[0], &pgs[npages - nback],
1159: nback * sizeof(pgs[0]));
1.31.2.10 nathanw 1160: if (npages - nback < nback)
1161: memset(&pgs[nback], 0,
1162: (npages - nback) * sizeof(pgs[0]));
1163: else
1164: memset(&pgs[npages - nback], 0,
1165: nback * sizeof(pgs[0]));
1166: n -= nback;
1.31.2.5 nathanw 1167: }
1168:
1169: /*
1170: * then plug in our page of interest.
1171: */
1172:
1173: pgs[nback] = pg;
1174:
1175: /*
1176: * then look forward to fill in the remaining space in
1177: * the array of pages.
1178: */
1179:
1180: npages = MIN(n, (endoff - off) >> PAGE_SHIFT) - 1;
1181: uvn_findpages(uobj, off + PAGE_SIZE, &npages,
1182: &pgs[nback + 1],
1183: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
1184: npages += nback + 1;
1185: } else {
1186: pgs[0] = pg;
1187: npages = 1;
1188: }
1189:
1190: /*
1191: * apply FREE or DEACTIVATE options if requested.
1192: */
1193:
1194: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1195: uvm_lock_pageq();
1196: }
1197: for (i = 0; i < npages; i++) {
1198: tpg = pgs[i];
1199: KASSERT(tpg->uobject == uobj);
1200: if (flags & PGO_DEACTIVATE &&
1201: (tpg->pqflags & PQ_INACTIVE) == 0 &&
1202: tpg->wire_count == 0) {
1203: (void) pmap_clear_reference(tpg);
1204: uvm_pagedeactivate(tpg);
1205: } else if (flags & PGO_FREE) {
1206: pmap_page_protect(tpg, VM_PROT_NONE);
1207: if (tpg->flags & PG_BUSY) {
1208: tpg->flags |= freeflag;
1209: if (freeflag == PG_PAGEOUT) {
1210: uvmexp.paging++;
1211: uvm_pagedequeue(tpg);
1212: }
1213: } else {
1214: nextpg = TAILQ_NEXT(tpg, listq);
1215: uvm_pagefree(tpg);
1216: }
1217: }
1218: }
1219: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1220: uvm_unlock_pageq();
1221: }
1222: if (needs_clean) {
1223:
1224: /*
1225: * start the i/o. if we're traversing by list,
1226: * keep our place in the list with a marker page.
1227: */
1228:
1229: if (by_list) {
1230: TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1231: listq);
1232: }
1.31.2.10 nathanw 1233: simple_unlock(slock);
1.31.2.5 nathanw 1234: error = GOP_WRITE(vp, pgs, npages, flags);
1.31.2.10 nathanw 1235: simple_lock(slock);
1.31.2.5 nathanw 1236: if (by_list) {
1237: pg = TAILQ_NEXT(&curmp, listq);
1238: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1239: }
1240: if (error == ENOMEM) {
1241: for (i = 0; i < npages; i++) {
1242: tpg = pgs[i];
1243: if (tpg->flags & PG_PAGEOUT) {
1244: tpg->flags &= ~PG_PAGEOUT;
1245: uvmexp.paging--;
1246: }
1247: tpg->flags &= ~PG_CLEAN;
1248: uvm_pageactivate(tpg);
1249: }
1250: uvm_page_unbusy(pgs, npages);
1251: }
1252: if (error) {
1253: break;
1254: }
1255: if (by_list) {
1256: continue;
1257: }
1258: }
1259:
1260: /*
1261: * find the next page and continue if there was no error.
1262: */
1263:
1264: if (by_list) {
1265: if (nextpg) {
1266: pg = nextpg;
1267: nextpg = NULL;
1268: } else {
1269: pg = TAILQ_NEXT(pg, listq);
1270: }
1271: } else {
1.31.2.10 nathanw 1272: off += npages << PAGE_SHIFT;
1.31.2.5 nathanw 1273: if (off < endoff) {
1274: pg = uvm_pagelookup(uobj, off);
1275: }
1276: }
1277: }
1278: if (by_list) {
1279: TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1280: PRELE(curproc);
1281: }
1282:
1283: /*
1284: * if we're cleaning and there was nothing to clean,
1285: * take us off the syncer list. if we started any i/o
1286: * and we're doing sync i/o, wait for all writes to finish.
1287: */
1288:
1289: if ((flags & PGO_CLEANIT) && wasclean &&
1290: startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
1291: LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1292: (vp->v_flag & VONWORKLST)) {
1293: vp->v_flag &= ~VONWORKLST;
1294: LIST_REMOVE(vp, v_synclist);
1295: }
1296: if (!wasclean && !async) {
1297: s = splbio();
1298: while (vp->v_numoutput != 0) {
1299: vp->v_flag |= VBWAIT;
1.31.2.10 nathanw 1300: UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, FALSE,
1301: "genput2", 0);
1302: simple_lock(slock);
1.31.2.5 nathanw 1303: }
1304: splx(s);
1305: }
1306: simple_unlock(&uobj->vmobjlock);
1.31.2.13! nathanw 1307: return (error);
1.31.2.5 nathanw 1308: }
1309:
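/*
 * Default GOP_WRITE(): write the given (busy) pages to backing store,
 * splitting the range into contiguous device runs with VOP_BMAP()
 * and issuing a buf per run via VOP_STRATEGY().
 */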
1310: int
1311: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1312: {
1313: int s, error, run;
1314: int fs_bshift, dev_bshift;
1.21 chs 1315: vaddr_t kva;
1316: off_t eof, offset, startoffset;
1317: size_t bytes, iobytes, skipbytes;
1318: daddr_t lbn, blkno;
1319: struct vm_page *pg;
1320: struct buf *mbp, *bp;
1.31.2.4 nathanw 1321: struct vnode *devvp;
1.31.2.5 nathanw 1322: boolean_t async = (flags & PGO_SYNCIO) == 0;
1.31.2.6 nathanw 1323: UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);
1.21 chs 1324:
1.31.2.5 nathanw 1325: UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1326: vp, pgs, npages, flags);
1.21 chs 1327:
1.31.2.5 nathanw 1328: GOP_SIZE(vp, vp->v_size, &eof);
1.31.2.4 nathanw 1329: if (vp->v_type == VREG) {
1330: fs_bshift = vp->v_mount->mnt_fs_bshift;
1331: dev_bshift = vp->v_mount->mnt_dev_bshift;
1332: } else {
1333: fs_bshift = DEV_BSHIFT;
1334: dev_bshift = DEV_BSHIFT;
1335: }
1.31.2.5 nathanw 1336: error = 0;
1337: pg = pgs[0];
1.21 chs 1338: startoffset = pg->offset;
1.26 chs 1339: bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1.21 chs 1340: skipbytes = 0;
1341: KASSERT(bytes != 0);
1342:
1.31.2.13! nathanw 1343: kva = uvm_pagermapin(pgs, npages,
! 1344: UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1.21 chs 1345:
1346: s = splbio();
1347: vp->v_numoutput += 2;
1348: mbp = pool_get(&bufpool, PR_WAITOK);
1349: UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1.31.2.13! nathanw 1350: vp, mbp, vp->v_numoutput, bytes);
1.21 chs 1351: splx(s);
1352: mbp->b_bufsize = npages << PAGE_SHIFT;
1353: mbp->b_data = (void *)kva;
1354: mbp->b_resid = mbp->b_bcount = bytes;
1.31.2.8 nathanw 1355: mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
1.21 chs 1356: mbp->b_iodone = uvm_aio_biodone;
1357: mbp->b_vp = vp;
1358: LIST_INIT(&mbp->b_dep);
1359:
1360: bp = NULL;
1361: for (offset = startoffset;
1.31.2.13! nathanw 1362: bytes > 0;
! 1363: offset += iobytes, bytes -= iobytes) {
1.21 chs 1364: lbn = offset >> fs_bshift;
1.31.2.4 nathanw 1365: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 1366: if (error) {
1367: UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1368: skipbytes += bytes;
1369: bytes = 0;
1370: break;
1371: }
1372:
1.26 chs 1373: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1374: bytes);
1.21 chs 1375: if (blkno == (daddr_t)-1) {
1376: skipbytes += iobytes;
1377: continue;
1378: }
1379:
1380: /* if it's really one i/o, don't make a second buf */
1381: if (offset == startoffset && iobytes == bytes) {
1382: bp = mbp;
1383: } else {
1384: s = splbio();
1385: vp->v_numoutput++;
1386: bp = pool_get(&bufpool, PR_WAITOK);
1387: UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1.31.2.13! nathanw 1388: vp, bp, vp->v_numoutput, 0);
1.21 chs 1389: splx(s);
1390: bp->b_data = (char *)kva +
1.31.2.13! nathanw 1391: (vaddr_t)(offset - pg->offset);
1.21 chs 1392: bp->b_resid = bp->b_bcount = iobytes;
1.31.2.8 nathanw 1393: bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
1.21 chs 1394: bp->b_iodone = uvm_aio_biodone1;
1395: bp->b_vp = vp;
1396: LIST_INIT(&bp->b_dep);
1397: }
1398: bp->b_lblkno = 0;
1399: bp->b_private = mbp;
1.31.2.5 nathanw 1400: if (devvp->v_type == VBLK) {
1401: bp->b_dev = devvp->v_rdev;
1402: }
1.21 chs 1403:
1404: /* adjust physical blkno for partial blocks */
1.25 fvdl 1405: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.31.2.13! nathanw 1406: dev_bshift);
! 1407: UVMHIST_LOG(ubchist,
! 1408: "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
! 1409: vp, offset, bp->b_bcount, bp->b_blkno);
1.21 chs 1410: VOP_STRATEGY(bp);
1411: }
1412: if (skipbytes) {
1.29 chs 1413: UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1.21 chs 1414: s = splbio();
1.29 chs 1415: if (error) {
1416: mbp->b_flags |= B_ERROR;
1417: mbp->b_error = error;
1418: }
1.31.2.5 nathanw 1419: mbp->b_resid -= skipbytes;
1.21 chs 1420: if (mbp->b_resid == 0) {
1421: biodone(mbp);
1422: }
1423: splx(s);
1424: }
1425: if (async) {
1.31.2.2 nathanw 1426: UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1.31.2.13! nathanw 1427: return (0);
1.21 chs 1428: }
1.31.2.5 nathanw 1429: UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1430: error = biowait(mbp);
1431: uvm_aio_aiodone(mbp);
1.21 chs 1432: UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1.31.2.13! nathanw 1433: return (error);
1.21 chs 1434: }
1435:
1.31.2.8 nathanw 1436: /*
1437: * VOP_PUTPAGES() for vnodes which never have pages.
1438: */
1439:
1440: int
1441: genfs_null_putpages(void *v)
1442: {
1443: struct vop_putpages_args /* {
1444: struct vnode *a_vp;
1445: voff_t a_offlo;
1446: voff_t a_offhi;
1447: int a_flags;
1448: } */ *ap = v;
1449: struct vnode *vp = ap->a_vp;
1450:
1451: KASSERT(vp->v_uobj.uo_npages == 0);
1452: simple_unlock(&vp->v_interlock);
1453: return (0);
1454: }
1455:
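/*
 * Initialize the genfs_node part of a vnode: set up the glock that
 * genfs_getpages() uses to serialize block allocation against i/o,
 * and record the fs's genfs_ops vector.
 */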
1.31.2.5 nathanw 1456: void
1457: genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
1458: {
1459: struct genfs_node *gp = VTOG(vp);
1460:
1461: lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
1462: gp->g_op = ops;
1463: }
1464:
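/*
 * Default GOP_SIZE(): round the given file size up to the end of
 * its last filesystem block.
 */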
1465: void
1466: genfs_size(struct vnode *vp, off_t size, off_t *eobp)
1.21 chs 1467: {
1468: int bsize;
1469:
1.31.2.5 nathanw 1470: bsize = 1 << vp->v_mount->mnt_fs_bshift;
1471: *eobp = (size + bsize - 1) & ~(bsize - 1);
1.31.2.8 nathanw 1472: }
1473:
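/*
 * Compatibility VOP_GETPAGES() that fills the requested pages with
 * VOP_READ(), one page at a time, presumably for filesystems with
 * no native getpages support.
 */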
1474: int
1475: genfs_compat_getpages(void *v)
1476: {
1477: struct vop_getpages_args /* {
1478: struct vnode *a_vp;
1479: voff_t a_offset;
1480: struct vm_page **a_m;
1481: int *a_count;
1482: int a_centeridx;
1483: vm_prot_t a_access_type;
1484: int a_advice;
1485: int a_flags;
1486: } */ *ap = v;
1487:
1488: off_t origoffset;
1489: struct vnode *vp = ap->a_vp;
1490: struct uvm_object *uobj = &vp->v_uobj;
1491: struct vm_page *pg, **pgs;
1492: vaddr_t kva;
1493: int i, error, orignpages, npages;
1494: struct iovec iov;
1495: struct uio uio;
1.31.2.9 nathanw 1496: struct ucred *cred = curproc->l_proc->p_ucred;
1.31.2.8 nathanw 1497: boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1498:
1499: error = 0;
1500: origoffset = ap->a_offset;
1501: orignpages = *ap->a_count;
1502: pgs = ap->a_m;
1503:
1504: if (write && (vp->v_flag & VONWORKLST) == 0) {
1505: vn_syncer_add_to_worklist(vp, filedelay);
1506: }
1507: if (ap->a_flags & PGO_LOCKED) {
1508: uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1.31.2.13! nathanw 1509: UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0));
1.31.2.8 nathanw 1510:
1.31.2.13! nathanw 1511: return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1.31.2.8 nathanw 1512: }
1513: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1514: simple_unlock(&uobj->vmobjlock);
1.31.2.13! nathanw 1515: return (EINVAL);
1.31.2.8 nathanw 1516: }
1517: npages = orignpages;
1518: uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1519: simple_unlock(&uobj->vmobjlock);
1.31.2.13! nathanw 1520: kva = uvm_pagermapin(pgs, npages,
! 1521: UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1.31.2.8 nathanw 1522: for (i = 0; i < npages; i++) {
1523: pg = pgs[i];
1524: if ((pg->flags & PG_FAKE) == 0) {
1525: continue;
1526: }
1527: iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
1528: iov.iov_len = PAGE_SIZE;
1529: uio.uio_iov = &iov;
1530: uio.uio_iovcnt = 1;
1531: uio.uio_offset = origoffset + (i << PAGE_SHIFT);
1532: uio.uio_segflg = UIO_SYSSPACE;
1533: uio.uio_rw = UIO_READ;
1534: uio.uio_resid = PAGE_SIZE;
1.31.2.9 nathanw 1535: uio.uio_procp = curproc->l_proc;
1.31.2.8 nathanw 1536: error = VOP_READ(vp, &uio, 0, cred);
1537: if (error) {
1538: break;
1.31.2.12 nathanw 1539: }
1540: if (uio.uio_resid) {
1541: memset(iov.iov_base, 0, uio.uio_resid);
1.31.2.8 nathanw 1542: }
1543: }
1544: uvm_pagermapout(kva, npages);
1545: simple_lock(&uobj->vmobjlock);
1546: uvm_lock_pageq();
1547: for (i = 0; i < npages; i++) {
1548: pg = pgs[i];
1549: if (error && (pg->flags & PG_FAKE) != 0) {
1550: pg->flags |= PG_RELEASED;
1551: } else {
1552: pmap_clear_modify(pg);
1553: uvm_pageactivate(pg);
1554: }
1555: }
1556: if (error) {
1557: uvm_page_unbusy(pgs, npages);
1558: }
1559: uvm_unlock_pageq();
1560: simple_unlock(&uobj->vmobjlock);
1.31.2.13! nathanw 1561: return (error);
1.31.2.8 nathanw 1562: }
1563:
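/*
 * Companion to genfs_compat_getpages(): flush the pages with a single
 * VOP_WRITE(), then fake up a buf so uvm_aio_aiodone() can finish
 * the pageout.
 */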
1564: int
1565: genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
1566: int flags)
1567: {
1568: off_t offset;
1569: struct iovec iov;
1570: struct uio uio;
1.31.2.9 nathanw 1571: struct ucred *cred = curproc->l_proc->p_ucred;
1.31.2.8 nathanw 1572: struct buf *bp;
1573: vaddr_t kva;
1574: int s, error;
1575:
1576: offset = pgs[0]->offset;
1.31.2.13! nathanw 1577: kva = uvm_pagermapin(pgs, npages,
! 1578: UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1.31.2.8 nathanw 1579:
1580: iov.iov_base = (void *)kva;
1581: iov.iov_len = npages << PAGE_SHIFT;
1582: uio.uio_iov = &iov;
1583: uio.uio_iovcnt = npages;
1584: uio.uio_offset = offset;
1585: uio.uio_segflg = UIO_SYSSPACE;
1586: uio.uio_rw = UIO_WRITE;
1587: uio.uio_resid = npages << PAGE_SHIFT;
1.31.2.9 nathanw 1588: uio.uio_procp = curproc->l_proc;
1.31.2.8 nathanw 1589: error = VOP_WRITE(vp, &uio, 0, cred);
1590:
1591: s = splbio();
1592: vp->v_numoutput++;
1593: bp = pool_get(&bufpool, PR_WAITOK);
1594: splx(s);
1595:
1596: bp->b_flags = B_BUSY | B_WRITE | B_AGE;
1597: bp->b_vp = vp;
1598: bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
1599: bp->b_data = (char *)kva;
1600: bp->b_bcount = npages << PAGE_SHIFT;
1601: bp->b_bufsize = npages << PAGE_SHIFT;
1602: bp->b_resid = 0;
1603: LIST_INIT(&bp->b_dep);
1604: if (error) {
1605: bp->b_flags |= B_ERROR;
1606: bp->b_error = error;
1607: }
1608: uvm_aio_aiodone(bp);
1.31.2.13! nathanw 1609: return (error);
1.1 mycroft 1610: }