Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.28
1.27 fvdl 1: /* $NetBSD: genfs_vnops.c,v 1.26 2001/02/05 12:26:08 chs Exp $ */
1.6 fvdl 2:
3: /*
4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. All advertising materials mentioning features or use of this software
16: * must display the following acknowledgement:
17: * This product includes software developed by the University of
18: * California, Berkeley and its contributors.
19: * 4. Neither the name of the University nor the names of its contributors
20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: */
1.5 perry 36:
1.8 thorpej 37: #include "opt_nfsserver.h"
38:
1.1 mycroft 39: #include <sys/param.h>
40: #include <sys/systm.h>
1.6 fvdl 41: #include <sys/proc.h>
1.1 mycroft 42: #include <sys/kernel.h>
43: #include <sys/mount.h>
44: #include <sys/namei.h>
45: #include <sys/vnode.h>
1.13 wrstuden 46: #include <sys/fcntl.h>
1.1 mycroft 47: #include <sys/malloc.h>
1.3 mycroft 48: #include <sys/poll.h>
1.1 mycroft 49:
50: #include <miscfs/genfs/genfs.h>
1.6 fvdl 51: #include <miscfs/specfs/specdev.h>
1.1 mycroft 52:
1.21 chs 53: #include <uvm/uvm.h>
54: #include <uvm/uvm_pager.h>
55:
1.8 thorpej 56: #ifdef NFSSERVER
57: #include <nfs/rpcv2.h>
58: #include <nfs/nfsproto.h>
59: #include <nfs/nfs.h>
60: #include <nfs/nqnfs.h>
61: #include <nfs/nfs_var.h>
62: #endif
63:
1.1 mycroft 64: int
1.3 mycroft 65: genfs_poll(v)
1.1 mycroft 66: void *v;
67: {
1.3 mycroft 68: struct vop_poll_args /* {
1.1 mycroft 69: struct vnode *a_vp;
1.3 mycroft 70: int a_events;
1.1 mycroft 71: struct proc *a_p;
72: } */ *ap = v;
73:
1.3 mycroft 74: return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1 mycroft 75: }
76:
77: int
78: genfs_fsync(v)
79: void *v;
80: {
81: struct vop_fsync_args /* {
82: struct vnode *a_vp;
83: struct ucred *a_cred;
1.7 kleink 84: int a_flags;
1.20 fvdl 85: off_t offlo;
86: off_t offhi;
1.1 mycroft 87: struct proc *a_p;
88: } */ *ap = v;
1.16 augustss 89: struct vnode *vp = ap->a_vp;
1.11 mycroft 90: int wait;
1.1 mycroft 91:
1.11 mycroft 92: wait = (ap->a_flags & FSYNC_WAIT) != 0;
93: vflushbuf(vp, wait);
94: if ((ap->a_flags & FSYNC_DATAONLY) != 0)
1.7 kleink 95: return (0);
1.11 mycroft 96: else
1.18 mycroft 97: return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
1.1 mycroft 98: }
99:
100: int
1.4 kleink 101: genfs_seek(v)
102: void *v;
103: {
104: struct vop_seek_args /* {
105: struct vnode *a_vp;
106: off_t a_oldoff;
107: off_t a_newoff;
108: struct ucred *a_ucred;
109: } */ *ap = v;
110:
111: if (ap->a_newoff < 0)
112: return (EINVAL);
113:
114: return (0);
115: }
116:
117: int
1.1 mycroft 118: genfs_abortop(v)
119: void *v;
120: {
121: struct vop_abortop_args /* {
122: struct vnode *a_dvp;
123: struct componentname *a_cnp;
124: } */ *ap = v;
125:
126: if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19 thorpej 127: PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1 mycroft 128: return (0);
1.13 wrstuden 129: }
130:
131: int
132: genfs_fcntl(v)
133: void *v;
134: {
135: struct vop_fcntl_args /* {
136: struct vnode *a_vp;
137: u_int a_command;
138: caddr_t a_data;
139: int a_fflag;
140: struct ucred *a_cred;
141: struct proc *a_p;
142: } */ *ap = v;
143:
144: if (ap->a_command == F_SETFL)
145: return (0);
146: else
147: return (EOPNOTSUPP);
1.1 mycroft 148: }
149:
/*
 * Placeholder for a vnode operation that must never be invoked:
 * calling it panics the kernel.  The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
	/* NOTREACHED */
}
158:
/*
 * Vnode operation that does nothing and reports success.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{
	return 0;
}
167:
/*
 * Vnode operation that unconditionally fails with EINVAL.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{
	return EINVAL;
}
176:
/*
 * Vnode operation that unconditionally fails with EOPNOTSUPP.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{
	return EOPNOTSUPP;
}
185:
1.12 wrstuden 186: /*
187: * Called when an fs doesn't support a particular vop but the vop needs to
188: * vrele, vput, or vunlock passed in vnodes.
189: */
190: int
191: genfs_eopnotsupp_rele(v)
192: void *v;
193: {
194: struct vop_generic_args /*
195: struct vnodeop_desc *a_desc;
196: / * other random data follows, presumably * /
197: } */ *ap = v;
198: struct vnodeop_desc *desc = ap->a_desc;
199: struct vnode *vp;
200: int flags, i, j, offset;
201:
202: flags = desc->vdesc_flags;
203: for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
204: if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
205: break; /* stop at end of list */
206: if ((j = flags & VDESC_VP0_WILLPUT)) {
207: vp = *VOPARG_OFFSETTO(struct vnode**,offset,ap);
208: switch (j) {
209: case VDESC_VP0_WILLPUT:
210: vput(vp);
211: break;
212: case VDESC_VP0_WILLUNLOCK:
213: VOP_UNLOCK(vp, 0);
214: break;
215: case VDESC_VP0_WILLRELE:
216: vrele(vp);
217: break;
218: }
219: }
220: }
221:
222: return (EOPNOTSUPP);
223: }
224:
/*
 * Vnode operation that unconditionally fails with EBADF.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{
	return EBADF;
}
233:
/*
 * Vnode operation that unconditionally fails with ENOTTY.
 * The argument is ignored.
 */
/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{
	return ENOTTY;
}
242:
243:
244: /*
1.15 fvdl 245: * Eliminate all activity associated with the requested vnode
1.6 fvdl 246: * and with all vnodes aliased to the requested vnode.
247: */
248: int
249: genfs_revoke(v)
250: void *v;
251: {
252: struct vop_revoke_args /* {
253: struct vnode *a_vp;
254: int a_flags;
255: } */ *ap = v;
256: struct vnode *vp, *vq;
257: struct proc *p = curproc; /* XXX */
258:
259: #ifdef DIAGNOSTIC
260: if ((ap->a_flags & REVOKEALL) == 0)
261: panic("genfs_revoke: not revokeall");
262: #endif
263:
264: vp = ap->a_vp;
265: simple_lock(&vp->v_interlock);
266:
267: if (vp->v_flag & VALIASED) {
268: /*
269: * If a vgone (or vclean) is already in progress,
270: * wait until it is done and return.
271: */
272: if (vp->v_flag & VXLOCK) {
273: vp->v_flag |= VXWANT;
274: simple_unlock(&vp->v_interlock);
275: tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
276: return (0);
277: }
278: /*
279: * Ensure that vp will not be vgone'd while we
280: * are eliminating its aliases.
281: */
282: vp->v_flag |= VXLOCK;
283: simple_unlock(&vp->v_interlock);
284: while (vp->v_flag & VALIASED) {
285: simple_lock(&spechash_slock);
286: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
287: if (vq->v_rdev != vp->v_rdev ||
288: vq->v_type != vp->v_type || vp == vq)
289: continue;
290: simple_unlock(&spechash_slock);
291: vgone(vq);
292: break;
293: }
294: if (vq == NULLVP)
295: simple_unlock(&spechash_slock);
296: }
297: /*
298: * Remove the lock so that vgone below will
299: * really eliminate the vnode after which time
300: * vgone will awaken any sleepers.
301: */
302: simple_lock(&vp->v_interlock);
303: vp->v_flag &= ~VXLOCK;
304: }
305: vgonel(vp, p);
306: return (0);
307: }
308:
309: /*
1.12 wrstuden 310: * Lock the node.
1.6 fvdl 311: */
312: int
1.12 wrstuden 313: genfs_lock(v)
1.6 fvdl 314: void *v;
315: {
316: struct vop_lock_args /* {
317: struct vnode *a_vp;
318: int a_flags;
319: } */ *ap = v;
320: struct vnode *vp = ap->a_vp;
321:
1.12 wrstuden 322: return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
1.6 fvdl 323: }
324:
325: /*
1.12 wrstuden 326: * Unlock the node.
1.6 fvdl 327: */
328: int
1.12 wrstuden 329: genfs_unlock(v)
1.6 fvdl 330: void *v;
331: {
332: struct vop_unlock_args /* {
333: struct vnode *a_vp;
334: int a_flags;
335: } */ *ap = v;
336: struct vnode *vp = ap->a_vp;
337:
1.12 wrstuden 338: return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
339: &vp->v_interlock));
1.6 fvdl 340: }
341:
342: /*
1.12 wrstuden 343: * Return whether or not the node is locked.
1.6 fvdl 344: */
345: int
1.12 wrstuden 346: genfs_islocked(v)
1.6 fvdl 347: void *v;
348: {
349: struct vop_islocked_args /* {
350: struct vnode *a_vp;
351: } */ *ap = v;
352: struct vnode *vp = ap->a_vp;
353:
1.12 wrstuden 354: return (lockstatus(&vp->v_lock));
355: }
356:
357: /*
358: * Stubs to use when there is no locking to be done on the underlying object.
359: */
360: int
361: genfs_nolock(v)
362: void *v;
363: {
364: struct vop_lock_args /* {
365: struct vnode *a_vp;
366: int a_flags;
367: struct proc *a_p;
368: } */ *ap = v;
369:
370: /*
371: * Since we are not using the lock manager, we must clear
372: * the interlock here.
373: */
374: if (ap->a_flags & LK_INTERLOCK)
375: simple_unlock(&ap->a_vp->v_interlock);
376: return (0);
377: }
378:
/*
 * Unlock stub for objects without locking: nothing to release,
 * always reports success.
 */
int
genfs_nounlock(v)
	void *v;
{

	return 0;
}
385:
/*
 * Islocked stub for objects without locking: always answers 0.
 */
int
genfs_noislocked(v)
	void *v;
{

	return 0;
}
392:
/*
 * Local lease check for NFS servers.  When the kernel is built with
 * NFSSERVER the arguments are packaged up for nqsrv_getlease(), whose
 * result is deliberately ignored.  Without NFSSERVER this is a null
 * operation.  Returns 0 either way.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
#endif /* NFSSERVER */

	return (0);
}
420:
421: /*
422: * generic VM getpages routine.
423: * Return PG_BUSY pages for the given range,
424: * reading from backing store if necessary.
 *
 * Return values visible in this function:
 *   EINVAL       more than 16 pages were requested, or the center page
 *                lies at or beyond EOF and PGO_PASTEOF is not set.
 *   EBUSY        PGO_LOCKED request and the center page was not found.
 *   EINPROGRESS  i/o was set up and PGO_SYNCIO is not set.
 *   other        an error from VOP_SIZE, VOP_BMAP, biowait or
 *                VOP_BALLOCN.
 *   0            success; the requested pages are copied to a_m when
 *                a_m is not NULL.
425: */
426:
427: int
428: genfs_getpages(v)
429: void *v;
430: {
431: struct vop_getpages_args /* {
432: struct vnode *a_vp;
433: voff_t a_offset;
434: vm_page_t *a_m;
435: int *a_count;
436: int a_centeridx;
437: vm_prot_t a_access_type;
438: int a_advice;
439: int a_flags;
440: } */ *ap = v;
441:
1.26 chs 442: off_t newsize, eof;
443: off_t offset, origoffset, startoffset, endoffset, raoffset;
1.21 chs 444: daddr_t lbn, blkno;
445: int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
446: int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
447: int flags = ap->a_flags;
448: size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
449: vaddr_t kva;
450: struct buf *bp, *mbp;
451: struct vnode *vp = ap->a_vp;
452: struct uvm_object *uobj = &vp->v_uvm.u_obj;
453: struct vm_page *pgs[16]; /* XXXUBC 16 */
454: struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */
455: boolean_t async = (flags & PGO_SYNCIO) == 0;
456: boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
457: boolean_t sawhole = FALSE;
458: UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
459:
460: /* XXXUBC temp limit */
461: if (*ap->a_count > 16) {
462: return EINVAL;
463: }
464:
1.26 chs 465: error = 0;
466: origoffset = ap->a_offset;
467: orignpages = *ap->a_count;
/*
 * with PGO_PASTEOF the size handed to VOP_SIZE is extended to cover
 * the end of the requested range.
 */
468: if (flags & PGO_PASTEOF) {
469: newsize = MAX(vp->v_uvm.u_size,
470: origoffset + (orignpages << PAGE_SHIFT));
471: } else {
472: newsize = vp->v_uvm.u_size;
473: }
474: error = VOP_SIZE(vp, newsize, &eof);
1.21 chs 475: if (error) {
476: return error;
477: }
478:
479: #ifdef DIAGNOSTIC
/*
 * NOTE(review): a centeridx equal to the count passes this check, yet
 * centeridx is used below as an index into a_m.  confirm whether >= was
 * intended.
 */
480: if (ap->a_centeridx < 0 || ap->a_centeridx > *ap->a_count) {
481: panic("genfs_getpages: centeridx %d out of range",
482: ap->a_centeridx);
483: }
1.26 chs 484: if (origoffset & (PAGE_SIZE - 1) || origoffset < 0) {
1.21 chs 485: panic("genfs_getpages: offset 0x%x", (int)ap->a_offset);
486: }
487: if (*ap->a_count < 0) {
488: panic("genfs_getpages: count %d < 0", *ap->a_count);
489: }
490: #endif
491:
492: /*
493: * Bounds-check the request.
494: */
495:
496: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= eof &&
497: (flags & PGO_PASTEOF) == 0) {
498: if ((flags & PGO_LOCKED) == 0) {
499: simple_unlock(&uobj->vmobjlock);
500: }
501: UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
502: origoffset, *ap->a_count, eof,0);
503: return EINVAL;
504: }
505:
506: /*
507: * For PGO_LOCKED requests, just return whatever's in memory.
508: */
509:
510: if (flags & PGO_LOCKED) {
511: uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
512: UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
513:
514: return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
515: }
516:
517: /* vnode is VOP_LOCKed, uobj is locked */
518:
519: if (write && (vp->v_flag & VONWORKLST) == 0) {
520: vn_syncer_add_to_worklist(vp, filedelay);
521: }
522:
523: /*
524: * find the requested pages and make some simple checks.
525: * leave space in the page array for a whole block.
526: */
527:
528: fs_bshift = vp->v_mount->mnt_fs_bshift;
529: fs_bsize = 1 << fs_bshift;
530: dev_bshift = vp->v_mount->mnt_dev_bshift;
531: dev_bsize = 1 << dev_bshift;
532: KASSERT((eof & (dev_bsize - 1)) == 0);
533:
1.26 chs 534: if ((flags & PGO_PASTEOF) == 0) {
535: orignpages = MIN(orignpages,
536: round_page(eof - origoffset) >> PAGE_SHIFT);
1.21 chs 537: }
538: npages = orignpages;
/*
 * startoffset and endoffset expand the request to file system block
 * boundaries; ridx is the index of the first requested page within
 * that expanded range.
 */
539: startoffset = origoffset & ~(fs_bsize - 1);
540: endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
541: + fs_bsize - 1) & ~(fs_bsize - 1));
1.26 chs 542: if ((flags & PGO_PASTEOF) == 0) {
543: endoffset = MIN(endoffset, round_page(eof));
544: }
1.21 chs 545: ridx = (origoffset - startoffset) >> PAGE_SHIFT;
546:
547: memset(pgs, 0, sizeof(pgs));
548: uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
549:
550: /*
551: * if PGO_OVERWRITE is set, don't bother reading the pages.
552: * PGO_OVERWRITE also means that the caller guarantees
553: * that the pages already have backing store allocated.
554: */
555:
556: if (flags & PGO_OVERWRITE) {
557: UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
558:
559: for (i = 0; i < npages; i++) {
560: struct vm_page *pg = pgs[ridx + i];
561:
562: if (pg->flags & PG_FAKE) {
563: uvm_pagezero(pg);
564: pg->flags &= ~(PG_FAKE);
565: }
566: pg->flags &= ~(PG_RDONLY);
567: }
1.26 chs 568: npages += ridx;
1.21 chs 569: goto out;
570: }
571:
572: /*
573: * if the pages are already resident, just return them.
574: */
575:
576: for (i = 0; i < npages; i++) {
577: struct vm_page *pg = pgs[ridx + i];
578:
579: if ((pg->flags & PG_FAKE) ||
580: (write && (pg->flags & PG_RDONLY))) {
581: break;
582: }
583: }
584: if (i == npages) {
585: UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
586: raoffset = origoffset + (orignpages << PAGE_SHIFT);
1.26 chs 587: npages += ridx;
1.21 chs 588: goto raout;
589: }
590:
591: /*
592: * the page wasn't resident and we're not overwriting,
593: * so we're going to have to do some i/o.
594: * find any additional pages needed to cover the expanded range.
595: */
596:
597: if (startoffset != origoffset) {
598:
599: /*
600: * XXXUBC we need to avoid deadlocks caused by locking
601: * additional pages at lower offsets than pages we
602: * already have locked. for now, unlock them all and
603: * start over.
604: */
605:
606: for (i = 0; i < npages; i++) {
607: struct vm_page *pg = pgs[ridx + i];
608:
609: if (pg->flags & PG_FAKE) {
610: pg->flags |= PG_RELEASED;
611: }
612: }
613: uvm_page_unbusy(&pgs[ridx], npages);
614: memset(pgs, 0, sizeof(pgs));
615:
616: UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
617: startoffset, endoffset, 0,0);
618: npages = (endoffset - startoffset) >> PAGE_SHIFT;
619: npgs = npages;
620: uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
621: }
622: simple_unlock(&uobj->vmobjlock);
623:
624: /*
625: * read the desired page(s).
626: */
627:
/*
 * bytes is the part of the mapped range that lies below eof;
 * tailbytes is the remainder of the mapping past it.
 */
628: totalbytes = npages << PAGE_SHIFT;
1.26 chs 629: bytes = MIN(totalbytes, eof - startoffset);
1.21 chs 630: tailbytes = totalbytes - bytes;
631: skipbytes = 0;
632:
633: kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
634: UVMPAGER_MAPIN_READ);
635:
/* set up the master buf that describes the whole transfer. */
636: s = splbio();
637: mbp = pool_get(&bufpool, PR_WAITOK);
638: splx(s);
639: mbp->b_bufsize = totalbytes;
640: mbp->b_data = (void *)kva;
641: mbp->b_resid = mbp->b_bcount = bytes;
642: mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
643: mbp->b_iodone = uvm_aio_biodone;
644: mbp->b_vp = vp;
645: LIST_INIT(&mbp->b_dep);
646:
647: /*
648: * if EOF is in the middle of the last page, zero the part past EOF.
649: */
650:
1.23 chs 651: if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
1.21 chs 652: memset((void *)(kva + bytes), 0, tailbytes);
653: }
654:
655: /*
656: * now loop over the pages, reading as needed.
657: */
658:
659: if (write) {
660: lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
661: } else {
662: lockmgr(&vp->v_glock, LK_SHARED, NULL);
663: }
664:
665: bp = NULL;
666: for (offset = startoffset;
667: bytes > 0;
668: offset += iobytes, bytes -= iobytes) {
669:
670: /*
671: * skip pages which don't need to be read.
672: */
673:
674: pidx = (offset - startoffset) >> PAGE_SHIFT;
675: while ((pgs[pidx]->flags & PG_FAKE) == 0) {
676: size_t b;
677:
1.24 chs 678: KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.26 chs 679: b = MIN(PAGE_SIZE, bytes);
1.21 chs 680: offset += b;
681: bytes -= b;
682: skipbytes += b;
683: pidx++;
684: UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
685: offset, 0,0,0);
686: if (bytes == 0) {
687: goto loopdone;
688: }
689: }
690:
691: /*
692: * bmap the file to find out the blkno to read from and
693: * how much we can read in one i/o. if bmap returns an error,
694: * skip the rest of the top-level i/o.
695: */
696:
697: lbn = offset >> fs_bshift;
698: error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
699: if (error) {
700: UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
701: lbn, error,0,0);
702: skipbytes += bytes;
703: goto loopdone;
704: }
705:
706: /*
707: * see how many pages can be read with this i/o.
708: * reduce the i/o size if necessary to avoid
709: * overwriting pages with valid data.
710: */
711:
1.26 chs 712: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
713: bytes);
1.21 chs 714: if (offset + iobytes > round_page(offset)) {
715: pcount = 1;
716: while (pidx + pcount < npages &&
717: pgs[pidx + pcount]->flags & PG_FAKE) {
718: pcount++;
719: }
1.26 chs 720: iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.21 chs 721: (offset - trunc_page(offset)));
722: }
723:
724: /*
725: * if this block isn't allocated, zero it instead of reading it.
726: * if this is a read access, mark the pages we zeroed PG_RDONLY.
727: */
728:
729: if (blkno < 0) {
730: UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
731:
732: sawhole = TRUE;
733: memset((char *)kva + (offset - startoffset), 0,
734: iobytes);
735: skipbytes += iobytes;
736:
737: if (!write) {
738: int holepages =
739: (round_page(offset + iobytes) -
740: trunc_page(offset)) >> PAGE_SHIFT;
741: for (i = 0; i < holepages; i++) {
742: pgs[pidx + i]->flags |= PG_RDONLY;
743: }
744: }
745: continue;
746: }
747:
748: /*
749: * allocate a sub-buf for this piece of the i/o
750: * (or just use mbp if there's only 1 piece),
751: * and start it going.
752: */
753:
754: if (offset == startoffset && iobytes == bytes) {
755: bp = mbp;
756: } else {
757: s = splbio();
758: bp = pool_get(&bufpool, PR_WAITOK);
759: splx(s);
760: bp->b_data = (char *)kva + offset - startoffset;
761: bp->b_resid = bp->b_bcount = iobytes;
762: bp->b_flags = B_BUSY|B_READ|B_CALL;
763: bp->b_iodone = uvm_aio_biodone1;
764: bp->b_vp = vp;
765: LIST_INIT(&bp->b_dep);
766: }
767: bp->b_lblkno = 0;
768: bp->b_private = mbp;
769:
770: /* adjust physical blkno for partial blocks */
1.25 fvdl 771: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21 chs 772: dev_bshift);
773:
774: UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
775: bp, offset, iobytes, bp->b_blkno);
776:
777: VOP_STRATEGY(bp);
778: }
779:
/*
 * account for the bytes that needed no i/o (already valid pages,
 * holes, or the remainder after a bmap error).  a pending error is
 * recorded on the master buf, and the master buf is completed here
 * when nothing is left outstanding.
 */
780: loopdone:
781: if (skipbytes) {
782: s = splbio();
783: if (error) {
784: mbp->b_flags |= B_ERROR;
785: mbp->b_error = error;
786: }
787: mbp->b_resid -= skipbytes;
788: if (mbp->b_resid == 0) {
789: biodone(mbp);
790: }
791: splx(s);
792: }
793:
794: if (async) {
795: UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
796: lockmgr(&vp->v_glock, LK_RELEASE, NULL);
797: return EINPROGRESS;
798: }
/* bp is non-NULL only if at least one i/o was handed to VOP_STRATEGY. */
799: if (bp != NULL) {
800: error = biowait(mbp);
801: }
802: s = splbio();
803: pool_put(&bufpool, mbp);
804: splx(s);
805: uvm_pagermapout(kva, npages);
1.24 chs 806: raoffset = startoffset + totalbytes;
1.21 chs 807:
808: /*
809: * if we encountered a hole then we have to do a little more work.
810: * for read faults, we marked the page PG_RDONLY so that future
811: * write accesses to the page will fault again.
812: * for write faults, we must make sure that the backing store for
813: * the page is completely allocated while the pages are locked.
814: */
815:
816: if (error == 0 && sawhole && write) {
817: error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
818: cred, 0);
819: if (error) {
820: UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
821: lbn, error,0,0);
822: lockmgr(&vp->v_glock, LK_RELEASE, NULL);
823: simple_lock(&uobj->vmobjlock);
824: goto out;
825: }
826: }
827: lockmgr(&vp->v_glock, LK_RELEASE, NULL);
828: simple_lock(&uobj->vmobjlock);
829:
830: /*
831: * see if we want to start any readahead.
832: * XXXUBC for now, just read the next 128k on 64k boundaries.
833: * this is pretty nonsensical, but it is 50% faster than reading
834: * just the next 64k.
835: */
836:
837: raout:
1.24 chs 838: if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
1.21 chs 839: PAGE_SHIFT <= 16) {
840: int racount;
841:
842: racount = 1 << (16 - PAGE_SHIFT);
843: (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
844: VM_PROT_READ, 0, 0);
845: simple_lock(&uobj->vmobjlock);
846:
847: racount = 1 << (16 - PAGE_SHIFT);
848: (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
849: VM_PROT_READ, 0, 0);
850: simple_lock(&uobj->vmobjlock);
851: }
852:
853: /*
854: * we're almost done! release the pages...
855: * for errors, we free the pages.
856: * otherwise we activate them and mark them as valid and clean.
857: * also, unbusy pages that were not actually requested.
858: */
859:
860: out:
861: if (error) {
862: uvm_lock_pageq();
863: for (i = 0; i < npages; i++) {
864: if (pgs[i] == NULL) {
865: continue;
866: }
867: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
868: pgs[i], pgs[i]->flags, 0,0);
1.26 chs 869: if (pgs[i]->flags & PG_WANTED) {
870: wakeup(pgs[i]);
871: }
872: if (pgs[i]->flags & PG_RELEASED) {
873: uvm_unlock_pageq();
874: (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
875: uvm_lock_pageq();
1.21 chs 876: continue;
877: }
1.26 chs 878: if (pgs[i]->flags & PG_FAKE) {
879: uvm_pagefree(pgs[i]);
1.21 chs 880: }
881: }
882: uvm_unlock_pageq();
883: simple_unlock(&uobj->vmobjlock);
884: UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
885: return error;
886: }
887:
888: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.26 chs 889: uvm_lock_pageq();
1.21 chs 890: for (i = 0; i < npages; i++) {
891: if (pgs[i] == NULL) {
892: continue;
893: }
894: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
895: pgs[i], pgs[i]->flags, 0,0);
896: if (pgs[i]->flags & PG_FAKE) {
897: UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
898: pgs[i], pgs[i]->offset,0,0);
899: pgs[i]->flags &= ~(PG_FAKE);
900: pmap_clear_modify(pgs[i]);
901: pmap_clear_reference(pgs[i]);
902: }
903: if (write) {
904: pgs[i]->flags &= ~(PG_RDONLY);
905: }
/*
 * pages outside the originally requested window (and all pages for
 * an async request) are unbusied here rather than returned busy.
 */
906: if (i < ridx || i >= ridx + orignpages || async) {
907: UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
908: pgs[i], pgs[i]->offset,0,0);
909: if (pgs[i]->flags & PG_WANTED) {
910: wakeup(pgs[i]);
911: }
1.26 chs 912: if (pgs[i]->flags & PG_RELEASED) {
913: uvm_unlock_pageq();
914: (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
915: uvm_lock_pageq();
916: continue;
1.21 chs 917: }
1.26 chs 918: uvm_pageactivate(pgs[i]);
1.21 chs 919: pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
920: UVM_PAGE_OWN(pgs[i], NULL);
921: }
922: }
1.26 chs 923: uvm_unlock_pageq();
1.21 chs 924: simple_unlock(&uobj->vmobjlock);
925: if (ap->a_m != NULL) {
926: memcpy(ap->a_m, &pgs[ridx],
927: orignpages * sizeof(struct vm_page *));
928: }
929: return 0;
930: }
931:
932: /*
933: * generic VM putpages routine.
934: * Write the given range of pages to backing store.
935: */
936:
937: int
938: genfs_putpages(v)
939: void *v;
940: {
941: struct vop_putpages_args /* {
942: struct vnode *a_vp;
943: struct vm_page **a_m;
944: int a_count;
945: int a_flags;
946: int *a_rtvals;
947: } */ *ap = v;
948:
949: int s, error, error2, npages, run;
950: int fs_bshift, dev_bshift, dev_bsize;
951: vaddr_t kva;
952: off_t eof, offset, startoffset;
953: size_t bytes, iobytes, skipbytes;
954: daddr_t lbn, blkno;
955: struct vm_page *pg;
956: struct buf *mbp, *bp;
957: struct vnode *vp = ap->a_vp;
958: boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
959: UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
960:
961: simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
962:
963: error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
964: if (error) {
965: return error;
966: }
967:
968: error = error2 = 0;
969: npages = ap->a_count;
970: fs_bshift = vp->v_mount->mnt_fs_bshift;
971: dev_bshift = vp->v_mount->mnt_dev_bshift;
972: dev_bsize = 1 << dev_bshift;
973: KASSERT((eof & (dev_bsize - 1)) == 0);
974:
975: pg = ap->a_m[0];
976: startoffset = pg->offset;
1.26 chs 977: bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1.21 chs 978: skipbytes = 0;
979: KASSERT(bytes != 0);
980:
981: kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);
982:
983: s = splbio();
984: vp->v_numoutput += 2;
985: mbp = pool_get(&bufpool, PR_WAITOK);
986: UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
987: vp, mbp, vp->v_numoutput, bytes);
988: splx(s);
989: mbp->b_bufsize = npages << PAGE_SHIFT;
990: mbp->b_data = (void *)kva;
991: mbp->b_resid = mbp->b_bcount = bytes;
992: mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
993: (async ? B_CALL : 0) |
994: (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
995: mbp->b_iodone = uvm_aio_biodone;
996: mbp->b_vp = vp;
997: LIST_INIT(&mbp->b_dep);
998:
999: bp = NULL;
1000: for (offset = startoffset;
1001: bytes > 0;
1002: offset += iobytes, bytes -= iobytes) {
1003: lbn = offset >> fs_bshift;
1004: error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
1005: if (error) {
1006: UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1007: skipbytes += bytes;
1008: bytes = 0;
1009: break;
1010: }
1011:
1.26 chs 1012: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1013: bytes);
1.21 chs 1014: if (blkno == (daddr_t)-1) {
1015: skipbytes += iobytes;
1016: continue;
1017: }
1018:
1019: /* if it's really one i/o, don't make a second buf */
1020: if (offset == startoffset && iobytes == bytes) {
1021: bp = mbp;
1022: } else {
1023: s = splbio();
1024: vp->v_numoutput++;
1025: bp = pool_get(&bufpool, PR_WAITOK);
1026: UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1027: vp, bp, vp->v_numoutput, 0);
1028: splx(s);
1029: bp->b_data = (char *)kva +
1030: (vaddr_t)(offset - pg->offset);
1031: bp->b_resid = bp->b_bcount = iobytes;
1032: bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
1033: bp->b_iodone = uvm_aio_biodone1;
1034: bp->b_vp = vp;
1035: LIST_INIT(&bp->b_dep);
1036: }
1037: bp->b_lblkno = 0;
1038: bp->b_private = mbp;
1039:
1040: /* adjust physical blkno for partial blocks */
1.25 fvdl 1041: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21 chs 1042: dev_bshift);
1043: UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
1044: vp, offset, bp->b_bcount, bp->b_blkno);
1045: VOP_STRATEGY(bp);
1046: }
1047: if (skipbytes) {
1048: UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0);
1049: s = splbio();
1050: mbp->b_resid -= skipbytes;
1051: if (mbp->b_resid == 0) {
1052: biodone(mbp);
1053: }
1054: splx(s);
1055: }
1056: if (async) {
1057: UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
1058: return EINPROGRESS;
1059: }
1060: if (bp != NULL) {
1061: UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1062: error2 = biowait(mbp);
1063: }
1.22 chs 1064: if (bioops.io_pageiodone) {
1065: (*bioops.io_pageiodone)(mbp);
1.21 chs 1066: }
1067: s = splbio();
1068: vwakeup(mbp);
1069: pool_put(&bufpool, mbp);
1070: splx(s);
1071: uvm_pagermapout(kva, npages);
1072: UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1073: return error ? error : error2;
1074: }
1075:
1076: int
1077: genfs_size(v)
1078: void *v;
1079: {
1080: struct vop_size_args /* {
1081: struct vnode *a_vp;
1082: off_t a_size;
1083: off_t *a_eobp;
1084: } */ *ap = v;
1085: int bsize;
1086:
1087: bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
1.24 chs 1088: *ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
1.21 chs 1089: return 0;
1.1 mycroft 1090: }
CVSweb <webmaster@jp.NetBSD.org>