Annotation of src/sys/miscfs/genfs/genfs_vnops.c, Revision 1.40
1.40 ! lukem 1: /* $NetBSD: genfs_vnops.c,v 1.39 2001/10/03 14:13:08 enami Exp $ */
1.6 fvdl 2:
3: /*
4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: * 3. All advertising materials mentioning features or use of this software
16: * must display the following acknowledgement:
17: * This product includes software developed by the University of
18: * California, Berkeley and its contributors.
19: * 4. Neither the name of the University nor the names of its contributors
20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: *
35: */
1.40 ! lukem 36:
! 37: #include <sys/cdefs.h>
! 38: __KERNEL_RCSID(0, "$NetBSD$");
1.5 perry 39:
1.8 thorpej 40: #include "opt_nfsserver.h"
41:
1.1 mycroft 42: #include <sys/param.h>
43: #include <sys/systm.h>
1.6 fvdl 44: #include <sys/proc.h>
1.1 mycroft 45: #include <sys/kernel.h>
46: #include <sys/mount.h>
47: #include <sys/namei.h>
48: #include <sys/vnode.h>
1.13 wrstuden 49: #include <sys/fcntl.h>
1.1 mycroft 50: #include <sys/malloc.h>
1.3 mycroft 51: #include <sys/poll.h>
1.37 chs 52: #include <sys/mman.h>
1.1 mycroft 53:
54: #include <miscfs/genfs/genfs.h>
1.37 chs 55: #include <miscfs/genfs/genfs_node.h>
1.6 fvdl 56: #include <miscfs/specfs/specdev.h>
1.1 mycroft 57:
1.21 chs 58: #include <uvm/uvm.h>
59: #include <uvm/uvm_pager.h>
60:
1.8 thorpej 61: #ifdef NFSSERVER
62: #include <nfs/rpcv2.h>
63: #include <nfs/nfsproto.h>
64: #include <nfs/nfs.h>
65: #include <nfs/nqnfs.h>
66: #include <nfs/nfs_var.h>
67: #endif
68:
1.1 mycroft 69: int
1.3 mycroft 70: genfs_poll(v)
1.1 mycroft 71: void *v;
72: {
1.3 mycroft 73: struct vop_poll_args /* {
1.1 mycroft 74: struct vnode *a_vp;
1.3 mycroft 75: int a_events;
1.1 mycroft 76: struct proc *a_p;
77: } */ *ap = v;
78:
1.3 mycroft 79: return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1.1 mycroft 80: }
81:
82: int
83: genfs_fsync(v)
84: void *v;
85: {
86: struct vop_fsync_args /* {
87: struct vnode *a_vp;
88: struct ucred *a_cred;
1.7 kleink 89: int a_flags;
1.20     fvdl     90: 		off_t a_offlo;
                  91: 		off_t a_offhi;
1.1 mycroft 92: struct proc *a_p;
93: } */ *ap = v;
1.16 augustss 94: struct vnode *vp = ap->a_vp;
1.11 mycroft 95: int wait;
1.1 mycroft 96:
1.11 mycroft 97: wait = (ap->a_flags & FSYNC_WAIT) != 0;
98: vflushbuf(vp, wait);
99: if ((ap->a_flags & FSYNC_DATAONLY) != 0)
1.7 kleink 100: return (0);
1.11 mycroft 101: else
1.18 mycroft 102: return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
1.1 mycroft 103: }
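/*
 * Example (a sketch, not from this file): a caller wanting a fully
 * synchronous flush of the whole file would invoke the vop as
 *
 *	error = VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, p);
 *
 * which lands here for filesystems that map vop_fsync to genfs_fsync;
 * note that this implementation ignores the offlo/offhi range and
 * always flushes everything.
 */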
104:
105: int
1.4 kleink 106: genfs_seek(v)
107: void *v;
108: {
109: struct vop_seek_args /* {
110: struct vnode *a_vp;
111: off_t a_oldoff;
112: off_t a_newoff;
113: struct ucred *a_ucred;
114: } */ *ap = v;
115:
116: if (ap->a_newoff < 0)
117: return (EINVAL);
118:
119: return (0);
120: }
121:
122: int
1.1 mycroft 123: genfs_abortop(v)
124: void *v;
125: {
126: struct vop_abortop_args /* {
127: struct vnode *a_dvp;
128: struct componentname *a_cnp;
129: } */ *ap = v;
130:
131: if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
1.19 thorpej 132: PNBUF_PUT(ap->a_cnp->cn_pnbuf);
1.1 mycroft 133: return (0);
1.13 wrstuden 134: }
135:
136: int
137: genfs_fcntl(v)
138: void *v;
139: {
140: struct vop_fcntl_args /* {
141: struct vnode *a_vp;
142: u_int a_command;
143: caddr_t a_data;
144: int a_fflag;
145: struct ucred *a_cred;
146: struct proc *a_p;
147: } */ *ap = v;
148:
149: if (ap->a_command == F_SETFL)
150: return (0);
151: else
152: return (EOPNOTSUPP);
1.1 mycroft 153: }
154:
155: /*ARGSUSED*/
156: int
157: genfs_badop(v)
158: void *v;
159: {
160:
161: panic("genfs: bad op");
162: }
163:
164: /*ARGSUSED*/
165: int
166: genfs_nullop(v)
167: void *v;
168: {
169:
170: return (0);
1.10 kleink 171: }
172:
173: /*ARGSUSED*/
174: int
175: genfs_einval(v)
176: void *v;
177: {
178:
179: return (EINVAL);
1.1 mycroft 180: }
181:
182: /*ARGSUSED*/
183: int
184: genfs_eopnotsupp(v)
185: void *v;
186: {
187:
188: return (EOPNOTSUPP);
189: }
190:
1.12 wrstuden 191: /*
                 192:  * Called when an fs doesn't support a particular vop, but the vop still
                 193:  * needs to vrele, vput, or unlock the vnodes that were passed in.
194: */
195: int
196: genfs_eopnotsupp_rele(v)
197: void *v;
198: {
199: struct vop_generic_args /*
200: struct vnodeop_desc *a_desc;
201: / * other random data follows, presumably * /
202: } */ *ap = v;
203: struct vnodeop_desc *desc = ap->a_desc;
204: struct vnode *vp;
205: int flags, i, j, offset;
206:
207: flags = desc->vdesc_flags;
                 208: 	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
209: if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
210: break; /* stop at end of list */
211: if ((j = flags & VDESC_VP0_WILLPUT)) {
212: vp = *VOPARG_OFFSETTO(struct vnode**,offset,ap);
213: switch (j) {
214: case VDESC_VP0_WILLPUT:
215: vput(vp);
216: break;
217: case VDESC_VP0_WILLUNLOCK:
218: VOP_UNLOCK(vp, 0);
219: break;
220: case VDESC_VP0_WILLRELE:
221: vrele(vp);
222: break;
223: }
224: }
225: }
226:
227: return (EOPNOTSUPP);
228: }
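/*
 * Example (hypothetical opv-table entry): a filesystem with no rename
 * support could point the vop here so the referenced and locked
 * vnodes that namei hands it are still dropped correctly:
 *
 *	{ &vop_rename_desc, genfs_eopnotsupp_rele },
 */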
229:
1.1 mycroft 230: /*ARGSUSED*/
231: int
232: genfs_ebadf(v)
233: void *v;
234: {
235:
236: return (EBADF);
1.9 matthias 237: }
238:
239: /* ARGSUSED */
240: int
241: genfs_enoioctl(v)
242: void *v;
243: {
244:
245: return (ENOTTY);
1.6 fvdl 246: }
247:
248:
249: /*
1.15 fvdl 250: * Eliminate all activity associated with the requested vnode
1.6 fvdl 251: * and with all vnodes aliased to the requested vnode.
252: */
253: int
254: genfs_revoke(v)
255: void *v;
256: {
257: struct vop_revoke_args /* {
258: struct vnode *a_vp;
259: int a_flags;
260: } */ *ap = v;
261: struct vnode *vp, *vq;
262: struct proc *p = curproc; /* XXX */
263:
264: #ifdef DIAGNOSTIC
265: if ((ap->a_flags & REVOKEALL) == 0)
266: panic("genfs_revoke: not revokeall");
267: #endif
268:
269: vp = ap->a_vp;
270: simple_lock(&vp->v_interlock);
271:
272: if (vp->v_flag & VALIASED) {
273: /*
274: * If a vgone (or vclean) is already in progress,
275: * wait until it is done and return.
276: */
277: if (vp->v_flag & VXLOCK) {
278: vp->v_flag |= VXWANT;
279: simple_unlock(&vp->v_interlock);
280: tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
281: return (0);
282: }
283: /*
284: * Ensure that vp will not be vgone'd while we
285: * are eliminating its aliases.
286: */
287: vp->v_flag |= VXLOCK;
288: simple_unlock(&vp->v_interlock);
289: while (vp->v_flag & VALIASED) {
290: simple_lock(&spechash_slock);
291: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
292: if (vq->v_rdev != vp->v_rdev ||
293: vq->v_type != vp->v_type || vp == vq)
294: continue;
295: simple_unlock(&spechash_slock);
296: vgone(vq);
297: break;
298: }
299: if (vq == NULLVP)
300: simple_unlock(&spechash_slock);
301: }
302: /*
303: * Remove the lock so that vgone below will
304: * really eliminate the vnode after which time
305: * vgone will awaken any sleepers.
306: */
307: simple_lock(&vp->v_interlock);
308: vp->v_flag &= ~VXLOCK;
309: }
310: vgonel(vp, p);
311: return (0);
312: }
313:
314: /*
1.12 wrstuden 315: * Lock the node.
1.6 fvdl 316: */
317: int
1.12 wrstuden 318: genfs_lock(v)
1.6 fvdl 319: void *v;
320: {
321: struct vop_lock_args /* {
322: struct vnode *a_vp;
323: int a_flags;
324: } */ *ap = v;
325: struct vnode *vp = ap->a_vp;
326:
1.12 wrstuden 327: return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
1.6 fvdl 328: }
329:
330: /*
1.12 wrstuden 331: * Unlock the node.
1.6 fvdl 332: */
333: int
1.12 wrstuden 334: genfs_unlock(v)
1.6 fvdl 335: void *v;
336: {
337: struct vop_unlock_args /* {
338: struct vnode *a_vp;
339: int a_flags;
340: } */ *ap = v;
341: struct vnode *vp = ap->a_vp;
342:
1.12 wrstuden 343: return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
344: &vp->v_interlock));
1.6 fvdl 345: }
346:
347: /*
1.12 wrstuden 348: * Return whether or not the node is locked.
1.6 fvdl 349: */
350: int
1.12 wrstuden 351: genfs_islocked(v)
1.6 fvdl 352: void *v;
353: {
354: struct vop_islocked_args /* {
355: struct vnode *a_vp;
356: } */ *ap = v;
357: struct vnode *vp = ap->a_vp;
358:
1.12 wrstuden 359: return (lockstatus(&vp->v_lock));
360: }
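/*
 * Example (sketch): filesystems that keep their vnode lock in v_lock
 * can delegate all three locking vops in their opv table:
 *
 *	{ &vop_lock_desc, genfs_lock },
 *	{ &vop_unlock_desc, genfs_unlock },
 *	{ &vop_islocked_desc, genfs_islocked },
 */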
361:
362: /*
363: * Stubs to use when there is no locking to be done on the underlying object.
364: */
365: int
366: genfs_nolock(v)
367: void *v;
368: {
369: struct vop_lock_args /* {
370: struct vnode *a_vp;
371: int a_flags;
373: } */ *ap = v;
374:
375: /*
376: * Since we are not using the lock manager, we must clear
377: * the interlock here.
378: */
379: if (ap->a_flags & LK_INTERLOCK)
380: simple_unlock(&ap->a_vp->v_interlock);
381: return (0);
382: }
383:
384: int
385: genfs_nounlock(v)
386: void *v;
387: {
388: return (0);
389: }
390:
391: int
392: genfs_noislocked(v)
393: void *v;
394: {
395: return (0);
1.8 thorpej 396: }
397:
398: /*
399: * Local lease check for NFS servers. Just set up args and let
400: * nqsrv_getlease() do the rest. If NFSSERVER is not in the kernel,
401: * this is a null operation.
402: */
403: int
404: genfs_lease_check(v)
405: void *v;
406: {
407: #ifdef NFSSERVER
408: struct vop_lease_args /* {
409: struct vnode *a_vp;
410: struct proc *a_p;
411: struct ucred *a_cred;
412: int a_flag;
413: } */ *ap = v;
414: u_int32_t duration = 0;
415: int cache;
416: u_quad_t frev;
417:
418: (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
419: NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
420: return (0);
421: #else
422: return (0);
423: #endif /* NFSSERVER */
1.34 chs 424: }
425:
426: int
427: genfs_mmap(v)
428: void *v;
429: {
430: return 0;
1.21 chs 431: }
432:
433: /*
434: * generic VM getpages routine.
435: * Return PG_BUSY pages for the given range,
436: * reading from backing store if necessary.
437: */
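/*
 * A typical synchronous call looks like this (sketch, modeled on the
 * readahead calls further down; the object must be locked by the
 * caller and, except for PGO_LOCKED requests, is unlocked again
 * before we return):
 *
 *	npages = 1;
 *	error = VOP_GETPAGES(vp, offset, pgs, &npages, 0,
 *	    VM_PROT_READ, 0, PGO_SYNCIO);
 */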
438:
439: int
440: genfs_getpages(v)
441: void *v;
442: {
443: struct vop_getpages_args /* {
444: struct vnode *a_vp;
445: voff_t a_offset;
1.33 chs 446: struct vm_page **a_m;
1.21 chs 447: int *a_count;
448: int a_centeridx;
449: vm_prot_t a_access_type;
450: int a_advice;
451: int a_flags;
452: } */ *ap = v;
453:
1.30 chs 454: off_t newsize, diskeof, memeof;
1.26 chs 455: off_t offset, origoffset, startoffset, endoffset, raoffset;
1.21 chs 456: daddr_t lbn, blkno;
457: int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
1.37 chs 458: int fs_bshift, fs_bsize, dev_bshift;
1.21 chs 459: int flags = ap->a_flags;
460: size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
461: vaddr_t kva;
462: struct buf *bp, *mbp;
463: struct vnode *vp = ap->a_vp;
1.36 chs 464: struct vnode *devvp;
1.37 chs 465: struct genfs_node *gp = VTOG(vp);
466: struct uvm_object *uobj = &vp->v_uobj;
467: struct vm_page *pg, *pgs[16]; /* XXXUBC 16 */
1.21 chs 468: struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */
469: boolean_t async = (flags & PGO_SYNCIO) == 0;
470: boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
471: boolean_t sawhole = FALSE;
1.37 chs 472: boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
1.21 chs 473: UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
474:
1.30 chs 475: UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
476: vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
477:
1.21 chs 478: /* XXXUBC temp limit */
479: if (*ap->a_count > 16) {
1.37 chs 480: panic("genfs_getpages: too many pages");
1.21 chs 481: }
482:
1.26 chs 483: error = 0;
484: origoffset = ap->a_offset;
485: orignpages = *ap->a_count;
1.37 chs 486: GOP_SIZE(vp, vp->v_size, &diskeof);
1.26 chs 487: if (flags & PGO_PASTEOF) {
1.37 chs 488: newsize = MAX(vp->v_size,
1.26 chs 489: origoffset + (orignpages << PAGE_SHIFT));
1.37 chs 490: GOP_SIZE(vp, newsize, &memeof);
1.26 chs 491: } else {
1.30 chs 492: memeof = diskeof;
1.21 chs 493: }
1.30      chs    494: 	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
495: KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
496: KASSERT(orignpages > 0);
1.21 chs 497:
498: /*
499: * Bounds-check the request.
500: */
501:
1.30 chs 502: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
1.21 chs 503: if ((flags & PGO_LOCKED) == 0) {
504: simple_unlock(&uobj->vmobjlock);
505: }
506: UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
1.30 chs 507: origoffset, *ap->a_count, memeof,0);
1.21 chs 508: return EINVAL;
509: }
510:
511: /*
512: * For PGO_LOCKED requests, just return whatever's in memory.
513: */
514:
515: if (flags & PGO_LOCKED) {
516: uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
517: UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
518:
519: return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
520: }
521:
522: /* vnode is VOP_LOCKed, uobj is locked */
523:
524: if (write && (vp->v_flag & VONWORKLST) == 0) {
525: vn_syncer_add_to_worklist(vp, filedelay);
526: }
527:
528: /*
529: * find the requested pages and make some simple checks.
530: * leave space in the page array for a whole block.
531: */
532:
1.36 chs 533: if (vp->v_type == VREG) {
534: fs_bshift = vp->v_mount->mnt_fs_bshift;
535: dev_bshift = vp->v_mount->mnt_dev_bshift;
536: } else {
537: fs_bshift = DEV_BSHIFT;
538: dev_bshift = DEV_BSHIFT;
539: }
1.21 chs 540: fs_bsize = 1 << fs_bshift;
541:
1.30 chs 542: orignpages = MIN(orignpages,
543: round_page(memeof - origoffset) >> PAGE_SHIFT);
1.21 chs 544: npages = orignpages;
545: startoffset = origoffset & ~(fs_bsize - 1);
546: endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
547: + fs_bsize - 1) & ~(fs_bsize - 1));
1.30 chs 548: endoffset = MIN(endoffset, round_page(memeof));
1.21 chs 549: ridx = (origoffset - startoffset) >> PAGE_SHIFT;
550:
551: memset(pgs, 0, sizeof(pgs));
552: uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
553:
554: /*
555: * if the pages are already resident, just return them.
556: */
557:
558: for (i = 0; i < npages; i++) {
559: struct vm_page *pg = pgs[ridx + i];
560:
561: if ((pg->flags & PG_FAKE) ||
562: (write && (pg->flags & PG_RDONLY))) {
563: break;
564: }
565: }
566: if (i == npages) {
567: UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
568: raoffset = origoffset + (orignpages << PAGE_SHIFT);
1.26 chs 569: npages += ridx;
1.21 chs 570: goto raout;
571: }
572:
573: /*
1.37 chs 574: * if PGO_OVERWRITE is set, don't bother reading the pages.
575: */
576:
577: if (flags & PGO_OVERWRITE) {
578: UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
579:
580: for (i = 0; i < npages; i++) {
581: struct vm_page *pg = pgs[ridx + i];
582:
583: pg->flags &= ~(PG_RDONLY|PG_CLEAN);
584: }
585: npages += ridx;
586: goto out;
587: }
588:
589: /*
1.21 chs 590: * the page wasn't resident and we're not overwriting,
591: * so we're going to have to do some i/o.
592: * find any additional pages needed to cover the expanded range.
593: */
594:
1.35 chs 595: npages = (endoffset - startoffset) >> PAGE_SHIFT;
596: if (startoffset != origoffset || npages != orignpages) {
1.21 chs 597:
598: /*
1.37 chs 599: * we need to avoid deadlocks caused by locking
1.21 chs 600: * additional pages at lower offsets than pages we
1.37 chs 601: * already have locked. unlock them all and start over.
1.21 chs 602: */
603:
1.35 chs 604: for (i = 0; i < orignpages; i++) {
1.21 chs 605: struct vm_page *pg = pgs[ridx + i];
606:
607: if (pg->flags & PG_FAKE) {
608: pg->flags |= PG_RELEASED;
609: }
610: }
1.35 chs 611: uvm_page_unbusy(&pgs[ridx], orignpages);
1.21 chs 612: memset(pgs, 0, sizeof(pgs));
613:
614: UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
615: startoffset, endoffset, 0,0);
616: npgs = npages;
617: uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
618: }
619: simple_unlock(&uobj->vmobjlock);
620:
621: /*
622: * read the desired page(s).
623: */
624:
625: totalbytes = npages << PAGE_SHIFT;
1.30 chs 626: bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
1.21 chs 627: tailbytes = totalbytes - bytes;
628: skipbytes = 0;
629:
630: kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
631: UVMPAGER_MAPIN_READ);
632:
633: s = splbio();
634: mbp = pool_get(&bufpool, PR_WAITOK);
635: splx(s);
636: mbp->b_bufsize = totalbytes;
637: mbp->b_data = (void *)kva;
638: mbp->b_resid = mbp->b_bcount = bytes;
639: mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
1.37      chs    640: 	mbp->b_iodone = (async ? uvm_aio_biodone : NULL);
1.21 chs 641: mbp->b_vp = vp;
642: LIST_INIT(&mbp->b_dep);
643:
644: /*
1.31 chs 645: * if EOF is in the middle of the range, zero the part past EOF.
1.38 chs 646: * if the page including EOF is not PG_FAKE, skip over it since
647: * in that case it has valid data that we need to preserve.
1.21 chs 648: */
649:
1.31 chs 650: if (tailbytes > 0) {
1.38 chs 651: size_t tailstart = bytes;
652:
653: if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
654: tailstart = round_page(tailstart);
655: tailbytes -= tailstart - bytes;
656: }
1.37 chs 657: UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
1.38 chs 658: kva, tailstart, tailbytes,0);
659: memset((void *)(kva + tailstart), 0, tailbytes);
1.21 chs 660: }
661:
662: /*
663: * now loop over the pages, reading as needed.
664: */
665:
666: if (write) {
1.37 chs 667: lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
1.21 chs 668: } else {
1.37 chs 669: lockmgr(&gp->g_glock, LK_SHARED, NULL);
1.21 chs 670: }
671:
672: bp = NULL;
673: for (offset = startoffset;
674: bytes > 0;
675: offset += iobytes, bytes -= iobytes) {
676:
677: /*
678: * skip pages which don't need to be read.
679: */
680:
681: pidx = (offset - startoffset) >> PAGE_SHIFT;
1.35 chs 682: while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
1.21 chs 683: size_t b;
684:
1.24 chs 685: KASSERT((offset & (PAGE_SIZE - 1)) == 0);
1.26 chs 686: b = MIN(PAGE_SIZE, bytes);
1.21 chs 687: offset += b;
688: bytes -= b;
689: skipbytes += b;
690: pidx++;
691: UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
692: offset, 0,0,0);
693: if (bytes == 0) {
694: goto loopdone;
695: }
696: }
697:
698: /*
699: * bmap the file to find out the blkno to read from and
700: * how much we can read in one i/o. if bmap returns an error,
701: * skip the rest of the top-level i/o.
702: */
703:
704: lbn = offset >> fs_bshift;
1.36 chs 705: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 706: if (error) {
707: UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
708: lbn, error,0,0);
709: skipbytes += bytes;
710: goto loopdone;
711: }
712:
713: /*
714: * see how many pages can be read with this i/o.
715: * reduce the i/o size if necessary to avoid
716: * overwriting pages with valid data.
717: */
718:
1.26 chs 719: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
720: bytes);
1.21 chs 721: if (offset + iobytes > round_page(offset)) {
722: pcount = 1;
723: while (pidx + pcount < npages &&
724: pgs[pidx + pcount]->flags & PG_FAKE) {
725: pcount++;
726: }
1.26 chs 727: iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
1.21 chs 728: (offset - trunc_page(offset)));
729: }
730:
731: /*
732: * if this block isn't allocated, zero it instead of reading it.
733: * if this is a read access, mark the pages we zeroed PG_RDONLY.
734: */
735:
736: if (blkno < 0) {
1.35 chs 737: int holepages = (round_page(offset + iobytes) -
738: trunc_page(offset)) >> PAGE_SHIFT;
1.21 chs 739: UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
740:
741: sawhole = TRUE;
742: memset((char *)kva + (offset - startoffset), 0,
743: iobytes);
744: skipbytes += iobytes;
745:
1.35 chs 746: for (i = 0; i < holepages; i++) {
747: if (write) {
748: pgs[pidx + i]->flags &= ~PG_CLEAN;
749: } else {
1.21 chs 750: pgs[pidx + i]->flags |= PG_RDONLY;
751: }
752: }
753: continue;
754: }
755:
756: /*
757: * allocate a sub-buf for this piece of the i/o
758: * (or just use mbp if there's only 1 piece),
759: * and start it going.
760: */
761:
762: if (offset == startoffset && iobytes == bytes) {
763: bp = mbp;
764: } else {
765: s = splbio();
766: bp = pool_get(&bufpool, PR_WAITOK);
767: splx(s);
768: bp->b_data = (char *)kva + offset - startoffset;
769: bp->b_resid = bp->b_bcount = iobytes;
770: bp->b_flags = B_BUSY|B_READ|B_CALL;
771: bp->b_iodone = uvm_aio_biodone1;
772: bp->b_vp = vp;
1.37 chs 773: bp->b_proc = NULL;
1.21 chs 774: LIST_INIT(&bp->b_dep);
775: }
776: bp->b_lblkno = 0;
777: bp->b_private = mbp;
1.37 chs 778: if (devvp->v_type == VBLK) {
779: bp->b_dev = devvp->v_rdev;
780: }
1.21 chs 781:
782: /* adjust physical blkno for partial blocks */
1.25 fvdl 783: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21 chs 784: dev_bshift);
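		/*
		 * e.g. with 8KB filesystem blocks (fs_bshift 13) and 512-byte
		 * device blocks (dev_bshift 9), an offset 4096 bytes into the
		 * block starts 4096 >> 9 == 8 sectors past blkno.
		 * (worked example with assumed block sizes.)
		 */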
785:
786: UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
787: bp, offset, iobytes, bp->b_blkno);
788:
789: VOP_STRATEGY(bp);
790: }
791:
792: loopdone:
793: if (skipbytes) {
794: s = splbio();
795: if (error) {
796: mbp->b_flags |= B_ERROR;
797: mbp->b_error = error;
798: }
799: mbp->b_resid -= skipbytes;
800: if (mbp->b_resid == 0) {
801: biodone(mbp);
802: }
803: splx(s);
804: }
805:
806: if (async) {
1.32 chs 807: UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
1.37 chs 808: lockmgr(&gp->g_glock, LK_RELEASE, NULL);
1.32 chs 809: return 0;
1.21 chs 810: }
811: if (bp != NULL) {
812: error = biowait(mbp);
813: }
814: s = splbio();
815: pool_put(&bufpool, mbp);
816: splx(s);
817: uvm_pagermapout(kva, npages);
1.24 chs 818: raoffset = startoffset + totalbytes;
1.21 chs 819:
820: /*
                 821: 	 * if we encountered a hole then we have to do a little more work.
822: * for read faults, we marked the page PG_RDONLY so that future
823: * write accesses to the page will fault again.
824: * for write faults, we must make sure that the backing store for
825: * the page is completely allocated while the pages are locked.
826: */
827:
1.37 chs 828: if (!error && sawhole && write) {
829: for (i = 0; i < npages; i++) {
830: if (pgs[i] == NULL) {
831: continue;
832: }
833: pgs[i]->flags &= ~PG_CLEAN;
834: UVMHIST_LOG(ubchist, "mark dirty pg %p", pgs[i],0,0,0);
1.21 chs 835: }
1.37 chs 836: error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
837: cred);
838: UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
839: startoffset, npages << PAGE_SHIFT, error,0);
1.21 chs 840: }
1.37 chs 841: lockmgr(&gp->g_glock, LK_RELEASE, NULL);
1.21 chs 842: simple_lock(&uobj->vmobjlock);
843:
844: /*
845: * see if we want to start any readahead.
846: * XXXUBC for now, just read the next 128k on 64k boundaries.
847: * this is pretty nonsensical, but it is 50% faster than reading
848: * just the next 64k.
849: */
850:
851: raout:
1.24 chs 852: if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
1.21 chs 853: PAGE_SHIFT <= 16) {
854: int racount;
855:
856: racount = 1 << (16 - PAGE_SHIFT);
857: (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
858: VM_PROT_READ, 0, 0);
859: simple_lock(&uobj->vmobjlock);
860:
861: racount = 1 << (16 - PAGE_SHIFT);
862: (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
863: VM_PROT_READ, 0, 0);
864: simple_lock(&uobj->vmobjlock);
865: }
866:
867: /*
868: * we're almost done! release the pages...
869: * for errors, we free the pages.
870: * otherwise we activate them and mark them as valid and clean.
871: * also, unbusy pages that were not actually requested.
872: */
873:
874: if (error) {
875: for (i = 0; i < npages; i++) {
876: if (pgs[i] == NULL) {
877: continue;
878: }
879: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
880: pgs[i], pgs[i]->flags, 0,0);
1.26 chs 881: if (pgs[i]->flags & PG_FAKE) {
1.37 chs 882: pgs[i]->flags |= PG_RELEASED;
1.21 chs 883: }
884: }
1.37 chs 885: uvm_lock_pageq();
886: uvm_page_unbusy(pgs, npages);
1.21 chs 887: uvm_unlock_pageq();
888: simple_unlock(&uobj->vmobjlock);
889: UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
890: return error;
891: }
892:
1.37 chs 893: out:
1.21 chs 894: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
1.26 chs 895: uvm_lock_pageq();
1.21 chs 896: for (i = 0; i < npages; i++) {
1.37 chs 897: pg = pgs[i];
898: if (pg == NULL) {
1.21 chs 899: continue;
900: }
901: UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1.37 chs 902: pg, pg->flags, 0,0);
903: if (pg->flags & PG_FAKE && !overwrite) {
904: pg->flags &= ~(PG_FAKE);
1.21      chs    905: 			pmap_clear_modify(pg);
906: }
907: if (write) {
1.37 chs 908: pg->flags &= ~(PG_RDONLY);
1.21 chs 909: }
910: if (i < ridx || i >= ridx + orignpages || async) {
911: UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
1.37 chs 912: pg, pg->offset,0,0);
913: if (pg->flags & PG_WANTED) {
914: wakeup(pg);
915: }
916: if (pg->flags & PG_FAKE) {
917: KASSERT(overwrite);
918: uvm_pagezero(pg);
919: }
920: if (pg->flags & PG_RELEASED) {
921: uvm_pagefree(pg);
1.26 chs 922: continue;
1.21 chs 923: }
1.37 chs 924: uvm_pageactivate(pg);
925: pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
926: UVM_PAGE_OWN(pg, NULL);
1.21 chs 927: }
928: }
1.26 chs 929: uvm_unlock_pageq();
1.21 chs 930: simple_unlock(&uobj->vmobjlock);
931: if (ap->a_m != NULL) {
932: memcpy(ap->a_m, &pgs[ridx],
933: orignpages * sizeof(struct vm_page *));
934: }
935: return 0;
936: }
937:
938: /*
939: * generic VM putpages routine.
940: * Write the given range of pages to backing store.
1.37 chs 941: *
942: * => "offhi == 0" means flush all pages at or after "offlo".
                 943:  * => object should be locked by caller.  we may _unlock_ the object
                 944:  *	temporarily if we need to clean a page (PGO_CLEANIT), or if
                 945:  *	PGO_SYNCIO is set and there are busy pages to wait for.
                 946:  *	the object is always unlocked again by the time we return.
947: * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
948: * thus, a caller might want to unlock higher level resources
949: * (e.g. vm_map) before calling flush.
                 950:  * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will
                 951:  *	not block.
952: * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
953: * => NOTE: we rely on the fact that the object's memq is a TAILQ and
954: * that new pages are inserted on the tail end of the list. thus,
955: * we can make a complete pass through the object in one go by starting
956: * at the head and working towards the tail (new pages are put in
957: * front of us).
958: * => NOTE: we are allowed to lock the page queues, so the caller
959: * must not be holding the page queue lock.
960: *
961: * note on "cleaning" object and PG_BUSY pages:
962: * this routine is holding the lock on the object. the only time
963: * that it can run into a PG_BUSY page that it does not own is if
964: * some other process has started I/O on the page (e.g. either
965: * a pagein, or a pageout). if the PG_BUSY page is being paged
966: * in, then it can not be dirty (!PG_CLEAN) because no one has
967: * had a chance to modify it yet. if the PG_BUSY page is being
968: * paged out then it means that someone else has already started
969: * cleaning the page for us (how nice!). in this case, if we
970: * have syncio specified, then after we make our pass through the
971: * object we need to wait for the other PG_BUSY pages to clear
972: * off (i.e. we need to do an iosync). also note that once a
973: * page is PG_BUSY it must stay in its object until it is un-busyed.
974: *
975: * note on page traversal:
976: * we can traverse the pages in an object either by going down the
977: * linked list in "uobj->memq", or we can go over the address range
978: * by page doing hash table lookups for each address. depending
979: * on how many pages are in the object it may be cheaper to do one
980: * or the other. we set "by_list" to true if we are using memq.
981: * if the cost of a hash lookup was equal to the cost of the list
982: * traversal we could compare the number of pages in the start->stop
983: * range to the total number of pages in the object. however, it
984: * seems that a hash table lookup is more expensive than the linked
985: * list traversal, so we multiply the number of pages in the
986: * range by an estimate of the relatively higher cost of the hash lookup.
1.21 chs 987: */
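/*
 * Example (sketch): flush everything on one vnode and wait, much as a
 * fsync-style caller would:
 *
 *	simple_lock(&vp->v_uobj.vmobjlock);
 *	error = VOP_PUTPAGES(vp, 0, 0,
 *	    PGO_ALLPAGES | PGO_CLEANIT | PGO_SYNCIO);
 *
 * (the caller takes the interlock; see the locking notes above.)
 */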
988:
989: int
990: genfs_putpages(v)
991: void *v;
992: {
993: struct vop_putpages_args /* {
994: struct vnode *a_vp;
1.37 chs 995: voff_t a_offlo;
996: voff_t a_offhi;
1.21 chs 997: int a_flags;
998: } */ *ap = v;
1.37 chs 999: struct vnode *vp = ap->a_vp;
1000: struct uvm_object *uobj = &vp->v_uobj;
1001: off_t startoff = ap->a_offlo;
1002: off_t endoff = ap->a_offhi;
1003: off_t off;
1004: int flags = ap->a_flags;
1005: int n = MAXBSIZE >> PAGE_SHIFT;
1006: int i, s, error, npages, nback;
1007: int freeflag;
1008: struct vm_page *pgs[n], *pg, *nextpg, *tpg, curmp, endmp;
1009: boolean_t wasclean, by_list, needs_clean;
1010: boolean_t async = (flags & PGO_SYNCIO) == 0;
1011: UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
1012:
1013: KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1014: KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
1015: KASSERT(startoff < endoff || endoff == 0);
1016:
1017: UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
1018: vp, uobj->uo_npages, startoff, endoff - startoff);
1019: if (uobj->uo_npages == 0) {
1020: if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1021: (vp->v_flag & VONWORKLST)) {
1022: vp->v_flag &= ~VONWORKLST;
1023: LIST_REMOVE(vp, v_synclist);
1024: }
1025: simple_unlock(&uobj->vmobjlock);
1026: return 0;
1027: }
1028:
1029: /*
1030: * the vnode has pages, set up to process the request.
1031: */
1032:
1033: error = 0;
1034: wasclean = TRUE;
1035: off = startoff;
1036: if (endoff == 0 || flags & PGO_ALLPAGES) {
1037: endoff = trunc_page(LLONG_MAX);
1038: }
1039: by_list = (uobj->uo_npages <=
1040: ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
1041:
1042: /*
1043: * start the loop. when scanning by list, hold the last page
1044: * in the list before we start. pages allocated after we start
1045: * will be added to the end of the list, so we can stop at the
1046: * current last page.
1047: */
1048:
1049: freeflag = (curproc == uvm.pagedaemon_proc) ? PG_PAGEOUT : PG_RELEASED;
1050: curmp.uobject = uobj;
1051: curmp.offset = (voff_t)-1;
1052: curmp.flags = PG_BUSY;
1053: endmp.uobject = uobj;
1054: endmp.offset = (voff_t)-1;
1055: endmp.flags = PG_BUSY;
1056: if (by_list) {
1057: pg = TAILQ_FIRST(&uobj->memq);
1058: TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1059: PHOLD(curproc);
1060: } else {
1061: pg = uvm_pagelookup(uobj, off);
1062: }
1063: nextpg = NULL;
1064: while (by_list || off < endoff) {
1065:
1066: /*
1067: * if the current page is not interesting, move on to the next.
1068: */
1069:
1070: KASSERT(pg == NULL || pg->uobject == uobj);
1071: KASSERT(pg == NULL ||
1072: (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1073: (pg->flags & PG_BUSY) != 0);
1074: if (by_list) {
1075: if (pg == &endmp) {
1076: break;
1077: }
1078: if (pg->offset < startoff || pg->offset >= endoff ||
1079: pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1080: pg = TAILQ_NEXT(pg, listq);
1081: continue;
1082: }
1083: off = pg->offset;
1084: } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1085: off += PAGE_SIZE;
1086: if (off < endoff) {
1087: pg = uvm_pagelookup(uobj, off);
1088: }
1089: continue;
1090: }
1.21 chs 1091:
1.37 chs 1092: /*
1093: * if the current page needs to be cleaned and it's busy,
1094: * wait for it to become unbusy.
1095: */
1096:
1097: if (flags & PGO_FREE) {
1098: pmap_page_protect(pg, VM_PROT_NONE);
1099: }
1100: if (flags & PGO_CLEANIT) {
1101: needs_clean = pmap_clear_modify(pg) ||
1102: (pg->flags & PG_CLEAN) == 0;
1103: pg->flags |= PG_CLEAN;
1104: } else {
1105: needs_clean = FALSE;
1106: }
1107: if (needs_clean && pg->flags & PG_BUSY) {
1108: KASSERT(curproc != uvm.pagedaemon_proc);
1109: UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
1110: if (by_list) {
1111: TAILQ_INSERT_BEFORE(pg, &curmp, listq);
1112: UVMHIST_LOG(ubchist, "curmp next %p",
1113: TAILQ_NEXT(&curmp, listq), 0,0,0);
1114: }
1115: pg->flags |= PG_WANTED;
1116: pg->flags &= ~PG_CLEAN;
1117: UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
1118: "genput", 0);
1119: simple_lock(&uobj->vmobjlock);
1120: if (by_list) {
1121: UVMHIST_LOG(ubchist, "after next %p",
1122: TAILQ_NEXT(&curmp, listq), 0,0,0);
1123: pg = TAILQ_NEXT(&curmp, listq);
1124: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1125: } else {
1126: pg = uvm_pagelookup(uobj, off);
1127: }
1128: continue;
1129: }
1130:
1131: /*
1132: * if we're cleaning, build a cluster.
1133: * the cluster will consist of pages which are currently dirty,
1134: * but they will be returned to us marked clean.
1135: * if not cleaning, just operate on the one page.
1136: */
1137:
1138: if (needs_clean) {
1139: wasclean = FALSE;
1140: memset(pgs, 0, sizeof(pgs));
1141: pg->flags |= PG_BUSY;
1142: UVM_PAGE_OWN(pg, "genfs_putpages");
1143:
1144: /*
1145: * first look backward.
1146: */
1147:
1148: npages = MIN(n >> 1, off >> PAGE_SHIFT);
1149: nback = npages;
1150: uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
1151: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
1152: if (nback) {
1153: memmove(&pgs[0], &pgs[npages - nback],
1154: nback * sizeof(pgs[0]));
1155: }
                1156: 			/* don't shrink n here; it also limits later clusters */
1157:
1158: /*
1159: * then plug in our page of interest.
1160: */
1161:
1162: pgs[nback] = pg;
1163:
1164: /*
1165: * then look forward to fill in the remaining space in
1166: * the array of pages.
1167: */
1168:
                1169: 			npages = MIN(n - nback, (endoff - off) >> PAGE_SHIFT) - 1;
1170: uvn_findpages(uobj, off + PAGE_SIZE, &npages,
1171: &pgs[nback + 1],
1172: UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
1173: npages += nback + 1;
1174: } else {
1175: pgs[0] = pg;
1176: npages = 1;
1177: }
1178:
1179: /*
1180: * apply FREE or DEACTIVATE options if requested.
1181: */
1182:
1183: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1184: uvm_lock_pageq();
1185: }
1186: for (i = 0; i < npages; i++) {
1187: tpg = pgs[i];
1188: KASSERT(tpg->uobject == uobj);
1189: if (flags & PGO_DEACTIVATE &&
1190: (tpg->pqflags & PQ_INACTIVE) == 0 &&
1191: tpg->wire_count == 0) {
1192: (void) pmap_clear_reference(tpg);
1193: uvm_pagedeactivate(tpg);
1194: } else if (flags & PGO_FREE) {
1195: pmap_page_protect(tpg, VM_PROT_NONE);
1196: if (tpg->flags & PG_BUSY) {
1197: tpg->flags |= freeflag;
1198: if (freeflag == PG_PAGEOUT) {
1199: uvmexp.paging++;
1200: uvm_pagedequeue(tpg);
1201: }
1202: } else {
1203: nextpg = TAILQ_NEXT(tpg, listq);
1204: uvm_pagefree(tpg);
1205: }
1206: }
1207: }
1208: if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1209: uvm_unlock_pageq();
1210: }
1211: if (needs_clean) {
1212:
1213: /*
1214: * start the i/o. if we're traversing by list,
1215: * keep our place in the list with a marker page.
1216: */
1217:
1218: if (by_list) {
1219: TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1220: listq);
1221: }
1222: simple_unlock(&uobj->vmobjlock);
1223: error = GOP_WRITE(vp, pgs, npages, flags);
1224: simple_lock(&uobj->vmobjlock);
1225: if (by_list) {
1226: pg = TAILQ_NEXT(&curmp, listq);
1227: TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1228: }
1229: if (error == ENOMEM) {
1230: for (i = 0; i < npages; i++) {
1231: tpg = pgs[i];
1232: if (tpg->flags & PG_PAGEOUT) {
1233: tpg->flags &= ~PG_PAGEOUT;
1234: uvmexp.paging--;
1235: }
1236: tpg->flags &= ~PG_CLEAN;
1237: uvm_pageactivate(tpg);
1238: }
1239: uvm_page_unbusy(pgs, npages);
1240: }
1241: if (error) {
1242: break;
1243: }
1244: if (by_list) {
1245: continue;
1246: }
1247: }
1248:
1249: /*
1250: * find the next page and continue if there was no error.
1251: */
1252:
1253: if (by_list) {
1254: if (nextpg) {
1255: pg = nextpg;
1256: nextpg = NULL;
1257: } else {
1258: pg = TAILQ_NEXT(pg, listq);
1259: }
1260: } else {
1261: off += PAGE_SIZE;
1262: if (off < endoff) {
1263: pg = uvm_pagelookup(uobj, off);
1264: }
1265: }
1266: }
1267: if (by_list) {
1268: TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1269: PRELE(curproc);
1270: }
1271:
1272: /*
1273: * if we're cleaning and there was nothing to clean,
1274: * take us off the syncer list. if we started any i/o
1275: * and we're doing sync i/o, wait for all writes to finish.
1276: */
1277:
1278: if ((flags & PGO_CLEANIT) && wasclean &&
1279: startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
1280: LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
1281: (vp->v_flag & VONWORKLST)) {
1282: vp->v_flag &= ~VONWORKLST;
1283: LIST_REMOVE(vp, v_synclist);
1284: }
1285: if (!wasclean && !async) {
1286: s = splbio();
1287: while (vp->v_numoutput != 0) {
1288: vp->v_flag |= VBWAIT;
1289: UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, &uobj->vmobjlock,
1290: FALSE, "genput2",0);
1291: simple_lock(&uobj->vmobjlock);
1292: }
1293: splx(s);
1294: }
1295: simple_unlock(&uobj->vmobjlock);
1296: return error;
1297: }
1298:
1299: int
1300: genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1301: {
1302: int s, error, run;
1303: int fs_bshift, dev_bshift;
1.21 chs 1304: vaddr_t kva;
1305: off_t eof, offset, startoffset;
1306: size_t bytes, iobytes, skipbytes;
1307: daddr_t lbn, blkno;
1308: struct vm_page *pg;
1309: struct buf *mbp, *bp;
1.36 chs 1310: struct vnode *devvp;
1.37 chs 1311: boolean_t async = (flags & PGO_SYNCIO) == 0;
1.39 enami 1312: UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);
1.21 chs 1313:
1.37 chs 1314: UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1315: vp, pgs, npages, flags);
1.21 chs 1316:
1.37 chs 1317: GOP_SIZE(vp, vp->v_size, &eof);
1.36 chs 1318: if (vp->v_type == VREG) {
1319: fs_bshift = vp->v_mount->mnt_fs_bshift;
1320: dev_bshift = vp->v_mount->mnt_dev_bshift;
1321: } else {
1322: fs_bshift = DEV_BSHIFT;
1323: dev_bshift = DEV_BSHIFT;
1324: }
1.37 chs 1325: error = 0;
1326: pg = pgs[0];
1.21 chs 1327: startoffset = pg->offset;
1.26 chs 1328: bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1.21 chs 1329: skipbytes = 0;
1330: KASSERT(bytes != 0);
1331:
1.37 chs 1332: kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
1333: UVMPAGER_MAPIN_WAITOK);
1.21 chs 1334:
1335: s = splbio();
                1336: 	vp->v_numoutput += 2; /* one for biodone, one for aiodone */
1337: mbp = pool_get(&bufpool, PR_WAITOK);
1338: UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1339: vp, mbp, vp->v_numoutput, bytes);
1340: splx(s);
1341: mbp->b_bufsize = npages << PAGE_SHIFT;
1342: mbp->b_data = (void *)kva;
1343: mbp->b_resid = mbp->b_bcount = bytes;
1.37 chs 1344: mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? B_CALL : 0);
1.21 chs 1345: mbp->b_iodone = uvm_aio_biodone;
1346: mbp->b_vp = vp;
1347: LIST_INIT(&mbp->b_dep);
1348:
1349: bp = NULL;
1350: for (offset = startoffset;
1351: bytes > 0;
1352: offset += iobytes, bytes -= iobytes) {
1353: lbn = offset >> fs_bshift;
1.36 chs 1354: error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1.21 chs 1355: if (error) {
1356: UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1357: skipbytes += bytes;
1358: bytes = 0;
1359: break;
1360: }
1361:
1.26 chs 1362: iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1363: bytes);
1.21 chs 1364: if (blkno == (daddr_t)-1) {
1365: skipbytes += iobytes;
1366: continue;
1367: }
1368:
1369: /* if it's really one i/o, don't make a second buf */
1370: if (offset == startoffset && iobytes == bytes) {
1371: bp = mbp;
1372: } else {
1373: s = splbio();
1374: vp->v_numoutput++;
1375: bp = pool_get(&bufpool, PR_WAITOK);
1376: UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1377: vp, bp, vp->v_numoutput, 0);
1378: splx(s);
1379: bp->b_data = (char *)kva +
1380: (vaddr_t)(offset - pg->offset);
1381: bp->b_resid = bp->b_bcount = iobytes;
1.37 chs 1382: bp->b_flags = B_BUSY|B_WRITE|B_CALL;
1.21 chs 1383: bp->b_iodone = uvm_aio_biodone1;
1384: bp->b_vp = vp;
1385: LIST_INIT(&bp->b_dep);
1386: }
1387: bp->b_lblkno = 0;
1388: bp->b_private = mbp;
1.37 chs 1389: if (devvp->v_type == VBLK) {
1390: bp->b_dev = devvp->v_rdev;
1391: }
1.21 chs 1392:
1393: /* adjust physical blkno for partial blocks */
1.25 fvdl 1394: bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1.21 chs 1395: dev_bshift);
1396: UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
1397: vp, offset, bp->b_bcount, bp->b_blkno);
1398: VOP_STRATEGY(bp);
1399: }
1400: if (skipbytes) {
1.29 chs 1401: UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1.21 chs 1402: s = splbio();
1.29 chs 1403: if (error) {
1404: mbp->b_flags |= B_ERROR;
1405: mbp->b_error = error;
1406: }
1.37 chs 1407: mbp->b_resid -= skipbytes;
1.21 chs 1408: if (mbp->b_resid == 0) {
1409: biodone(mbp);
1410: }
1411: splx(s);
1412: }
1413: if (async) {
1.32 chs 1414: UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1415: return 0;
1.21 chs 1416: }
1.37 chs 1417: UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1418: error = biowait(mbp);
1419: uvm_aio_aiodone(mbp);
1.21 chs 1420: UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1.29 chs 1421: return error;
1.21 chs 1422: }
1423:
1.37 chs 1424: void
1425: genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
1426: {
1427: struct genfs_node *gp = VTOG(vp);
1428:
1429: lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
1430: gp->g_op = ops;
1431: }
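/*
 * Example (sketch, with hypothetical "myfs" names): a filesystem plugs
 * in its genfs_ops once per vnode, typically when the vnode is set up:
 *
 *	static struct genfs_ops myfs_genfsops = {
 *		genfs_size,		/ * gop_size * /
 *		myfs_gop_alloc,		/ * gop_alloc * /
 *		genfs_gop_write,	/ * gop_write * /
 *	};
 *	...
 *	genfs_node_init(vp, &myfs_genfsops);
 */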
1432:
1433: void
1434: genfs_size(struct vnode *vp, off_t size, off_t *eobp)
1.21 chs 1435: {
1436: int bsize;
1437:
1.37 chs 1438: bsize = 1 << vp->v_mount->mnt_fs_bshift;
1439: *eobp = (size + bsize - 1) & ~(bsize - 1);
1.1 mycroft 1440: }
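/*
 * e.g. with mnt_fs_bshift == 13 (8KB blocks), a size of 10000 rounds
 * up to an end-of-block of 16384.
 */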