Annotation of src/sys/ufs/ufs/ufs_readwrite.c, Revision 1.54.2.7
1.54.2.7! skrll 1: /* $NetBSD: ufs_readwrite.c,v 1.54.2.6 2004/10/30 09:30:30 skrll Exp $ */
1.3 cgd 2:
1.1 mycroft 3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.54.2.2 skrll 15: * 3. Neither the name of the University nor the names of its contributors
1.1 mycroft 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: *
1.16 fvdl 31: * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
1.1 mycroft 32: */
1.37 lukem 33:
34: #include <sys/cdefs.h>
1.54.2.7! skrll 35: __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.54.2.6 2004/10/30 09:30:30 skrll Exp $");
1.15 mrg 36:
1.1 mycroft 37: #ifdef LFS_READWRITE
1.16 fvdl 38: #define BLKSIZE(a, b, c) blksize(a, b, c)
1.1 mycroft 39: #define FS struct lfs
40: #define I_FS i_lfs
41: #define READ lfs_read
42: #define READ_S "lfs_read"
43: #define WRITE lfs_write
44: #define WRITE_S "lfs_write"
45: #define fs_bsize lfs_bsize
1.54.2.3 skrll 46: #define fs_bmask lfs_bmask
1.1 mycroft 47: #else
48: #define BLKSIZE(a, b, c) blksize(a, b, c)
49: #define FS struct fs
50: #define I_FS i_fs
51: #define READ ffs_read
52: #define READ_S "ffs_read"
53: #define WRITE ffs_write
54: #define WRITE_S "ffs_write"
55: #endif
56:
57: /*
58: * Vnode op for reading.
59: */
60: /* ARGSUSED */
/*
 * READ (compiles to ffs_read or lfs_read, see the macros above):
 * vnode op for reading.  Regular-file reads go through the UBC page
 * cache; directories, symlinks and (for LFS) the Ifile use the buffer
 * cache path.  On success the inode is marked IN_ACCESS unless the
 * mount has MNT_NOATIME set.  Returns 0 or an errno.
 */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct ufsmount *ump;
	struct buf *bp;
	FS *fs;
	void *win;
	vsize_t bytelen;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, flags;
	boolean_t usepc = FALSE;

	vp = ap->a_vp;
	ip = VTOI(vp);
	ump = ip->i_ump;
	uio = ap->a_uio;
	error = 0;

#ifdef DIAGNOSTIC
	/* Paranoia: valid direction, and a readable vnode type. */
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		/*
		 * Short symlinks are stored in the inode itself; a VLNK
		 * that reaches here must have on-disk blocks.
		 */
		if (ip->i_size < ump->um_maxsymlinklen ||
		    (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/* Reading at or past EOF transfers nothing (atime still updates). */
	if (uio->uio_offset >= ip->i_size)
		goto out;

#ifdef LFS_READWRITE
	/* The LFS Ifile is always read through the buffer cache. */
	usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
#else /* !LFS_READWRITE */
	usepc = vp->v_type == VREG;
#endif /* !LFS_READWRITE */
	if (usepc) {
		/*
		 * Page-cache path: map a UBC window over the file range
		 * and uiomove directly out of it, one chunk at a time.
		 */
		while (uio->uio_resid > 0) {
			/* Clamp the transfer to EOF. */
			bytelen = MIN(ip->i_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;

			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
			ubc_release(win, flags);
			if (error)
				break;
		}
		goto out;
	}

	/* Buffer-cache path (VDIR, VLNK, LFS Ifile). */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Transfer at most: rest of this block, caller's resid, EOF. */
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		/* Read-ahead the next block unless this is the last one. */
		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* Release the buffer still held after an error/short-read break. */
	if (bp != NULL)
		brelse(bp);

 out:
	/*
	 * Mark the access time for update; with IO_SYNC, push the
	 * inode to disk synchronously right away.
	 */
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	}
	return (error);
}
182:
183: /*
184: * Vnode op for writing.
185: */
/*
 * WRITE (compiles to ffs_write or lfs_write, see the macros above):
 * vnode op for writing.  Regular files are written through the UBC
 * page cache; directories and symlinks use the buffer cache ("bcache")
 * path.  Enforces RLIMIT_FSIZE (delivering SIGXFSZ), clears setuid/
 * setgid on non-superuser writes, and on error rolls the file back to
 * its original size with VOP_TRUNCATE.  Returns 0 or an errno.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct genfs_node *gp;
	FS *fs;
	struct buf *bp;
	struct lwp *l;
	struct ucred *cred;
	daddr_t lbn;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int aflag;
	int ubc_alloc_flags, ubc_release_flags;
	int extended=0;
	void *win;
	vsize_t bytelen;
	boolean_t async;
	boolean_t usepc = FALSE;
#ifdef LFS_READWRITE
	boolean_t need_unreserve = FALSE;
#endif
	struct ufsmount *ump;

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	gp = VTOG(vp);
	ump = ip->i_ump;

	/* UVM's idea of the size and the inode's must agree on entry. */
	KASSERT(vp->v_size == ip->i_size);
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		/* Append-only files may only be written at EOF. */
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		/* Directory metadata must always be written synchronously. */
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
	default:
		panic("%s: type", WRITE_S);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	l = uio->uio_lwp;
	if (vp->v_type == VREG && l &&
	    uio->uio_offset + uio->uio_resid >
	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		/* Over the file-size rlimit: signal the process and fail. */
		psignal(l->l_proc, SIGXFSZ);
		return (EFBIG);
	}
	if (uio->uio_resid == 0)
		return (0);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	/* Remember starting offset/resid/size so we can roll back on error. */
	origoff = uio->uio_offset;
	resid = uio->uio_resid;
	osize = ip->i_size;
	error = 0;

	usepc = vp->v_type == VREG;
#ifdef LFS_READWRITE
	async = TRUE;

	/* Account writes.  This overcounts if pages are already dirty. */
	if (usepc) {
		simple_lock(&lfs_subsys_lock);
		lfs_subsys_pages += round_page(uio->uio_resid) >> PAGE_SHIFT;
		simple_unlock(&lfs_subsys_lock);
	}
	lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* !LFS_READWRITE */
	if (!usepc)
		goto bcache;

	/*
	 * [preallocoff, endallocoff) appears to bound the whole new
	 * blocks created by this write, where allocation can proceed
	 * without pre-zeroing pages (UBC_FAULTBUSY below).
	 * NOTE(review): range derivation inferred from use — confirm.
	 */
	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - blkoff(fs, nsize);

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
	    lblkno(fs, osize) != lblkno(fs, nsize) &&
	    blkroundup(fs, osize) != osize) {
		off_t eob;

		eob = blkroundup(fs, osize);
		error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag);
		if (error)
			goto out;
		if (flags & B_SYNC) {
			/* Synchronously flush the expanded fragment. */
			vp->v_size = eob;
			simple_lock(&vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
			    round_page(eob), PGO_CLEANIT | PGO_SYNCIO);
		}
	}

	ubc_alloc_flags = UBC_WRITE;
	while (uio->uio_resid > 0) {
		boolean_t extending; /* if we're extending a whole block */
		off_t newoff;

		oldoff = uio->uio_offset;
		blkoffset = blkoff(fs, uio->uio_offset);
		/* One filesystem block (or the tail of the request) at a time. */
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */
		extending = uio->uio_offset >= preallocoff &&
		    uio->uio_offset < endallocoff;

		if (!extending) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error)
				break;
			ubc_alloc_flags &= ~UBC_FAULTBUSY;
		} else {
			/* Block allocation is serialized by the genfs glock. */
			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
			if (error)
				break;
			ubc_alloc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    ubc_alloc_flags);
		error = uiomove(win, bytelen, uio);
		if (error && extending) {
			/*
			 * if we haven't initialized the pages yet,
			 * do it now.  it's safe to use memset here
			 * because we just mapped the pages above.
			 */
			memset(win, 0, bytelen);
		}
		ubc_release_flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
		ubc_release(win, ubc_release_flags);

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 *
		 * we should update the size even when uiomove failed.
		 * otherwise ffs_truncate can't flush soft update states.
		 */

		newoff = oldoff + bytelen;
		if (vp->v_size < newoff) {
			uvm_vnp_setsize(vp, newoff);
			extended = 1;
		}

		if (error)
			break;

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
			if (error)
				break;
		}
	}
	if (error == 0 && ioflag & IO_SYNC) {
		/* IO_SYNC: push everything we wrote to disk before returning. */
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask),
		    round_page(blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	goto out;

 bcache:
	/*
	 * Buffer-cache path (VDIR/VLNK): first flush and free any cached
	 * pages over the range so the page cache and buffer cache don't
	 * hold conflicting copies.
	 */
	simple_lock(&vp->v_interlock);
	VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	while (uio->uio_resid > 0) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		/* Partial-block writes must read/clear the rest of the buf. */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

#ifdef LFS_READWRITE
		/*
		 * Reserve worst-case segment space before allocating;
		 * released again after VOP_BWRITE (or at loop exit below).
		 */
		error = lfs_reserve(fs, vp, NULL,
		    btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		if (error)
			break;
		need_unreserve = TRUE;
#endif
		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);

		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_size) {
			/* Growing the file: keep inode, disk inode and UVM in sync. */
			ip->i_size = uio->uio_offset + xfersize;
			DIP_ASSIGN(ip, size, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			extended = 1;
		}
		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			break;
		}
#ifdef LFS_READWRITE
		(void)VOP_BWRITE(bp);
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		need_unreserve = FALSE;
#else
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			/* Full block: start the write now, don't wait. */
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
#ifdef LFS_READWRITE
	/* Drop a reservation left over from a mid-loop break. */
	if (need_unreserve) {
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
	}
#endif

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
 out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_ASSIGN(ip, mode, ip->i_mode);
	}
	/* Notify kqueue listeners if any data was transferred. */
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		/*
		 * Error: undo the write by truncating back to the original
		 * size and restoring the caller's uio state.
		 */
		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
		    uio->uio_lwp);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	KASSERT(vp->v_size == ip->i_size);
	return (error);
}
CVSweb <webmaster@jp.NetBSD.org>