Annotation of src/sys/uvm/uvm_mmap.c, Revision 1.98
1.98 ! ad 1: /* $NetBSD: uvm_mmap.c,v 1.97 2006/05/20 15:45:38 elad Exp $ */
1.1 mrg 2:
3: /*
4: * Copyright (c) 1997 Charles D. Cranor and Washington University.
1.51 chs 5: * Copyright (c) 1991, 1993 The Regents of the University of California.
1.1 mrg 6: * Copyright (c) 1988 University of Utah.
1.51 chs 7: *
1.1 mrg 8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * the Systems Programming Group of the University of Utah Computer
12: * Science Department.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: * 3. All advertising materials mentioning features or use of this software
23: * must display the following acknowledgement:
24: * This product includes software developed by the Charles D. Cranor,
1.51 chs 25: * Washington University, University of California, Berkeley and
1.1 mrg 26: * its contributors.
27: * 4. Neither the name of the University nor the names of its contributors
28: * may be used to endorse or promote products derived from this software
29: * without specific prior written permission.
30: *
31: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41: * SUCH DAMAGE.
42: *
43: * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
44: * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
1.3 mrg 45: * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
1.1 mrg 46: */
47:
48: /*
49: * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
50: * function.
51: */
1.60 lukem 52:
53: #include <sys/cdefs.h>
1.98 ! ad 54: __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.97 2006/05/20 15:45:38 elad Exp $");
1.80 jdolecek 55:
56: #include "opt_compat_netbsd.h"
1.97 elad 57: #include "opt_pax.h"
1.60 lukem 58:
1.1 mrg 59: #include <sys/param.h>
60: #include <sys/systm.h>
61: #include <sys/file.h>
62: #include <sys/filedesc.h>
63: #include <sys/resourcevar.h>
64: #include <sys/mman.h>
65: #include <sys/mount.h>
66: #include <sys/proc.h>
67: #include <sys/malloc.h>
68: #include <sys/vnode.h>
69: #include <sys/conf.h>
1.9 mrg 70: #include <sys/stat.h>
1.97 elad 71:
72: #ifdef PAX_MPROTECT
73: #include <sys/pax.h>
74: #endif /* PAX_MPROTECT */
1.1 mrg 75:
76: #include <miscfs/specfs/specdev.h>
77:
1.67 thorpej 78: #include <sys/sa.h>
1.1 mrg 79: #include <sys/syscallargs.h>
80:
81: #include <uvm/uvm.h>
82: #include <uvm/uvm_device.h>
83:
1.80 jdolecek 84: #ifndef COMPAT_ZERODEV
1.81 dsl 85: #define COMPAT_ZERODEV(dev) (0)
1.80 jdolecek 86: #endif
1.1 mrg 87:
88: /*
89: * unimplemented VM system calls:
90: */
91:
92: /*
93: * sys_sbrk: sbrk system call.
94: */
95:
96: /* ARGSUSED */
1.6 mrg 97: int
1.67 thorpej 98: sys_sbrk(l, v, retval)
99: struct lwp *l;
1.6 mrg 100: void *v;
101: register_t *retval;
1.1 mrg 102: {
103: #if 0
1.6 mrg 104: struct sys_sbrk_args /* {
1.33 kleink 105: syscallarg(intptr_t) incr;
1.20 mrg 106: } */ *uap = v;
1.1 mrg 107: #endif
1.6 mrg 108:
1.17 kleink 109: return (ENOSYS);
1.1 mrg 110: }
111:
112: /*
113: * sys_sstk: sstk system call.
114: */
115:
116: /* ARGSUSED */
1.6 mrg 117: int
1.67 thorpej 118: sys_sstk(l, v, retval)
119: struct lwp *l;
1.6 mrg 120: void *v;
121: register_t *retval;
1.1 mrg 122: {
123: #if 0
1.6 mrg 124: struct sys_sstk_args /* {
1.20 mrg 125: syscallarg(int) incr;
126: } */ *uap = v;
1.1 mrg 127: #endif
1.6 mrg 128:
1.17 kleink 129: return (ENOSYS);
1.1 mrg 130: }
131:
132: /*
133: * sys_mincore: determine if pages are in core or not.
134: */
135:
136: /* ARGSUSED */
1.6 mrg 137: int
1.67 thorpej 138: sys_mincore(l, v, retval)
139: struct lwp *l;
1.6 mrg 140: void *v;
141: register_t *retval;
1.1 mrg 142: {
1.6 mrg 143: struct sys_mincore_args /* {
1.22 thorpej 144: syscallarg(void *) addr;
1.20 mrg 145: syscallarg(size_t) len;
146: syscallarg(char *) vec;
147: } */ *uap = v;
1.67 thorpej 148: struct proc *p = l->l_proc;
1.56 chs 149: struct vm_page *pg;
1.22 thorpej 150: char *vec, pgi;
151: struct uvm_object *uobj;
152: struct vm_amap *amap;
153: struct vm_anon *anon;
1.53 chs 154: struct vm_map_entry *entry;
1.22 thorpej 155: vaddr_t start, end, lim;
1.53 chs 156: struct vm_map *map;
1.22 thorpej 157: vsize_t len;
158: int error = 0, npgs;
159:
160: map = &p->p_vmspace->vm_map;
161:
162: start = (vaddr_t)SCARG(uap, addr);
163: len = SCARG(uap, len);
164: vec = SCARG(uap, vec);
165:
166: if (start & PAGE_MASK)
167: return (EINVAL);
168: len = round_page(len);
169: end = start + len;
170: if (end <= start)
171: return (EINVAL);
172:
173: /*
174: * Lock down vec, so our returned status isn't outdated by
175: * storing the status byte for a page.
176: */
1.50 chs 177:
1.62 chs 178: npgs = len >> PAGE_SHIFT;
179: error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
180: if (error) {
181: return error;
182: }
1.22 thorpej 183: vm_map_lock_read(map);
184:
185: if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
186: error = ENOMEM;
187: goto out;
188: }
189:
190: for (/* nothing */;
191: entry != &map->header && entry->start < end;
192: entry = entry->next) {
1.49 chs 193: KASSERT(!UVM_ET_ISSUBMAP(entry));
194: KASSERT(start >= entry->start);
195:
1.22 thorpej 196: /* Make sure there are no holes. */
197: if (entry->end < end &&
198: (entry->next == &map->header ||
199: entry->next->start > entry->end)) {
200: error = ENOMEM;
201: goto out;
202: }
1.6 mrg 203:
1.22 thorpej 204: lim = end < entry->end ? end : entry->end;
205:
206: /*
1.31 thorpej 207: * Special case for objects with no "real" pages. Those
208: * are always considered resident (mapped devices).
1.22 thorpej 209: */
1.50 chs 210:
1.22 thorpej 211: if (UVM_ET_ISOBJ(entry)) {
1.49 chs 212: KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
1.79 yamt 213: if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
1.22 thorpej 214: for (/* nothing */; start < lim;
215: start += PAGE_SIZE, vec++)
216: subyte(vec, 1);
217: continue;
218: }
219: }
220:
1.32 thorpej 221: amap = entry->aref.ar_amap; /* top layer */
222: uobj = entry->object.uvm_obj; /* bottom layer */
1.22 thorpej 223:
224: if (amap != NULL)
225: amap_lock(amap);
226: if (uobj != NULL)
227: simple_lock(&uobj->vmobjlock);
228:
229: for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
230: pgi = 0;
231: if (amap != NULL) {
232: /* Check the top layer first. */
233: anon = amap_lookup(&entry->aref,
234: start - entry->start);
235: /* Don't need to lock anon here. */
1.91 yamt 236: if (anon != NULL && anon->an_page != NULL) {
1.50 chs 237:
1.22 thorpej 238: /*
239: * Anon has the page for this entry
240: * offset.
241: */
1.50 chs 242:
1.22 thorpej 243: pgi = 1;
244: }
245: }
246: if (uobj != NULL && pgi == 0) {
247: /* Check the bottom layer. */
1.56 chs 248: pg = uvm_pagelookup(uobj,
1.22 thorpej 249: entry->offset + (start - entry->start));
1.56 chs 250: if (pg != NULL) {
1.50 chs 251:
1.22 thorpej 252: /*
253: * Object has the page for this entry
254: * offset.
255: */
1.50 chs 256:
1.22 thorpej 257: pgi = 1;
258: }
259: }
260: (void) subyte(vec, pgi);
261: }
262: if (uobj != NULL)
1.27 thorpej 263: simple_unlock(&uobj->vmobjlock);
1.22 thorpej 264: if (amap != NULL)
265: amap_unlock(amap);
266: }
267:
268: out:
269: vm_map_unlock_read(map);
270: uvm_vsunlock(p, SCARG(uap, vec), npgs);
271: return (error);
1.1 mrg 272: }
273:
274: /*
275: * sys_mmap: mmap system call.
276: *
1.64 atatat 277: * => file offset and address may not be page aligned
1.1 mrg 278: * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
279: * - if address isn't page aligned the mapping starts at trunc_page(addr)
280: * and the return value is adjusted up by the page offset.
281: */
282:
1.6 mrg 283: int
1.67 thorpej 284: sys_mmap(l, v, retval)
285: struct lwp *l;
1.6 mrg 286: void *v;
287: register_t *retval;
288: {
1.40 augustss 289: struct sys_mmap_args /* {
1.6 mrg 290: syscallarg(caddr_t) addr;
291: syscallarg(size_t) len;
292: syscallarg(int) prot;
293: syscallarg(int) flags;
294: syscallarg(int) fd;
295: syscallarg(long) pad;
296: syscallarg(off_t) pos;
297: } */ *uap = v;
1.67 thorpej 298: struct proc *p = l->l_proc;
1.12 eeh 299: vaddr_t addr;
1.9 mrg 300: struct vattr va;
1.6 mrg 301: off_t pos;
1.12 eeh 302: vsize_t size, pageoff;
1.6 mrg 303: vm_prot_t prot, maxprot;
304: int flags, fd;
1.89 fvdl 305: vaddr_t vm_min_address = VM_MIN_ADDRESS, defaddr;
1.40 augustss 306: struct filedesc *fdp = p->p_fd;
307: struct file *fp;
1.6 mrg 308: struct vnode *vp;
1.50 chs 309: void *handle;
1.6 mrg 310: int error;
311:
312: /*
313: * first, extract syscall args from the uap.
314: */
315:
1.50 chs 316: addr = (vaddr_t)SCARG(uap, addr);
317: size = (vsize_t)SCARG(uap, len);
1.6 mrg 318: prot = SCARG(uap, prot) & VM_PROT_ALL;
319: flags = SCARG(uap, flags);
320: fd = SCARG(uap, fd);
321: pos = SCARG(uap, pos);
322:
323: /*
1.24 thorpej 324: * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
325: * validate the flags.
326: */
327: if (flags & MAP_COPY)
328: flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
329: if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
330: return (EINVAL);
331:
332: /*
1.6 mrg 333: * align file position and save offset. adjust size.
334: */
335:
336: pageoff = (pos & PAGE_MASK);
337: pos -= pageoff;
338: size += pageoff; /* add offset */
1.50 chs 339: size = (vsize_t)round_page(size); /* round up */
1.6 mrg 340: if ((ssize_t) size < 0)
341: return (EINVAL); /* don't allow wrap */
342:
343: /*
1.51 chs 344: * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
1.6 mrg 345: */
346:
347: if (flags & MAP_FIXED) {
348:
349: /* ensure address and file offset are aligned properly */
350: addr -= pageoff;
351: if (addr & PAGE_MASK)
352: return (EINVAL);
353:
354: if (VM_MAXUSER_ADDRESS > 0 &&
355: (addr + size) > VM_MAXUSER_ADDRESS)
1.63 darrenr 356: return (EFBIG);
1.6 mrg 357: if (vm_min_address > 0 && addr < vm_min_address)
358: return (EINVAL);
359: if (addr > addr + size)
1.63 darrenr 360: return (EOVERFLOW); /* no wrapping! */
1.6 mrg 361:
1.75 christos 362: } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
1.6 mrg 363:
364: /*
1.68 atatat 365: * not fixed: make sure we skip over the largest
366: * possible heap for non-topdown mapping arrangements.
367: * we will refine our guess later (e.g. to account for
368: * VAC, etc)
1.6 mrg 369: */
1.46 chs 370:
1.89 fvdl 371: defaddr = p->p_emul->e_vm_default_addr(p,
372: (vaddr_t)p->p_vmspace->vm_daddr, size);
373:
1.68 atatat 374: if (addr == 0 ||
375: !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
1.89 fvdl 376: addr = MAX(addr, defaddr);
1.68 atatat 377: else
1.89 fvdl 378: addr = MIN(addr, defaddr);
1.6 mrg 379: }
380:
381: /*
382: * check for file mappings (i.e. not anonymous) and verify file.
383: */
384:
385: if ((flags & MAP_ANON) == 0) {
386:
1.54 thorpej 387: if ((fp = fd_getfile(fdp, fd)) == NULL)
388: return (EBADF);
1.69 pk 389:
390: simple_unlock(&fp->f_slock);
1.6 mrg 391:
392: if (fp->f_type != DTYPE_VNODE)
1.7 kleink 393: return (ENODEV); /* only mmap vnodes! */
1.6 mrg 394: vp = (struct vnode *)fp->f_data; /* convert to vnode */
395:
1.11 thorpej 396: if (vp->v_type != VREG && vp->v_type != VCHR &&
397: vp->v_type != VBLK)
398: return (ENODEV); /* only REG/CHR/BLK support mmap */
1.39 kleink 399:
1.61 chs 400: if (vp->v_type != VCHR && pos < 0)
401: return (EINVAL);
402:
403: if (vp->v_type != VCHR && (pos + size) < pos)
1.39 kleink 404: return (EOVERFLOW); /* no offset wrapping */
1.6 mrg 405:
406: /* special case: catch SunOS style /dev/zero */
1.80 jdolecek 407: if (vp->v_type == VCHR
408: && (vp->v_rdev == zerodev || COMPAT_ZERODEV(vp->v_rdev))) {
1.6 mrg 409: flags |= MAP_ANON;
410: goto is_anon;
411: }
412:
413: /*
414: * Old programs may not select a specific sharing type, so
415: * default to an appropriate one.
416: *
417: * XXX: how does MAP_ANON fit in the picture?
418: */
1.24 thorpej 419: if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
1.8 tv 420: #if defined(DEBUG)
1.6 mrg 421: printf("WARNING: defaulted mmap() share type to "
1.71 gmcgarry 422: "%s (pid %d command %s)\n", vp->v_type == VCHR ?
1.6 mrg 423: "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
424: p->p_comm);
1.1 mrg 425: #endif
1.6 mrg 426: if (vp->v_type == VCHR)
427: flags |= MAP_SHARED; /* for a device */
428: else
429: flags |= MAP_PRIVATE; /* for a file */
430: }
431:
1.51 chs 432: /*
1.6 mrg 433: * MAP_PRIVATE device mappings don't make sense (and aren't
434: * supported anyway). However, some programs rely on this,
435: * so just change it to MAP_SHARED.
436: */
437: if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
438: flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
439: }
1.1 mrg 440:
1.6 mrg 441: /*
442: * now check protection
443: */
444:
1.48 thorpej 445: maxprot = VM_PROT_EXECUTE;
1.6 mrg 446:
447: /* check read access */
448: if (fp->f_flag & FREAD)
449: maxprot |= VM_PROT_READ;
450: else if (prot & PROT_READ)
451: return (EACCES);
452:
1.9 mrg 453: /* check write access, shared case first */
1.6 mrg 454: if (flags & MAP_SHARED) {
1.9 mrg 455: /*
456: * if the file is writable, only add PROT_WRITE to
457: * maxprot if the file is not immutable, append-only.
458: * otherwise, if we have asked for PROT_WRITE, return
459: * EPERM.
460: */
461: if (fp->f_flag & FWRITE) {
462: if ((error =
1.98 ! ad 463: VOP_GETATTR(vp, &va, l->l_cred, l)))
1.9 mrg 464: return (error);
1.84 hannken 465: if ((va.va_flags &
466: (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0)
1.9 mrg 467: maxprot |= VM_PROT_WRITE;
468: else if (prot & PROT_WRITE)
469: return (EPERM);
470: }
1.6 mrg 471: else if (prot & PROT_WRITE)
472: return (EACCES);
473: } else {
474: /* MAP_PRIVATE mappings can always write to */
475: maxprot |= VM_PROT_WRITE;
476: }
1.50 chs 477: handle = vp;
1.1 mrg 478:
1.6 mrg 479: } else { /* MAP_ANON case */
1.24 thorpej 480: /*
481: * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
482: */
1.6 mrg 483: if (fd != -1)
484: return (EINVAL);
1.1 mrg 485:
1.24 thorpej 486: is_anon: /* label for SunOS style /dev/zero */
1.6 mrg 487: handle = NULL;
488: maxprot = VM_PROT_ALL;
489: pos = 0;
1.28 cgd 490: }
491:
492: /*
493: * XXX (in)sanity check. We don't do proper datasize checking
494: * XXX for anonymous (or private writable) mmap(). However,
495: * XXX know that if we're trying to allocate more than the amount
496: * XXX remaining under our current data size limit, _that_ should
497: * XXX be disallowed.
498: */
499: if ((flags & MAP_ANON) != 0 ||
500: ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
501: if (size >
1.50 chs 502: (p->p_rlimit[RLIMIT_DATA].rlim_cur -
503: ctob(p->p_vmspace->vm_dsize))) {
1.28 cgd 504: return (ENOMEM);
505: }
1.6 mrg 506: }
507:
1.97 elad 508: #ifdef PAX_MPROTECT
509: pax_mprotect(l, &prot, &maxprot);
510: #endif /* PAX_MPROTECT */
511:
1.6 mrg 512: /*
513: * now let kernel internal function uvm_mmap do the work.
514: */
515:
516: error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
1.25 thorpej 517: flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.6 mrg 518:
519: if (error == 0)
520: /* remember to add offset */
521: *retval = (register_t)(addr + pageoff);
1.1 mrg 522:
1.6 mrg 523: return (error);
1.1 mrg 524: }
525:
526: /*
527: * sys___msync13: the msync system call (a front-end for flush)
528: */
529:
1.6 mrg 530: int
1.67 thorpej 531: sys___msync13(l, v, retval)
532: struct lwp *l;
1.6 mrg 533: void *v;
534: register_t *retval;
535: {
536: struct sys___msync13_args /* {
537: syscallarg(caddr_t) addr;
538: syscallarg(size_t) len;
539: syscallarg(int) flags;
540: } */ *uap = v;
1.67 thorpej 541: struct proc *p = l->l_proc;
1.12 eeh 542: vaddr_t addr;
543: vsize_t size, pageoff;
1.53 chs 544: struct vm_map *map;
1.50 chs 545: int error, rv, flags, uvmflags;
1.6 mrg 546:
547: /*
548: * extract syscall args from the uap
549: */
550:
1.12 eeh 551: addr = (vaddr_t)SCARG(uap, addr);
552: size = (vsize_t)SCARG(uap, len);
1.6 mrg 553: flags = SCARG(uap, flags);
554:
555: /* sanity check flags */
556: if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
1.77 chs 557: (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
558: (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
559: return (EINVAL);
1.6 mrg 560: if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
1.77 chs 561: flags |= MS_SYNC;
1.1 mrg 562:
1.6 mrg 563: /*
1.50 chs 564: * align the address to a page boundary and adjust the size accordingly.
1.6 mrg 565: */
566:
567: pageoff = (addr & PAGE_MASK);
568: addr -= pageoff;
569: size += pageoff;
1.50 chs 570: size = (vsize_t)round_page(size);
1.6 mrg 571:
572: /* disallow wrap-around. */
573: if (addr + size < addr)
574: return (EINVAL);
575:
576: /*
577: * get map
578: */
579:
580: map = &p->p_vmspace->vm_map;
581:
582: /*
583: * XXXCDC: do we really need this semantic?
584: *
585: * XXX Gak! If size is zero we are supposed to sync "all modified
586: * pages with the region containing addr". Unfortunately, we
587: * don't really keep track of individual mmaps so we approximate
588: * by flushing the range of the map entry containing addr.
589: * This can be incorrect if the region splits or is coalesced
590: * with a neighbor.
591: */
1.50 chs 592:
1.6 mrg 593: if (size == 0) {
1.53 chs 594: struct vm_map_entry *entry;
1.51 chs 595:
1.6 mrg 596: vm_map_lock_read(map);
597: rv = uvm_map_lookup_entry(map, addr, &entry);
598: if (rv == TRUE) {
599: addr = entry->start;
600: size = entry->end - entry->start;
601: }
602: vm_map_unlock_read(map);
603: if (rv == FALSE)
604: return (EINVAL);
605: }
606:
607: /*
608: * translate MS_ flags into PGO_ flags
609: */
1.50 chs 610:
1.34 thorpej 611: uvmflags = PGO_CLEANIT;
612: if (flags & MS_INVALIDATE)
613: uvmflags |= PGO_FREE;
1.6 mrg 614: if (flags & MS_SYNC)
615: uvmflags |= PGO_SYNCIO;
616:
1.50 chs 617: error = uvm_map_clean(map, addr, addr+size, uvmflags);
618: return error;
1.1 mrg 619: }
620:
621: /*
622: * sys_munmap: unmap a users memory
623: */
624:
1.6 mrg 625: int
1.67 thorpej 626: sys_munmap(l, v, retval)
627: struct lwp *l;
1.6 mrg 628: void *v;
629: register_t *retval;
630: {
1.40 augustss 631: struct sys_munmap_args /* {
1.6 mrg 632: syscallarg(caddr_t) addr;
633: syscallarg(size_t) len;
634: } */ *uap = v;
1.67 thorpej 635: struct proc *p = l->l_proc;
1.12 eeh 636: vaddr_t addr;
637: vsize_t size, pageoff;
1.53 chs 638: struct vm_map *map;
1.12 eeh 639: vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.6 mrg 640: struct vm_map_entry *dead_entries;
641:
642: /*
1.50 chs 643: * get syscall args.
1.6 mrg 644: */
645:
1.50 chs 646: addr = (vaddr_t)SCARG(uap, addr);
647: size = (vsize_t)SCARG(uap, len);
1.51 chs 648:
1.6 mrg 649: /*
1.50 chs 650: * align the address to a page boundary and adjust the size accordingly.
1.6 mrg 651: */
652:
653: pageoff = (addr & PAGE_MASK);
654: addr -= pageoff;
655: size += pageoff;
1.50 chs 656: size = (vsize_t)round_page(size);
1.6 mrg 657:
658: if ((int)size < 0)
659: return (EINVAL);
660: if (size == 0)
661: return (0);
662:
663: /*
664: * Check for illegal addresses. Watch out for address wrap...
665: * Note that VM_*_ADDRESS are not constants due to casts (argh).
666: */
667: if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
668: return (EINVAL);
669: if (vm_min_address > 0 && addr < vm_min_address)
670: return (EINVAL);
671: if (addr > addr + size)
672: return (EINVAL);
673: map = &p->p_vmspace->vm_map;
674:
675: /*
1.51 chs 676: * interesting system call semantic: make sure entire range is
1.6 mrg 677: * allocated before allowing an unmap.
678: */
679:
1.50 chs 680: vm_map_lock(map);
1.66 mycroft 681: #if 0
1.6 mrg 682: if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
683: vm_map_unlock(map);
684: return (EINVAL);
685: }
1.66 mycroft 686: #endif
1.90 yamt 687: uvm_unmap_remove(map, addr, addr + size, &dead_entries, NULL, 0);
1.50 chs 688: vm_map_unlock(map);
1.6 mrg 689: if (dead_entries != NULL)
690: uvm_unmap_detach(dead_entries, 0);
691: return (0);
1.1 mrg 692: }
693:
694: /*
695: * sys_mprotect: the mprotect system call
696: */
697:
1.6 mrg 698: int
1.67 thorpej 699: sys_mprotect(l, v, retval)
700: struct lwp *l;
1.6 mrg 701: void *v;
702: register_t *retval;
703: {
704: struct sys_mprotect_args /* {
705: syscallarg(caddr_t) addr;
1.76 chs 706: syscallarg(size_t) len;
1.6 mrg 707: syscallarg(int) prot;
708: } */ *uap = v;
1.67 thorpej 709: struct proc *p = l->l_proc;
1.12 eeh 710: vaddr_t addr;
711: vsize_t size, pageoff;
1.6 mrg 712: vm_prot_t prot;
1.50 chs 713: int error;
1.6 mrg 714:
715: /*
716: * extract syscall args from uap
717: */
718:
1.12 eeh 719: addr = (vaddr_t)SCARG(uap, addr);
720: size = (vsize_t)SCARG(uap, len);
1.6 mrg 721: prot = SCARG(uap, prot) & VM_PROT_ALL;
722:
723: /*
1.50 chs 724: * align the address to a page boundary and adjust the size accordingly.
1.6 mrg 725: */
1.50 chs 726:
1.6 mrg 727: pageoff = (addr & PAGE_MASK);
728: addr -= pageoff;
729: size += pageoff;
1.76 chs 730: size = round_page(size);
1.50 chs 731:
732: error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
733: FALSE);
734: return error;
1.1 mrg 735: }
736:
737: /*
738: * sys_minherit: the minherit system call
739: */
740:
1.6 mrg 741: int
1.67 thorpej 742: sys_minherit(l, v, retval)
743: struct lwp *l;
1.6 mrg 744: void *v;
745: register_t *retval;
746: {
747: struct sys_minherit_args /* {
748: syscallarg(caddr_t) addr;
749: syscallarg(int) len;
750: syscallarg(int) inherit;
751: } */ *uap = v;
1.67 thorpej 752: struct proc *p = l->l_proc;
1.12 eeh 753: vaddr_t addr;
754: vsize_t size, pageoff;
1.40 augustss 755: vm_inherit_t inherit;
1.50 chs 756: int error;
1.51 chs 757:
1.12 eeh 758: addr = (vaddr_t)SCARG(uap, addr);
759: size = (vsize_t)SCARG(uap, len);
1.6 mrg 760: inherit = SCARG(uap, inherit);
1.50 chs 761:
1.6 mrg 762: /*
1.50 chs 763: * align the address to a page boundary and adjust the size accordingly.
1.6 mrg 764: */
765:
766: pageoff = (addr & PAGE_MASK);
767: addr -= pageoff;
768: size += pageoff;
1.50 chs 769: size = (vsize_t)round_page(size);
1.6 mrg 770:
771: if ((int)size < 0)
772: return (EINVAL);
1.50 chs 773: error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
774: inherit);
775: return error;
1.21 mrg 776: }
777:
778: /*
779: * sys_madvise: give advice about memory usage.
780: */
781:
782: /* ARGSUSED */
783: int
1.67 thorpej 784: sys_madvise(l, v, retval)
785: struct lwp *l;
1.21 mrg 786: void *v;
787: register_t *retval;
788: {
789: struct sys_madvise_args /* {
790: syscallarg(caddr_t) addr;
791: syscallarg(size_t) len;
792: syscallarg(int) behav;
793: } */ *uap = v;
1.67 thorpej 794: struct proc *p = l->l_proc;
1.21 mrg 795: vaddr_t addr;
796: vsize_t size, pageoff;
1.50 chs 797: int advice, error;
1.51 chs 798:
1.21 mrg 799: addr = (vaddr_t)SCARG(uap, addr);
800: size = (vsize_t)SCARG(uap, len);
801: advice = SCARG(uap, behav);
802:
803: /*
804: * align the address to a page boundary, and adjust the size accordingly
805: */
1.50 chs 806:
1.21 mrg 807: pageoff = (addr & PAGE_MASK);
808: addr -= pageoff;
809: size += pageoff;
1.50 chs 810: size = (vsize_t)round_page(size);
1.21 mrg 811:
1.29 thorpej 812: if ((ssize_t)size <= 0)
813: return (EINVAL);
814:
815: switch (advice) {
816: case MADV_NORMAL:
817: case MADV_RANDOM:
818: case MADV_SEQUENTIAL:
1.50 chs 819: error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
1.29 thorpej 820: advice);
821: break;
822:
823: case MADV_WILLNEED:
1.50 chs 824:
1.29 thorpej 825: /*
826: * Activate all these pages, pre-faulting them in if
827: * necessary.
828: */
829: /*
830: * XXX IMPLEMENT ME.
831: * Should invent a "weak" mode for uvm_fault()
832: * which would only do the PGO_LOCKED pgo_get().
833: */
1.50 chs 834:
1.29 thorpej 835: return (0);
836:
837: case MADV_DONTNEED:
1.50 chs 838:
1.29 thorpej 839: /*
840: * Deactivate all these pages. We don't need them
841: * any more. We don't, however, toss the data in
842: * the pages.
843: */
1.50 chs 844:
845: error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
1.29 thorpej 846: PGO_DEACTIVATE);
847: break;
848:
849: case MADV_FREE:
1.50 chs 850:
1.29 thorpej 851: /*
852: * These pages contain no valid data, and may be
1.45 soren 853: * garbage-collected. Toss all resources, including
1.30 thorpej 854: * any swap space in use.
1.29 thorpej 855: */
1.50 chs 856:
857: error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
1.29 thorpej 858: PGO_FREE);
859: break;
860:
861: case MADV_SPACEAVAIL:
1.50 chs 862:
1.29 thorpej 863: /*
864: * XXXMRG What is this? I think it's:
865: *
866: * Ensure that we have allocated backing-store
867: * for these pages.
868: *
869: * This is going to require changes to the page daemon,
870: * as it will free swap space allocated to pages in core.
871: * There's also what to do for device/file/anonymous memory.
872: */
1.50 chs 873:
1.29 thorpej 874: return (EINVAL);
875:
876: default:
1.21 mrg 877: return (EINVAL);
1.29 thorpej 878: }
879:
1.50 chs 880: return error;
1.1 mrg 881: }
882:
883: /*
884: * sys_mlock: memory lock
885: */
886:
1.6 mrg 887: int
1.67 thorpej 888: sys_mlock(l, v, retval)
889: struct lwp *l;
1.6 mrg 890: void *v;
891: register_t *retval;
892: {
893: struct sys_mlock_args /* {
1.10 kleink 894: syscallarg(const void *) addr;
1.6 mrg 895: syscallarg(size_t) len;
896: } */ *uap = v;
1.67 thorpej 897: struct proc *p = l->l_proc;
1.12 eeh 898: vaddr_t addr;
899: vsize_t size, pageoff;
1.6 mrg 900: int error;
901:
902: /*
903: * extract syscall args from uap
904: */
1.50 chs 905:
1.12 eeh 906: addr = (vaddr_t)SCARG(uap, addr);
907: size = (vsize_t)SCARG(uap, len);
1.6 mrg 908:
909: /*
910: * align the address to a page boundary and adjust the size accordingly
911: */
1.50 chs 912:
1.6 mrg 913: pageoff = (addr & PAGE_MASK);
914: addr -= pageoff;
915: size += pageoff;
1.50 chs 916: size = (vsize_t)round_page(size);
1.51 chs 917:
1.6 mrg 918: /* disallow wrap-around. */
1.50 chs 919: if (addr + size < addr)
1.6 mrg 920: return (EINVAL);
1.1 mrg 921:
1.6 mrg 922: if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
923: return (EAGAIN);
1.1 mrg 924:
1.6 mrg 925: if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
926: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
927: return (EAGAIN);
1.1 mrg 928:
1.25 thorpej 929: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
1.35 thorpej 930: 0);
1.85 briggs 931: if (error == EFAULT)
932: error = ENOMEM;
1.50 chs 933: return error;
1.1 mrg 934: }
935:
936: /*
937: * sys_munlock: unlock wired pages
938: */
939:
1.6 mrg 940: int
1.67 thorpej 941: sys_munlock(l, v, retval)
942: struct lwp *l;
1.6 mrg 943: void *v;
944: register_t *retval;
945: {
946: struct sys_munlock_args /* {
1.10 kleink 947: syscallarg(const void *) addr;
1.6 mrg 948: syscallarg(size_t) len;
949: } */ *uap = v;
1.67 thorpej 950: struct proc *p = l->l_proc;
1.12 eeh 951: vaddr_t addr;
952: vsize_t size, pageoff;
1.6 mrg 953: int error;
954:
955: /*
956: * extract syscall args from uap
957: */
958:
1.12 eeh 959: addr = (vaddr_t)SCARG(uap, addr);
960: size = (vsize_t)SCARG(uap, len);
1.6 mrg 961:
962: /*
963: * align the address to a page boundary, and adjust the size accordingly
964: */
1.50 chs 965:
1.6 mrg 966: pageoff = (addr & PAGE_MASK);
967: addr -= pageoff;
968: size += pageoff;
1.50 chs 969: size = (vsize_t)round_page(size);
1.6 mrg 970:
971: /* disallow wrap-around. */
1.50 chs 972: if (addr + size < addr)
1.6 mrg 973: return (EINVAL);
1.1 mrg 974:
1.25 thorpej 975: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
1.35 thorpej 976: 0);
1.85 briggs 977: if (error == EFAULT)
978: error = ENOMEM;
1.50 chs 979: return error;
1.22 thorpej 980: }
981:
982: /*
983: * sys_mlockall: lock all pages mapped into an address space.
984: */
985:
986: int
1.67 thorpej 987: sys_mlockall(l, v, retval)
988: struct lwp *l;
1.22 thorpej 989: void *v;
990: register_t *retval;
991: {
992: struct sys_mlockall_args /* {
993: syscallarg(int) flags;
994: } */ *uap = v;
1.67 thorpej 995: struct proc *p = l->l_proc;
1.22 thorpej 996: int error, flags;
997:
998: flags = SCARG(uap, flags);
999:
1000: if (flags == 0 ||
1001: (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
1002: return (EINVAL);
1003:
1.25 thorpej 1004: error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
1005: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.22 thorpej 1006: return (error);
1007: }
1008:
1009: /*
1010: * sys_munlockall: unlock all pages mapped into an address space.
1011: */
1012:
1013: int
1.67 thorpej 1014: sys_munlockall(l, v, retval)
1015: struct lwp *l;
1.22 thorpej 1016: void *v;
1017: register_t *retval;
1018: {
1.67 thorpej 1019: struct proc *p = l->l_proc;
1.22 thorpej 1020:
1021: (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
1022: return (0);
1.1 mrg 1023: }
1024:
1025: /*
1026: * uvm_mmap: internal version of mmap
1027: *
1.56 chs 1028: * - used by sys_mmap and various framebuffers
1029: * - handle is a vnode pointer or NULL for MAP_ANON
1.1 mrg 1030: * - caller must page-align the file offset
1031: */
1032:
1.6 mrg 1033: int
1.25 thorpej 1034: uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
1.53 chs 1035: struct vm_map *map;
1.12 eeh 1036: vaddr_t *addr;
1037: vsize_t size;
1.6 mrg 1038: vm_prot_t prot, maxprot;
1039: int flags;
1.50 chs 1040: void *handle;
1.38 kleink 1041: voff_t foff;
1.25 thorpej 1042: vsize_t locklimit;
1.6 mrg 1043: {
1044: struct uvm_object *uobj;
1045: struct vnode *vp;
1.70 matt 1046: vaddr_t align = 0;
1.50 chs 1047: int error;
1.6 mrg 1048: int advice = UVM_ADV_NORMAL;
1049: uvm_flag_t uvmflag = 0;
1050:
1051: /*
1052: * check params
1053: */
1054:
1055: if (size == 0)
1056: return(0);
1057: if (foff & PAGE_MASK)
1058: return(EINVAL);
1059: if ((prot & maxprot) != prot)
1060: return(EINVAL);
1061:
1062: /*
1063: * for non-fixed mappings, round off the suggested address.
1064: * for fixed mappings, check alignment and zap old mappings.
1065: */
1066:
1067: if ((flags & MAP_FIXED) == 0) {
1.56 chs 1068: *addr = round_page(*addr);
1.6 mrg 1069: } else {
1070: if (*addr & PAGE_MASK)
1071: return(EINVAL);
1072: uvmflag |= UVM_FLAG_FIXED;
1.56 chs 1073: (void) uvm_unmap(map, *addr, *addr + size);
1.6 mrg 1074: }
1075:
1076: /*
1.70 matt 1077: * Try to see if any requested alignment can even be attempted.
1078: * Make sure we can express the alignment (asking for a >= 4GB
1079: * alignment on an ILP32 architecture makes no sense) and the
1080: * alignment is at least for a page sized quantity. If the
1081: * request was for a fixed mapping, make sure supplied address
1082: * adheres to the request alignment.
1083: */
1084: align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
1085: if (align) {
1086: if (align >= sizeof(vaddr_t) * NBBY)
1087: return(EINVAL);
1088: align = 1L << align;
1089: if (align < PAGE_SIZE)
1090: return(EINVAL);
1.88 chs 1091: if (align >= vm_map_max(map))
1.70 matt 1092: return(ENOMEM);
1093: if (flags & MAP_FIXED) {
1094: if ((*addr & (align-1)) != 0)
1095: return(EINVAL);
1096: align = 0;
1097: }
1098: }
1099:
1100: /*
1.6 mrg 1101: * handle anon vs. non-anon mappings. for non-anon mappings attach
1102: * to underlying vm object.
1103: */
1104:
1105: if (flags & MAP_ANON) {
1.95 christos 1106: KASSERT(handle == NULL);
1.36 thorpej 1107: foff = UVM_UNKNOWN_OFFSET;
1.6 mrg 1108: uobj = NULL;
1109: if ((flags & MAP_SHARED) == 0)
1110: /* XXX: defer amap create */
1111: uvmflag |= UVM_FLAG_COPYONW;
1112: else
1113: /* shared: create amap now */
1114: uvmflag |= UVM_FLAG_OVERLAY;
1115:
1116: } else {
1.95 christos 1117: KASSERT(handle != NULL);
1.50 chs 1118: vp = (struct vnode *)handle;
1.59 thorpej 1119:
1120: /*
1121: * Don't allow mmap for EXEC if the file system
1122: * is mounted NOEXEC.
1123: */
1124: if ((prot & PROT_EXEC) != 0 &&
1125: (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0)
1126: return (EACCES);
1127:
1.6 mrg 1128: if (vp->v_type != VCHR) {
1.98 ! ad 1129: error = VOP_MMAP(vp, 0, curlwp->l_cred, curlwp);
1.55 chs 1130: if (error) {
1131: return error;
1132: }
1133:
1.50 chs 1134: uobj = uvn_attach((void *)vp, (flags & MAP_SHARED) ?
1.6 mrg 1135: maxprot : (maxprot & ~VM_PROT_WRITE));
1136:
1.46 chs 1137: /* XXX for now, attach doesn't gain a ref */
1138: VREF(vp);
1.57 thorpej 1139:
1140: /*
1141: * If the vnode is being mapped with PROT_EXEC,
1142: * then mark it as text.
1143: */
1144: if (prot & PROT_EXEC)
1.58 thorpej 1145: vn_markexec(vp);
1.6 mrg 1146: } else {
1.83 darrenr 1147: int i = maxprot;
1148:
1.48 thorpej 1149: /*
1150: * XXX Some devices don't like to be mapped with
1.83 darrenr 1151: * XXX PROT_EXEC or PROT_WRITE, but we don't really
1152: * XXX have a better way of handling this, right now
1.48 thorpej 1153: */
1.83 darrenr 1154: do {
1155: uobj = udv_attach((void *) &vp->v_rdev,
1156: (flags & MAP_SHARED) ? i :
1157: (i & ~VM_PROT_WRITE), foff, size);
1158: i--;
1159: } while ((uobj == NULL) && (i > 0));
1.6 mrg 1160: advice = UVM_ADV_RANDOM;
1161: }
1162: if (uobj == NULL)
1.11 thorpej 1163: return((vp->v_type == VREG) ? ENOMEM : EINVAL);
1.92 yamt 1164: if ((flags & MAP_SHARED) == 0) {
1.6 mrg 1165: uvmflag |= UVM_FLAG_COPYONW;
1.92 yamt 1166: } else if ((maxprot & VM_PROT_WRITE) != 0) {
1167: simple_lock(&vp->v_interlock);
1168: vp->v_flag |= VWRITEMAP;
1169: simple_unlock(&vp->v_interlock);
1170: }
1.6 mrg 1171: }
1172:
1.51 chs 1173: uvmflag = UVM_MAPFLAG(prot, maxprot,
1.1 mrg 1174: (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
1175: advice, uvmflag);
1.70 matt 1176: error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
1.50 chs 1177: if (error) {
1178: if (uobj)
1179: uobj->pgops->pgo_detach(uobj);
1180: return error;
1181: }
1.1 mrg 1182:
1.6 mrg 1183: /*
1.50 chs 1184: * POSIX 1003.1b -- if our address space was configured
1185: * to lock all future mappings, wire the one we just made.
1.78 thorpej 1186: *
1187: * Also handle the MAP_WIRED flag here.
1.6 mrg 1188: */
1189:
1.50 chs 1190: if (prot == VM_PROT_NONE) {
1.6 mrg 1191:
1.25 thorpej 1192: /*
1.50 chs 1193: * No more work to do in this case.
1.25 thorpej 1194: */
1195:
1.50 chs 1196: return (0);
1197: }
1198: vm_map_lock(map);
1.78 thorpej 1199: if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
1.87 chs 1200: if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
1201: (locklimit != 0 &&
1202: size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
1203: locklimit)) {
1.50 chs 1204: vm_map_unlock(map);
1205: uvm_unmap(map, *addr, *addr + size);
1206: return ENOMEM;
1.25 thorpej 1207: }
1208:
1.50 chs 1209: /*
1210: * uvm_map_pageable() always returns the map unlocked.
1211: */
1.25 thorpej 1212:
1.50 chs 1213: error = uvm_map_pageable(map, *addr, *addr + size,
1214: FALSE, UVM_LK_ENTER);
1215: if (error) {
1216: uvm_unmap(map, *addr, *addr + size);
1217: return error;
1218: }
1.25 thorpej 1219: return (0);
1220: }
1.50 chs 1221: vm_map_unlock(map);
1222: return 0;
1.1 mrg 1223: }
1.89 fvdl 1224:
1225: vaddr_t
1226: uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
1227: {
1228: return VM_DEFAULT_ADDRESS(base, sz);
1229: }
CVSweb <webmaster@jp.NetBSD.org>