Annotation of src/sys/vm/vm_mmap.c, Revision 1.63
1.63 ! kleink 1: /* $NetBSD: vm_mmap.c,v 1.62 1998/10/02 08:00:57 mrg Exp $ */
1.31 cgd 2:
1.1 cgd 3: /*
4: * Copyright (c) 1988 University of Utah.
1.28 cgd 5: * Copyright (c) 1991, 1993
6: * The Regents of the University of California. All rights reserved.
1.1 cgd 7: *
8: * This code is derived from software contributed to Berkeley by
9: * the Systems Programming Group of the University of Utah Computer
10: * Science Department.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. All advertising materials mentioning features or use of this software
21: * must display the following acknowledgement:
22: * This product includes software developed by the University of
23: * California, Berkeley and its contributors.
24: * 4. Neither the name of the University nor the names of its contributors
25: * may be used to endorse or promote products derived from this software
26: * without specific prior written permission.
27: *
28: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38: * SUCH DAMAGE.
39: *
1.28 cgd 40: * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
41: *
1.54 fvdl 42: * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
1.1 cgd 43: */
44:
45: /*
46: * Mapped file (mmap) interface to VM
47: */
48:
1.14 mycroft 49: #include <sys/param.h>
50: #include <sys/systm.h>
51: #include <sys/filedesc.h>
1.28 cgd 52: #include <sys/resourcevar.h>
1.14 mycroft 53: #include <sys/proc.h>
54: #include <sys/vnode.h>
55: #include <sys/file.h>
56: #include <sys/mman.h>
57: #include <sys/conf.h>
1.57 mrg 58: #include <sys/stat.h>
1.1 cgd 59:
1.33 cgd 60: #include <sys/mount.h>
61: #include <sys/syscallargs.h>
62:
1.28 cgd 63: #include <miscfs/specfs/specdev.h>
64:
1.14 mycroft 65: #include <vm/vm.h>
66: #include <vm/vm_pager.h>
67: #include <vm/vm_prot.h>
1.1 cgd 68:
69: #ifdef DEBUG
70: int mmapdebug = 0; /* bitmask of MDB_* flags selecting the debug printf()s in this file */
71: #define MDB_FOLLOW 0x01 /* print the arguments on entry to the calls in this file */
72: #define MDB_SYNC 0x02 /* print msync arguments and the address range being cleaned */
73: #define MDB_MAPIT 0x04 /* print vm_mmap() mapping details and its result code */
74: #endif
75:
76: /* ARGSUSED */
1.16 mycroft 77: int
1.41 mycroft 78: sys_sbrk(p, v, retval)
1.1 cgd 79: struct proc *p;
1.40 thorpej 80: void *v;
81: register_t *retval;
82: {
1.44 christos 83: #if 0
1.41 mycroft 84: struct sys_sbrk_args /* {
1.33 cgd 85: syscallarg(int) incr;
1.40 thorpej 86: } */ *uap = v;
1.44 christos 87: #endif
1.1 cgd 88:
89: /* sbrk system call stub. Not yet implemented: argument decoding is under #if 0 and every call returns ENOSYS. */
1.63 ! kleink 90: return (ENOSYS);
1.1 cgd 91: }
92:
93: /* ARGSUSED */
1.16 mycroft 94: int
1.41 mycroft 95: sys_sstk(p, v, retval)
1.1 cgd 96: struct proc *p;
1.40 thorpej 97: void *v;
98: register_t *retval;
99: {
1.44 christos 100: #if 0
1.41 mycroft 101: struct sys_sstk_args /* {
1.33 cgd 102: syscallarg(int) incr;
1.40 thorpej 103: } */ *uap = v;
1.44 christos 104: #endif
1.1 cgd 105:
106: /* sstk system call stub. Not yet implemented: argument decoding is under #if 0 and every call returns ENOSYS. */
1.63 ! kleink 107: return (ENOSYS);
1.1 cgd 108: }
109:
1.10 mycroft 110:
1.35 gwr 111: /*
112: * Memory Map (mmap) system call. Note that the file offset
113: * and address are allowed to be NOT page aligned, though if
114: * the MAP_FIXED flag is set, both must have the same remainder
115: * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not
116: * page-aligned, the actual mapping starts at trunc_page(addr)
117: * and the return value is adjusted up by the page offset.
118: * Returns 0 with the mapped address stored in *retval, or an errno value. */
1.16 mycroft 119: int
1.41 mycroft 120: sys_mmap(p, v, retval)
1.1 cgd 121: struct proc *p;
1.40 thorpej 122: void *v;
123: register_t *retval;
124: {
1.41 mycroft 125: register struct sys_mmap_args /* {
1.52 christos 126: syscallarg(void *) addr;
1.33 cgd 127: syscallarg(size_t) len;
128: syscallarg(int) prot;
129: syscallarg(int) flags;
130: syscallarg(int) fd;
131: syscallarg(long) pad;
132: syscallarg(off_t) pos;
1.40 thorpej 133: } */ *uap = v;
1.57 mrg 134: struct vattr va;
1.1 cgd 135: register struct filedesc *fdp = p->p_fd;
136: register struct file *fp;
137: struct vnode *vp;
1.60 eeh 138: vaddr_t addr;
1.50 drochner 139: off_t pos;
1.60 eeh 140: vsize_t size, pageoff;
1.28 cgd 141: vm_prot_t prot, maxprot;
1.1 cgd 142: caddr_t handle;
1.35 gwr 143: int fd, flags, error;
1.60 eeh 144: vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.1 cgd 145:
1.60 eeh 146: addr = (vaddr_t) SCARG(uap, addr);
147: size = (vsize_t) SCARG(uap, len);
1.33 cgd 148: prot = SCARG(uap, prot) & VM_PROT_ALL;
149: flags = SCARG(uap, flags);
1.35 gwr 150: fd = SCARG(uap, fd);
1.50 drochner 151: pos = SCARG(uap, pos);
1.35 gwr 152:
1.1 cgd 153: #ifdef DEBUG
154: if (mmapdebug & MDB_FOLLOW)
1.50 drochner 155: printf("mmap(%d): addr %lx len %lx pro %x flg %x fd %d pos %qx\n",
1.35 gwr 156: p->p_pid, addr, size, prot, flags, fd, pos);
1.28 cgd 157: #endif
1.50 drochner 158:
159: /* make sure mapping fits into numeric range */
1.60 eeh 160: if (pos + size > (vaddr_t)-PAGE_SIZE) { /* NOTE(review): an off_t sum is compared against a vaddr_t limit - confirm the behaviour for negative or very large pos */
1.50 drochner 161: #ifdef DEBUG
162: printf("mmap: pos=%qx, size=%lx too big\n", pos, size);
163: #endif
164: return(EINVAL);
165: }
1.35 gwr 166:
1.1 cgd 167: /*
1.35 gwr 168: * Align the file position to a page boundary,
169: * and save its page offset component.
1.1 cgd 170: */
1.35 gwr 171: pageoff = (pos & PAGE_MASK);
172: pos -= pageoff;
173:
174: /* Adjust size for rounding (on both ends). */
175: size += pageoff; /* low end... */
1.60 eeh 176: size = (vsize_t) round_page(size); /* hi end */
1.35 gwr 177:
178: /* Do not allow mappings that cause address wrap... */
179: if ((ssize_t)size < 0)
1.28 cgd 180: return (EINVAL);
1.35 gwr 181:
1.1 cgd 182: /*
1.28 cgd 183: * Check for illegal addresses. Watch out for address wrap...
184: * Note that VM_*_ADDRESS are not constants due to casts (argh).
185: */
186: if (flags & MAP_FIXED) {
1.35 gwr 187: /*
188: * The specified address must have the same remainder
189: * as the file offset taken modulo PAGE_SIZE, so it
190: * should be aligned after adjustment by pageoff.
191: */
192: addr -= pageoff;
193: if (addr & PAGE_MASK)
194: return (EINVAL);
195: /* Address range must be all in user VM space. */
1.44 christos 196: if (VM_MAXUSER_ADDRESS > 0 &&
197: addr + size > VM_MAXUSER_ADDRESS)
1.28 cgd 198: return (EINVAL);
1.44 christos 199: if (vm_min_address > 0 && addr < vm_min_address)
1.28 cgd 200: return (EINVAL);
201: if (addr > addr + size)
202: return (EINVAL);
203: }
204: /*
205: * XXX for non-fixed mappings where no hint is provided or
206: * the hint would fall in the potential heap space,
207: * place it after the end of the largest possible heap.
1.1 cgd 208: *
209: * There should really be a pmap call to determine a reasonable
1.35 gwr 210: * location. (To avoid VA cache alias problems, for example!)
1.1 cgd 211: */
1.28 cgd 212: else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
1.1 cgd 213: addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
1.35 gwr 214:
215: if ((flags & MAP_ANON) == 0) {
1.28 cgd 216: /*
217: * Mapping file, get fp for validation.
218: * Obtain vnode and make sure it is of appropriate type.
219: */
1.35 gwr 220: if (((unsigned)fd) >= fdp->fd_nfiles ||
221: (fp = fdp->fd_ofiles[fd]) == NULL)
1.28 cgd 222: return (EBADF);
1.1 cgd 223: if (fp->f_type != DTYPE_VNODE)
1.56 kleink 224: return (ENODEV);
1.1 cgd 225: vp = (struct vnode *)fp->f_data;
1.59 thorpej 226: if (vp->v_type != VREG && vp->v_type != VCHR &&
227: vp->v_type != VBLK)
1.56 kleink 228: return (ENODEV);
1.1 cgd 229: /*
1.28 cgd 230: * XXX hack to handle use of /dev/zero to map anon
231: * memory (ala SunOS).
1.1 cgd 232: */
1.28 cgd 233: if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
234: flags |= MAP_ANON;
1.35 gwr 235: goto is_anon;
236: }
1.55 mycroft 237: /*
238: * Old programs may not select a specific sharing type, so
239: * default to an appropriate one.
240: */
241: if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
242: if (vp->v_type == VCHR)
243: flags |= MAP_SHARED; /* for a device */
244: else
245: flags |= MAP_PRIVATE; /* for a file */
246: }
247: /*
248: * MAP_PRIVATE device mappings don't make sense (and aren't
249: * supported anyway). However, some programs rely on this,
250: * so just change it to MAP_SHARED.
251: */
252: if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0)
253: flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
1.35 gwr 254: /*
255: * Ensure that file and memory protections are
256: * compatible. Note that we only worry about
257: * writability if mapping is shared; in this case,
258: * current and max prot are dictated by the open file.
259: * XXX use the vnode instead? Problem is: what
260: * credentials do we use for determination?
261: * What if proc does a setuid?
262: */
263: maxprot = VM_PROT_EXECUTE; /* ??? */
264: if (fp->f_flag & FREAD)
265: maxprot |= VM_PROT_READ;
266: else if (prot & PROT_READ)
267: return (EACCES);
268: if (flags & MAP_SHARED) {
1.57 mrg 269: /*
270: * if the file is writable, only add PROT_WRITE to
271: * maxprot if the file is not immutable, append-only.
272: * otherwise, if we have asked for PROT_WRITE, return
273: * EPERM.
274: */
275: if (fp->f_flag & FWRITE) {
276: if ((error =
277: VOP_GETATTR(vp, &va, p->p_ucred, p)))
278: return (error);
279: if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
280: maxprot |= VM_PROT_WRITE;
281: else if (prot & PROT_WRITE)
282: return (EPERM);
283: }
1.35 gwr 284: else if (prot & PROT_WRITE)
1.28 cgd 285: return (EACCES);
1.35 gwr 286: } else
287: maxprot |= VM_PROT_WRITE;
288: handle = (caddr_t)vp;
289: } else {
290: /*
291: * (flags & MAP_ANON) == TRUE
292: * Mapping blank space is trivial.
293: */
294: if (fd != -1)
295: return (EINVAL);
296: is_anon: /* also reached by goto from the /dev/zero case above, which skips the fd check */
297: handle = NULL;
298: maxprot = VM_PROT_ALL;
299: pos = 0;
1.3 cgd 300: }
1.4 cgd 301: error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
1.35 gwr 302: flags, handle, pos);
1.1 cgd 303: if (error == 0)
1.35 gwr 304: *retval = (register_t)(addr + pageoff); /* add back the sub-page offset taken from pos */
1.28 cgd 305: return (error);
1.1 cgd 306: }
307: /* msync system call: check the flags, page-align the range and clean it with vm_map_clean(); returns 0 or an errno value. */
1.16 mycroft 308: int
1.53 thorpej 309: sys___msync13(p, v, retval)
1.1 cgd 310: struct proc *p;
1.40 thorpej 311: void *v;
312: register_t *retval;
313: {
1.53 thorpej 314: struct sys___msync13_args /* {
1.52 christos 315: syscallarg(void *) addr;
1.39 mycroft 316: syscallarg(size_t) len;
1.52 christos 317: syscallarg(int) flags;
1.40 thorpej 318: } */ *uap = v;
1.60 eeh 319: vaddr_t addr;
320: vsize_t size, pageoff;
1.28 cgd 321: vm_map_t map;
1.52 christos 322: int rv, flags;
1.28 cgd 323: boolean_t syncio, invalidate;
1.1 cgd 324:
1.60 eeh 325: addr = (vaddr_t)SCARG(uap, addr);
326: size = (vsize_t)SCARG(uap, len);
1.52 christos 327: flags = SCARG(uap, flags);
1.1 cgd 328: #ifdef DEBUG
329: if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
1.52 christos 330: printf("msync(%d): addr %lx len %lx flags %x\n",
331: p->p_pid, addr, size, flags);
1.1 cgd 332: #endif
1.35 gwr 333:
1.52 christos 334: /* sanity check flags: reject unknown bits, an empty set, and MS_ASYNC together with MS_SYNC */
335: if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
336: (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
337: (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
338: return (EINVAL);
339: if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
340: flags |= MS_SYNC;
341:
1.35 gwr 342: /*
343: * Align the address to a page boundary,
344: * and adjust the size accordingly.
345: */
346: pageoff = (addr & PAGE_MASK);
347: addr -= pageoff;
348: size += pageoff;
1.60 eeh 349: size = (vsize_t) round_page(size);
1.35 gwr 350:
351: /* Disallow wrap-around. */
1.52 christos 352: if (addr + size < addr)
353: return (ENOMEM);
1.35 gwr 354:
1.28 cgd 355: map = &p->p_vmspace->vm_map;
356: /*
357: * XXX Gak! If size is zero we are supposed to sync "all modified
358: * pages with the region containing addr". Unfortunately, we
359: * don't really keep track of individual mmaps so we approximate
360: * by flushing the range of the map entry containing addr.
361: * This can be incorrect if the region splits or is coalesced
362: * with a neighbor.
1.1 cgd 363: */
1.28 cgd 364: if (size == 0) {
365: vm_map_entry_t entry;
366:
367: vm_map_lock_read(map);
368: rv = vm_map_lookup_entry(map, addr, &entry);
369: vm_map_unlock_read(map);
1.29 cgd 370: if (rv == FALSE)
1.52 christos 371: return (ENOMEM);
1.28 cgd 372: addr = entry->start; /* NOTE(review): entry is read after the map read lock was released above - confirm this cannot race with a map change */
373: size = entry->end - entry->start;
374: }
1.1 cgd 375: #ifdef DEBUG
376: if (mmapdebug & MDB_SYNC)
1.49 christos 377: printf("msync: cleaning/flushing address range [%lx-%lx)\n",
1.48 christos 378: addr, addr+size);
1.1 cgd 379: #endif
1.52 christos 380:
381: #if 0
1.1 cgd 382: /*
1.52 christos 383: * XXX Asynchronous msync() causes:
384: * . the process to hang on wchan "vospgw", and
385: * . a "vm_object_page_clean: pager_put error" message to
386: * be printed by the kernel.
1.1 cgd 387: */
1.52 christos 388: syncio = (flags & MS_SYNC) ? TRUE : FALSE;
389: #else
1.28 cgd 390: syncio = TRUE; /* while the #if 0 branch is disabled, MS_ASYNC requests are also passed on as synchronous */
1.52 christos 391: #endif
392: invalidate = (flags & MS_INVALIDATE) ? TRUE : FALSE;
393:
1.1 cgd 394: /*
1.28 cgd 395: * Clean the pages and interpret the return value.
1.1 cgd 396: */
1.28 cgd 397: rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
398: switch (rv) {
399: case KERN_SUCCESS:
400: break;
401: case KERN_INVALID_ADDRESS:
1.52 christos 402: return (ENOMEM);
1.28 cgd 403: case KERN_FAILURE:
404: return (EIO);
1.52 christos 405: case KERN_PAGES_LOCKED:
406: return (EBUSY);
1.28 cgd 407: default:
408: return (EINVAL);
409: }
410: return (0);
1.1 cgd 411: }
412: /* munmap system call: page-align the range, bounds-check it and remove it with vm_map_remove(); returns 0 or EINVAL. */
1.16 mycroft 413: int
1.41 mycroft 414: sys_munmap(p, v, retval)
1.1 cgd 415: register struct proc *p;
1.40 thorpej 416: void *v;
417: register_t *retval;
418: {
1.41 mycroft 419: register struct sys_munmap_args /* {
1.52 christos 420: syscallarg(void *) addr;
1.39 mycroft 421: syscallarg(size_t) len;
1.40 thorpej 422: } */ *uap = v;
1.60 eeh 423: vaddr_t addr;
424: vsize_t size, pageoff;
1.28 cgd 425: vm_map_t map;
1.60 eeh 426: vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.44 christos 427:
1.1 cgd 428:
1.60 eeh 429: addr = (vaddr_t) SCARG(uap, addr);
430: size = (vsize_t) SCARG(uap, len);
1.1 cgd 431: #ifdef DEBUG
432: if (mmapdebug & MDB_FOLLOW)
1.49 christos 433: printf("munmap(%d): addr %lx len %lx\n",
1.48 christos 434: p->p_pid, addr, size);
1.1 cgd 435: #endif
1.28 cgd 436:
1.35 gwr 437: /*
438: * Align the address to a page boundary,
439: * and adjust the size accordingly.
440: */
441: pageoff = (addr & PAGE_MASK);
442: addr -= pageoff;
443: size += pageoff;
1.60 eeh 444: size = (vsize_t) round_page(size);
1.35 gwr 445: if ((int)size < 0) /* NOTE(review): size is narrowed to int here while sys_mmap tests (ssize_t)size - confirm where vsize_t is wider than int */
1.1 cgd 446: return(EINVAL);
447: if (size == 0)
448: return(0);
1.28 cgd 449: /*
450: * Check for illegal addresses. Watch out for address wrap...
451: * Note that VM_*_ADDRESS are not constants due to casts (argh).
452: */
1.37 mycroft 453: if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
1.28 cgd 454: return (EINVAL);
1.44 christos 455: if (vm_min_address > 0 && addr < vm_min_address)
1.28 cgd 456: return (EINVAL);
457: if (addr > addr + size)
458: return (EINVAL);
459: map = &p->p_vmspace->vm_map;
1.54 fvdl 460: #if 0
1.28 cgd 461: /*
462: * Make sure entire range is allocated.
1.54 fvdl 463: * XXX Too strict?
1.28 cgd 464: */
465: if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
1.1 cgd 466: return(EINVAL);
1.54 fvdl 467: #endif
1.1 cgd 468: /* returns nothing but KERN_SUCCESS anyway, so the result is deliberately ignored */
1.28 cgd 469: (void) vm_map_remove(map, addr, addr+size);
1.1 cgd 470: return(0);
471: }
472: /* Clear the UF_MAPPED bit in process p's descriptor flags for fd; no mapping is torn down here. */
1.16 mycroft 473: void
1.32 mycroft 474: munmapfd(p, fd)
475: struct proc *p;
1.17 mycroft 476: int fd;
1.1 cgd 477: {
478: #ifdef DEBUG
479: if (mmapdebug & MDB_FOLLOW)
1.49 christos 480: printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
1.1 cgd 481: #endif
482:
483: /*
1.28 cgd 484: * XXX should vm_deallocate any regions mapped to this file
1.1 cgd 485: */
1.32 mycroft 486: p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; /* fd is used as an index without a range check here */
1.1 cgd 487: }
488: /* mprotect system call: page-align the range and apply prot with vm_map_protect(); returns 0, EACCES or EINVAL. */
1.16 mycroft 489: int
1.41 mycroft 490: sys_mprotect(p, v, retval)
1.1 cgd 491: struct proc *p;
1.40 thorpej 492: void *v;
493: register_t *retval;
494: {
1.41 mycroft 495: struct sys_mprotect_args /* {
1.52 christos 496: syscallarg(void *) addr;
1.33 cgd 497: syscallarg(int) len;
498: syscallarg(int) prot;
1.40 thorpej 499: } */ *uap = v;
1.60 eeh 500: vaddr_t addr;
501: vsize_t size, pageoff;
1.1 cgd 502: register vm_prot_t prot;
503:
1.60 eeh 504: addr = (vaddr_t)SCARG(uap, addr);
505: size = (vsize_t)SCARG(uap, len);
1.35 gwr 506: prot = SCARG(uap, prot) & VM_PROT_ALL;
1.1 cgd 507: #ifdef DEBUG
508: if (mmapdebug & MDB_FOLLOW)
1.49 christos 509: printf("mprotect(%d): addr %lx len %lx prot %d\n", p->p_pid,
1.35 gwr 510: addr, size, prot);
1.1 cgd 511: #endif
1.35 gwr 512: /*
513: * Align the address to a page boundary,
514: * and adjust the size accordingly.
515: */
516: pageoff = (addr & PAGE_MASK);
517: addr -= pageoff;
518: size += pageoff;
1.60 eeh 519: size = (vsize_t) round_page(size);
1.35 gwr 520: if ((int)size < 0) /* NOTE(review): size is narrowed to int for this test - confirm where vsize_t is wider than int */
1.1 cgd 521: return(EINVAL);
1.28 cgd 522:
1.1 cgd 523: switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
524: FALSE)) {
1.51 chuck 525: case KERN_SUCCESS:
526: return (0);
527: case KERN_PROTECTION_FAILURE:
528: return (EACCES);
529: }
530: return (EINVAL); /* every other vm_map_protect() result is reported as EINVAL */
531: }
532:
533: /* minherit system call: page-align the range and set its inheritance with vm_map_inherit(); returns 0, EACCES or EINVAL. */
534: int
535: sys_minherit(p, v, retval)
536: struct proc *p;
537: void *v;
538: register_t *retval;
539: {
540: struct sys_minherit_args /* {
1.52 christos 541: syscallarg(void *) addr;
1.51 chuck 542: syscallarg(int) len;
543: syscallarg(int) inherit;
544: } */ *uap = v;
1.60 eeh 545: vaddr_t addr;
546: vsize_t size, pageoff;
1.51 chuck 547: register vm_inherit_t inherit;
548:
1.60 eeh 549: addr = (vaddr_t)SCARG(uap, addr);
550: size = (vsize_t)SCARG(uap, len);
1.51 chuck 551: inherit = SCARG(uap, inherit);
552: #ifdef DEBUG
553: if (mmapdebug & MDB_FOLLOW)
554: printf("minherit(%d): addr 0x%lx len %lx inherit %d\n", p->p_pid,
555: addr, size, inherit);
556: #endif
557: /*
558: * Align the address to a page boundary,
559: * and adjust the size accordingly.
560: */
561: pageoff = (addr & PAGE_MASK);
562: addr -= pageoff;
563: size += pageoff;
1.60 eeh 564: size = (vsize_t) round_page(size);
1.51 chuck 565: if ((int)size < 0) /* NOTE(review): size is narrowed to int for this test - confirm where vsize_t is wider than int */
566: return(EINVAL);
567:
568: switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
569: inherit)) {
1.1 cgd 570: case KERN_SUCCESS:
571: return (0);
572: case KERN_PROTECTION_FAILURE:
573: return (EACCES);
574: }
1.28 cgd 575: return (EINVAL); /* every other vm_map_inherit() result is reported as EINVAL */
1.1 cgd 576: }
577:
578: /* ARGSUSED */
1.16 mycroft 579: int
1.41 mycroft 580: sys_madvise(p, v, retval)
1.1 cgd 581: struct proc *p;
1.40 thorpej 582: void *v;
583: register_t *retval;
584: {
1.44 christos 585: #if 0
1.41 mycroft 586: struct sys_madvise_args /* {
1.52 christos 587: syscallarg(void *) addr;
1.39 mycroft 588: syscallarg(size_t) len;
1.33 cgd 589: syscallarg(int) behav;
1.40 thorpej 590: } */ *uap = v;
1.44 christos 591: #endif
1.1 cgd 592:
1.62 mrg 593: /* madvise system call stub. Not yet implemented: argument decoding is under #if 0 and every call returns ENOSYS. */
1.63 ! kleink 594: return (ENOSYS);
1.1 cgd 595: }
596:
597: /* ARGSUSED */
1.16 mycroft 598: int
1.41 mycroft 599: sys_mincore(p, v, retval)
1.1 cgd 600: struct proc *p;
1.40 thorpej 601: void *v;
602: register_t *retval;
603: {
1.44 christos 604: #if 0
1.41 mycroft 605: struct sys_mincore_args /* {
1.52 christos 606: syscallarg(void *) addr;
1.39 mycroft 607: syscallarg(size_t) len;
1.33 cgd 608: syscallarg(char *) vec;
1.40 thorpej 609: } */ *uap = v;
1.44 christos 610: #endif
1.1 cgd 611:
612: /* mincore system call stub. Not yet implemented: argument decoding is under #if 0 and every call returns ENOSYS. */
1.63 ! kleink 613: return (ENOSYS);
1.1 cgd 614: }
615: /* mlock system call: page-align the range, apply the wired-page limits and pass it to vm_map_pageable() with FALSE; returns 0 or an errno value. */
1.26 cgd 616: int
1.41 mycroft 617: sys_mlock(p, v, retval)
1.26 cgd 618: struct proc *p;
1.40 thorpej 619: void *v;
620: register_t *retval;
621: {
1.41 mycroft 622: struct sys_mlock_args /* {
1.58 kleink 623: syscallarg(const void *) addr;
1.33 cgd 624: syscallarg(size_t) len;
1.40 thorpej 625: } */ *uap = v;
1.60 eeh 626: vaddr_t addr;
627: vsize_t size, pageoff;
1.28 cgd 628: int error;
629: extern int vm_page_max_wired;
1.26 cgd 630:
1.60 eeh 631: addr = (vaddr_t)SCARG(uap, addr);
632: size = (vsize_t)SCARG(uap, len);
1.28 cgd 633: #ifdef DEBUG
634: if (mmapdebug & MDB_FOLLOW)
1.49 christos 635: printf("mlock(%d): addr %lx len %lx\n",
1.48 christos 636: p->p_pid, addr, size);
1.28 cgd 637: #endif
1.35 gwr 638: /*
639: * Align the address to a page boundary,
640: * and adjust the size accordingly.
641: */
642: pageoff = (addr & PAGE_MASK);
643: addr -= pageoff;
644: size += pageoff;
1.60 eeh 645: size = (vsize_t) round_page(size);
1.35 gwr 646:
647: /* Disallow wrap-around. NOTE(review): size is narrowed to int before the add - confirm where vsize_t is wider than int. */
648: if (addr + (int)size < addr)
1.28 cgd 649: return (EINVAL);
1.35 gwr 650:
1.28 cgd 651: if (atop(size) + cnt.v_wire_count > vm_page_max_wired) /* global limit on wired pages */
652: return (EAGAIN);
653: #ifdef pmap_wired_count
654: if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
655: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
656: return (EAGAIN);
657: #else
1.44 christos 658: if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) /* without pmap_wired_count the call is gated on suser() instead of RLIMIT_MEMLOCK */
1.28 cgd 659: return (error);
660: #endif
661:
662: error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
663: return (error == KERN_SUCCESS ? 0 : ENOMEM); /* any vm_map_pageable() failure is reported as ENOMEM */
1.26 cgd 664: }
665: /* munlock system call: page-align the range and pass it to vm_map_pageable() with TRUE; returns 0 or an errno value. */
666: int
1.41 mycroft 667: sys_munlock(p, v, retval)
1.26 cgd 668: struct proc *p;
1.40 thorpej 669: void *v;
670: register_t *retval;
671: {
1.41 mycroft 672: struct sys_munlock_args /* {
1.58 kleink 673: syscallarg(const void *) addr;
1.33 cgd 674: syscallarg(size_t) len;
1.40 thorpej 675: } */ *uap = v;
1.60 eeh 676: vaddr_t addr;
677: vsize_t size, pageoff;
1.28 cgd 678: int error;
679:
1.60 eeh 680: addr = (vaddr_t)SCARG(uap, addr);
681: size = (vsize_t)SCARG(uap, len);
1.28 cgd 682: #ifdef DEBUG
683: if (mmapdebug & MDB_FOLLOW)
1.49 christos 684: printf("munlock(%d): addr %lx len %lx\n",
1.48 christos 685: p->p_pid, addr, size);
1.28 cgd 686: #endif
1.35 gwr 687: /*
688: * Align the address to a page boundary,
689: * and adjust the size accordingly.
690: */
691: pageoff = (addr & PAGE_MASK);
692: addr -= pageoff;
693: size += pageoff;
1.60 eeh 694: size = (vsize_t) round_page(size);
1.35 gwr 695:
696: /* Disallow wrap-around. NOTE(review): size is narrowed to int before the add - confirm where vsize_t is wider than int. */
697: if (addr + (int)size < addr)
1.28 cgd 698: return (EINVAL);
1.35 gwr 699:
1.28 cgd 700: #ifndef pmap_wired_count
1.44 christos 701: if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) /* without pmap_wired_count the call is gated on suser() */
1.28 cgd 702: return (error);
703: #endif
1.26 cgd 704:
1.28 cgd 705: error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
706: return (error == KERN_SUCCESS ? 0 : ENOMEM); /* any vm_map_pageable() failure is reported as ENOMEM */
1.26 cgd 707: }
708:
1.1 cgd 709: /*
710: * Internal version of mmap.
711: * Currently used by mmap, exec, and sys5 shared memory.
1.28 cgd 712: * Handle is either a vnode pointer or NULL for MAP_ANON.
1.35 gwr 713: * This (internal) interface requires the file offset to be
714: * page-aligned by the caller. (Also addr, if MAP_FIXED).
1.1 cgd 715: * Returns 0 on success (including size == 0), otherwise ENOMEM, EACCES or EINVAL. */
1.16 mycroft 716: int
1.3 cgd 717: vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
1.1 cgd 718: register vm_map_t map;
1.60 eeh 719: register vaddr_t *addr;
720: register vsize_t size;
1.28 cgd 721: vm_prot_t prot, maxprot;
1.1 cgd 722: register int flags;
723: caddr_t handle; /* XXX should be vp */
1.60 eeh 724: vaddr_t foff; /* NOTE(review): sys_mmap passes an off_t here - confirm vaddr_t is wide enough for file offsets */
1.1 cgd 725: {
726: register vm_pager_t pager;
727: boolean_t fitit;
728: vm_object_t object;
1.28 cgd 729: struct vnode *vp = NULL;
1.1 cgd 730: int type;
731: int rv = KERN_SUCCESS;
732:
733: if (size == 0)
734: return (0);
1.28 cgd 735:
1.35 gwr 736: /* The file offset must be page aligned. */
737: if (foff & PAGE_MASK)
738: return (EINVAL);
739:
1.1 cgd 740: if ((flags & MAP_FIXED) == 0) {
1.35 gwr 741: /* The address is just a hint */
1.1 cgd 742: fitit = TRUE;
743: *addr = round_page(*addr);
744: } else {
1.35 gwr 745: /*
746: * Use the specified address exactly
747: * (but check alignment first).
748: */
1.1 cgd 749: fitit = FALSE;
1.35 gwr 750: if (*addr & PAGE_MASK)
751: return (EINVAL);
1.28 cgd 752: (void)vm_deallocate(map, *addr, size); /* drop whatever is currently mapped at the fixed range */
1.1 cgd 753: }
1.28 cgd 754:
1.1 cgd 755: /*
756: * Lookup/allocate pager. All except an unnamed anonymous lookup
757: * gain a reference to ensure continued existence of the object.
758: * (XXX the exception is to appease the pageout daemon)
759: */
1.27 mycroft 760: if (flags & MAP_ANON) {
1.1 cgd 761: type = PG_DFLT;
1.27 mycroft 762: foff = 0;
763: } else {
1.1 cgd 764: vp = (struct vnode *)handle;
765: if (vp->v_type == VCHR) {
766: type = PG_DEVICE;
1.34 cgd 767: handle = (caddr_t)(long)vp->v_rdev; /* for devices the pager handle becomes the device number, not the vnode */
1.1 cgd 768: } else
769: type = PG_VNODE;
770: }
1.12 cgd 771: pager = vm_pager_allocate(type, handle, size, prot, foff);
1.1 cgd 772: if (pager == NULL)
773: return (type == PG_DEVICE ? EINVAL : ENOMEM);
774: /*
775: * Find object and release extra reference gained by lookup
776: */
777: object = vm_object_lookup(pager);
778: vm_object_deallocate(object);
1.28 cgd 779:
1.1 cgd 780: /*
781: * Anonymous memory.
782: */
1.20 cgd 783: if (flags & MAP_ANON) {
1.1 cgd 784: rv = vm_allocate_with_pager(map, addr, size, fitit,
1.28 cgd 785: pager, foff, TRUE);
1.1 cgd 786: if (rv != KERN_SUCCESS) {
787: if (handle == NULL)
788: vm_pager_deallocate(pager);
789: else
790: vm_object_deallocate(object);
791: goto out;
792: }
793: /*
794: * Don't cache anonymous objects.
795: * Loses the reference gained by vm_pager_allocate.
1.28 cgd 796: * Note that object will be NULL when handle == NULL,
797: * this is ok since vm_allocate_with_pager has made
798: * sure that these objects are uncached.
1.1 cgd 799: */
800: (void) pager_cache(object, FALSE);
801: #ifdef DEBUG
802: if (mmapdebug & MDB_MAPIT)
1.49 christos 803: printf("vm_mmap(%d): ANON *addr %lx size %lx pager %p\n",
1.48 christos 804: curproc->p_pid, *addr, size, pager);
1.1 cgd 805: #endif
806: }
807: /*
1.28 cgd 808: * Must be a mapped file.
1.1 cgd 809: * Distinguish between character special and regular files.
810: */
811: else if (vp->v_type == VCHR) {
812: rv = vm_allocate_with_pager(map, addr, size, fitit,
1.28 cgd 813: pager, foff, FALSE);
1.1 cgd 814: /*
815: * Uncache the object and lose the reference gained
816: * by vm_pager_allocate(). If the call to
817: * vm_allocate_with_pager() was successful, then we
818: * gained an additional reference ensuring the object
819: * will continue to exist. If the call failed then
820: * the deallocate call below will terminate the
821: * object which is fine.
822: */
823: (void) pager_cache(object, FALSE);
824: if (rv != KERN_SUCCESS)
825: goto out;
826: }
827: /*
1.59 thorpej 828: * A regular file or block special file
1.1 cgd 829: */
830: else {
831: #ifdef DEBUG
832: if (object == NULL)
1.49 christos 833: printf("vm_mmap: no object: vp %p, pager %p\n",
1.48 christos 834: vp, pager);
1.1 cgd 835: #endif
836: /*
837: * Map it directly.
838: * Allows modifications to go out to the vnode.
839: */
840: if (flags & MAP_SHARED) {
841: rv = vm_allocate_with_pager(map, addr, size,
842: fitit, pager,
1.28 cgd 843: foff, FALSE);
1.1 cgd 844: if (rv != KERN_SUCCESS) {
845: vm_object_deallocate(object);
846: goto out;
847: }
848: /*
849: * Don't cache the object. This is the easiest way
850: * of ensuring that data gets back to the filesystem
851: * because vnode_pager_deallocate() will fsync the
852: * vnode. pager_cache() will lose the extra ref.
853: */
854: if (prot & VM_PROT_WRITE)
855: pager_cache(object, FALSE);
856: else
857: vm_object_deallocate(object);
858: }
859: /*
860: * Copy-on-write of file. Two flavors.
861: * MAP_COPY is true COW, you essentially get a snapshot of
862: * the region at the time of mapping. MAP_PRIVATE means only
863: * that your changes are not reflected back to the object.
864: * Changes made by others will be seen.
865: */
866: else {
867: vm_map_t tmap;
1.60 eeh 868: vaddr_t off;
1.1 cgd 869:
870: /* locate and allocate the target address space */
1.43 pk 871: vm_map_lock(map);
1.36 pk 872: if (fitit) {
873: /*
1.43 pk 874: * Find space in the map at a location
875: * that is compatible with the object/offset
876: * we're going to attach there.
1.36 pk 877: */
878: again:
879: if (vm_map_findspace(map, *addr, size,
880: addr) == 1) {
881: rv = KERN_NO_SPACE;
882: } else {
1.46 gwr 883: #ifdef PMAP_PREFER
884: PMAP_PREFER(foff, addr);
885: #endif
1.36 pk 886: rv = vm_map_insert(map, NULL,
1.60 eeh 887: (vaddr_t)0,
1.43 pk 888: *addr, *addr+size);
889: /*
890: * vm_map_insert() may fail if
1.46 gwr 891: * PMAP_PREFER() has altered
1.43 pk 892: * the initial address.
893: * If so, we start again.
894: */
1.36 pk 895: if (rv == KERN_NO_SPACE)
896: goto again;
897: }
898: } else {
1.60 eeh 899: rv = vm_map_insert(map, NULL, (vaddr_t)0,
1.43 pk 900: *addr, *addr + size);
1.36 pk 901:
1.43 pk 902: #ifdef DEBUG
1.36 pk 903: /*
904: * Check against PMAP preferred address. If
905: * there's a mismatch, these pages should not
906: * be shared with others. <howto?>
907: */
1.43 pk 908: if (rv == KERN_SUCCESS &&
909: (mmapdebug & MDB_MAPIT)) {
1.60 eeh 910: paddr_t paddr = *addr;
1.46 gwr 911: #ifdef PMAP_PREFER
912: PMAP_PREFER(foff, &paddr);
913: #endif
1.36 pk 914: if (paddr != *addr)
1.49 christos 915: printf(
1.48 christos 916: "vm_mmap: pmap botch! "
917: "[foff %lx, addr %lx, paddr %lx]\n",
918: foff, *addr, paddr);
1.36 pk 919: }
1.43 pk 920: #endif
1.36 pk 921: }
1.43 pk 922: vm_map_unlock(map);
1.36 pk 923:
1.1 cgd 924: if (rv != KERN_SUCCESS) {
925: vm_object_deallocate(object);
926: goto out;
927: }
928: tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
929: VM_MIN_ADDRESS+size, TRUE);
930: off = VM_MIN_ADDRESS;
931: rv = vm_allocate_with_pager(tmap, &off, size,
1.43 pk 932: FALSE, pager,
1.28 cgd 933: foff, FALSE);
1.1 cgd 934: if (rv != KERN_SUCCESS) {
935: vm_object_deallocate(object);
936: vm_map_deallocate(tmap);
937: goto out;
938: }
939: /*
940: * (XXX)
941: * MAP_PRIVATE implies that we see changes made by
942: * others. To ensure that we need to guarantee that
943: * no copy object is created (otherwise original
944: * pages would be pushed to the copy object and we
945: * would never see changes made by others). We
946: * totally sleaze it right now by marking the object
947: * internal temporarily.
948: */
949: if ((flags & MAP_COPY) == 0)
1.15 cgd 950: object->flags |= OBJ_INTERNAL;
1.1 cgd 951: rv = vm_map_copy(map, tmap, *addr, size, off,
952: FALSE, FALSE);
1.15 cgd 953: object->flags &= ~OBJ_INTERNAL; /* cleared unconditionally, including the MAP_COPY case where it was not set above */
1.1 cgd 954: /*
955: * (XXX)
956: * My oh my, this only gets worse...
957: * Force creation of a shadow object so that
958: * vm_map_fork will do the right thing.
959: */
960: if ((flags & MAP_COPY) == 0) {
961: vm_map_t tmap; /* shadows the tmap declared at the top of the enclosing else-block */
962: vm_map_entry_t tentry;
963: vm_object_t tobject;
1.60 eeh 964: vaddr_t toffset;
1.1 cgd 965: vm_prot_t tprot;
966: boolean_t twired, tsu;
967:
968: tmap = map;
969: vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
970: &tentry, &tobject, &toffset,
971: &tprot, &twired, &tsu);
972: vm_map_lookup_done(tmap, tentry);
973: }
974: /*
975: * (XXX)
976: * Map copy code cannot detect sharing unless a
977: * sharing map is involved. So we cheat and write
978: * protect everything ourselves.
979: */
1.28 cgd 980: vm_object_pmap_copy(object, foff, foff + size);
1.1 cgd 981: vm_object_deallocate(object);
982: vm_map_deallocate(tmap);
983: if (rv != KERN_SUCCESS)
984: goto out;
985: }
986: #ifdef DEBUG
987: if (mmapdebug & MDB_MAPIT)
1.49 christos 988: printf("vm_mmap(%d): FILE *addr %lx size %lx pager %p\n",
1.48 christos 989: curproc->p_pid, *addr, size, pager);
1.1 cgd 990: #endif
991: }
992: /*
1.28 cgd 993: * Correct protection (default is VM_PROT_ALL).
994: * If maxprot is different than prot, we must set both explicitly.
1.1 cgd 995: */
1.28 cgd 996: rv = KERN_SUCCESS;
997: if (maxprot != VM_PROT_ALL)
1.17 mycroft 998: rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
1.28 cgd 999: if (rv == KERN_SUCCESS && prot != maxprot)
1.17 mycroft 1000: rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
1.28 cgd 1001: if (rv != KERN_SUCCESS) {
1002: (void) vm_deallocate(map, *addr, size);
1003: goto out;
1.1 cgd 1004: }
1005: /*
1006: * Shared memory is also shared with children.
1007: */
1008: if (flags & MAP_SHARED) {
1.28 cgd 1009: rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
1.1 cgd 1010: if (rv != KERN_SUCCESS) {
1011: (void) vm_deallocate(map, *addr, size);
1012: goto out;
1013: }
1014: }
1015: out: /* single exit: translate the Mach-style rv into an errno value */
1016: #ifdef DEBUG
1017: if (mmapdebug & MDB_MAPIT)
1.49 christos 1018: printf("vm_mmap: rv %d\n", rv);
1.1 cgd 1019: #endif
1020: switch (rv) {
1021: case KERN_SUCCESS:
1022: return (0);
1023: case KERN_INVALID_ADDRESS:
1024: case KERN_NO_SPACE:
1025: return (ENOMEM);
1026: case KERN_PROTECTION_FAILURE:
1027: return (EACCES);
1028: default:
1029: return (EINVAL);
1030: }
1031: }
CVSweb <webmaster@jp.NetBSD.org>