Annotation of src/sys/uvm/uvm_mmap.c, Revision 1.35.2.1
1.35.2.1! bouyer 1: /* $NetBSD: uvm_mmap.c,v 1.44 2000/09/13 15:00:25 thorpej Exp $ */
1.1 mrg 2:
3: /*
4: * Copyright (c) 1997 Charles D. Cranor and Washington University.
5: * Copyright (c) 1991, 1993 The Regents of the University of California.
6: * Copyright (c) 1988 University of Utah.
7: *
8: * All rights reserved.
9: *
10: * This code is derived from software contributed to Berkeley by
11: * the Systems Programming Group of the University of Utah Computer
12: * Science Department.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: * 3. All advertising materials mentioning features or use of this software
23: * must display the following acknowledgement:
24: * This product includes software developed by the Charles D. Cranor,
25: * Washington University, University of California, Berkeley and
26: * its contributors.
27: * 4. Neither the name of the University nor the names of its contributors
28: * may be used to endorse or promote products derived from this software
29: * without specific prior written permission.
30: *
31: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41: * SUCH DAMAGE.
42: *
43: * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
44: * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
1.3 mrg 45: * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
1.1 mrg 46: */
47:
48: /*
49: * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
50: * function.
51: */
52: #include <sys/param.h>
53: #include <sys/systm.h>
54: #include <sys/file.h>
55: #include <sys/filedesc.h>
56: #include <sys/resourcevar.h>
57: #include <sys/mman.h>
58: #include <sys/mount.h>
59: #include <sys/proc.h>
60: #include <sys/malloc.h>
61: #include <sys/vnode.h>
62: #include <sys/conf.h>
1.9 mrg 63: #include <sys/stat.h>
1.1 mrg 64:
65: #include <miscfs/specfs/specdev.h>
66:
67: #include <sys/syscallargs.h>
68:
69: #include <uvm/uvm.h>
70: #include <uvm/uvm_device.h>
71: #include <uvm/uvm_vnode.h>
72:
73:
74: /*
75: * unimplemented VM system calls:
76: */
77:
78: /*
79: * sys_sbrk: sbrk system call.
80: */
81:
82: /* ARGSUSED */
1.6 mrg 83: int
84: sys_sbrk(p, v, retval)
85: struct proc *p;
86: void *v;
87: register_t *retval;
1.1 mrg 88: {
89: #if 0
1.6 mrg 90: struct sys_sbrk_args /* {
1.33 kleink 91: syscallarg(intptr_t) incr;
1.20 mrg 92: } */ *uap = v;
1.1 mrg 93: #endif
1.6 mrg 94:
1.17 kleink 95: return (ENOSYS);
1.1 mrg 96: }
97:
98: /*
99: * sys_sstk: sstk system call.
100: */
101:
102: /* ARGSUSED */
1.6 mrg 103: int
104: sys_sstk(p, v, retval)
105: struct proc *p;
106: void *v;
107: register_t *retval;
1.1 mrg 108: {
109: #if 0
1.6 mrg 110: struct sys_sstk_args /* {
1.20 mrg 111: syscallarg(int) incr;
112: } */ *uap = v;
1.1 mrg 113: #endif
1.6 mrg 114:
1.17 kleink 115: return (ENOSYS);
1.1 mrg 116: }
117:
118: /*
119: * sys_mincore: determine if pages are in core or not.
120: */
121:
122: /* ARGSUSED */
1.6 mrg 123: int
124: sys_mincore(p, v, retval)
125: struct proc *p;
126: void *v;
127: register_t *retval;
1.1 mrg 128: {
1.6 mrg 129: struct sys_mincore_args /* {
1.22 thorpej 130: syscallarg(void *) addr;
1.20 mrg 131: syscallarg(size_t) len;
132: syscallarg(char *) vec;
133: } */ *uap = v;
1.22 thorpej 134: vm_page_t m;
135: char *vec, pgi;
136: struct uvm_object *uobj;
137: struct vm_amap *amap;
138: struct vm_anon *anon;
139: vm_map_entry_t entry;
140: vaddr_t start, end, lim;
141: vm_map_t map;
142: vsize_t len;
143: int error = 0, npgs;
144:
145: map = &p->p_vmspace->vm_map;
146:
147: start = (vaddr_t)SCARG(uap, addr);
148: len = SCARG(uap, len);
149: vec = SCARG(uap, vec);
150:
151: if (start & PAGE_MASK)
152: return (EINVAL);
153: len = round_page(len);
154: end = start + len;
155: if (end <= start)
156: return (EINVAL);
157:
158: npgs = len >> PAGE_SHIFT;
159:
160: if (uvm_useracc(vec, npgs, B_WRITE) == FALSE)
161: return (EFAULT);
162:
163: /*
164: * Lock down vec, so our returned status isn't outdated by
165: * storing the status byte for a page.
166: */
167: uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
168:
169: vm_map_lock_read(map);
170:
171: if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
172: error = ENOMEM;
173: goto out;
174: }
175:
176: for (/* nothing */;
177: entry != &map->header && entry->start < end;
178: entry = entry->next) {
179: #ifdef DIAGNOSTIC
180: if (UVM_ET_ISSUBMAP(entry))
181: panic("mincore: user map has submap");
182: if (start < entry->start)
183: panic("mincore: hole");
1.1 mrg 184: #endif
1.22 thorpej 185: /* Make sure there are no holes. */
186: if (entry->end < end &&
187: (entry->next == &map->header ||
188: entry->next->start > entry->end)) {
189: error = ENOMEM;
190: goto out;
191: }
1.6 mrg 192:
1.22 thorpej 193: lim = end < entry->end ? end : entry->end;
194:
195: /*
1.31 thorpej 196: * Special case for objects with no "real" pages. Those
197: * are always considered resident (mapped devices).
1.22 thorpej 198: */
199: if (UVM_ET_ISOBJ(entry)) {
200: #ifdef DIAGNOSTIC
201: if (UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj))
202: panic("mincore: user map has kernel object");
203: #endif
1.31 thorpej 204: if (entry->object.uvm_obj->pgops->pgo_releasepg
205: == NULL) {
1.22 thorpej 206: for (/* nothing */; start < lim;
207: start += PAGE_SIZE, vec++)
208: subyte(vec, 1);
209: continue;
210: }
211: }
212:
1.32 thorpej 213: amap = entry->aref.ar_amap; /* top layer */
214: uobj = entry->object.uvm_obj; /* bottom layer */
1.22 thorpej 215:
216: if (amap != NULL)
217: amap_lock(amap);
218: if (uobj != NULL)
219: simple_lock(&uobj->vmobjlock);
220:
221: for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
222: pgi = 0;
223: if (amap != NULL) {
224: /* Check the top layer first. */
225: anon = amap_lookup(&entry->aref,
226: start - entry->start);
227: /* Don't need to lock anon here. */
228: if (anon != NULL && anon->u.an_page != NULL) {
229: /*
230: * Anon has the page for this entry
231: * offset.
232: */
233: pgi = 1;
234: }
235: }
236:
237: if (uobj != NULL && pgi == 0) {
238: /* Check the bottom layer. */
239: m = uvm_pagelookup(uobj,
240: entry->offset + (start - entry->start));
241: if (m != NULL) {
242: /*
243: * Object has the page for this entry
244: * offset.
245: */
246: pgi = 1;
247: }
248: }
249:
250: (void) subyte(vec, pgi);
251: }
252:
253: if (uobj != NULL)
1.27 thorpej 254: simple_unlock(&uobj->vmobjlock);
1.22 thorpej 255: if (amap != NULL)
256: amap_unlock(amap);
257: }
258:
259: out:
260: vm_map_unlock_read(map);
261: uvm_vsunlock(p, SCARG(uap, vec), npgs);
262: return (error);
1.1 mrg 263: }
264:
#if 0
/*
 * munmapfd: unmap file descriptor
 *
 * => compiled out; only clears UF_MAPPED in the descriptor's flags.
 *
 * XXX: is this actually a useful function?   could it be useful?
 */

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	struct filedesc *fdp = p->p_fd;

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	fdp->fd_ofileflags[fd] &= ~UF_MAPPED;
}
#endif
284:
285: /*
286: * sys_mmap: mmap system call.
287: *
288: * => file offest and address may not be page aligned
289: * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
290: * - if address isn't page aligned the mapping starts at trunc_page(addr)
291: * and the return value is adjusted up by the page offset.
292: */
293:
1.6 mrg 294: int
295: sys_mmap(p, v, retval)
296: struct proc *p;
297: void *v;
298: register_t *retval;
299: {
1.35.2.1! bouyer 300: struct sys_mmap_args /* {
1.6 mrg 301: syscallarg(caddr_t) addr;
302: syscallarg(size_t) len;
303: syscallarg(int) prot;
304: syscallarg(int) flags;
305: syscallarg(int) fd;
306: syscallarg(long) pad;
307: syscallarg(off_t) pos;
308: } */ *uap = v;
1.12 eeh 309: vaddr_t addr;
1.9 mrg 310: struct vattr va;
1.6 mrg 311: off_t pos;
1.12 eeh 312: vsize_t size, pageoff;
1.6 mrg 313: vm_prot_t prot, maxprot;
314: int flags, fd;
1.12 eeh 315: vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.35.2.1! bouyer 316: struct filedesc *fdp = p->p_fd;
! 317: struct file *fp;
1.6 mrg 318: struct vnode *vp;
319: caddr_t handle;
320: int error;
321:
322: /*
323: * first, extract syscall args from the uap.
324: */
325:
1.12 eeh 326: addr = (vaddr_t) SCARG(uap, addr);
327: size = (vsize_t) SCARG(uap, len);
1.6 mrg 328: prot = SCARG(uap, prot) & VM_PROT_ALL;
329: flags = SCARG(uap, flags);
330: fd = SCARG(uap, fd);
331: pos = SCARG(uap, pos);
332:
333: /*
1.24 thorpej 334: * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
335: * validate the flags.
336: */
337: if (flags & MAP_COPY)
338: flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
339: if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
340: return (EINVAL);
341:
342: /*
1.6 mrg 343: * align file position and save offset. adjust size.
344: */
345:
346: pageoff = (pos & PAGE_MASK);
347: pos -= pageoff;
348: size += pageoff; /* add offset */
1.12 eeh 349: size = (vsize_t) round_page(size); /* round up */
1.6 mrg 350: if ((ssize_t) size < 0)
351: return (EINVAL); /* don't allow wrap */
352:
353: /*
354: * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
355: */
356:
357: if (flags & MAP_FIXED) {
358:
359: /* ensure address and file offset are aligned properly */
360: addr -= pageoff;
361: if (addr & PAGE_MASK)
362: return (EINVAL);
363:
364: if (VM_MAXUSER_ADDRESS > 0 &&
365: (addr + size) > VM_MAXUSER_ADDRESS)
366: return (EINVAL);
367: if (vm_min_address > 0 && addr < vm_min_address)
368: return (EINVAL);
369: if (addr > addr + size)
370: return (EINVAL); /* no wrapping! */
371:
372: } else {
373:
374: /*
375: * not fixed: make sure we skip over the largest possible heap.
376: * we will refine our guess later (e.g. to account for VAC, etc)
377: */
1.35.2.1! bouyer 378: if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr+MAXDSIZ))
! 379: addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
! 380: MAXDSIZ);
1.6 mrg 381: }
382:
383: /*
384: * check for file mappings (i.e. not anonymous) and verify file.
385: */
386:
387: if ((flags & MAP_ANON) == 0) {
388:
389: if (fd < 0 || fd >= fdp->fd_nfiles)
390: return(EBADF); /* failed range check? */
391: fp = fdp->fd_ofiles[fd]; /* convert to file pointer */
392: if (fp == NULL)
393: return(EBADF);
394:
395: if (fp->f_type != DTYPE_VNODE)
1.7 kleink 396: return (ENODEV); /* only mmap vnodes! */
1.6 mrg 397: vp = (struct vnode *)fp->f_data; /* convert to vnode */
398:
1.11 thorpej 399: if (vp->v_type != VREG && vp->v_type != VCHR &&
400: vp->v_type != VBLK)
401: return (ENODEV); /* only REG/CHR/BLK support mmap */
1.6 mrg 402:
1.35.2.1! bouyer 403: if (vp->v_type == VREG && (pos + size) < pos)
! 404: return (EOVERFLOW); /* no offset wrapping */
! 405:
1.6 mrg 406: /* special case: catch SunOS style /dev/zero */
407: if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
408: flags |= MAP_ANON;
409: goto is_anon;
410: }
411:
412: /*
413: * Old programs may not select a specific sharing type, so
414: * default to an appropriate one.
415: *
416: * XXX: how does MAP_ANON fit in the picture?
417: */
1.24 thorpej 418: if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
1.8 tv 419: #if defined(DEBUG)
1.6 mrg 420: printf("WARNING: defaulted mmap() share type to "
421: "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
422: "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
423: p->p_comm);
1.1 mrg 424: #endif
1.6 mrg 425: if (vp->v_type == VCHR)
426: flags |= MAP_SHARED; /* for a device */
427: else
428: flags |= MAP_PRIVATE; /* for a file */
429: }
430:
431: /*
432: * MAP_PRIVATE device mappings don't make sense (and aren't
433: * supported anyway). However, some programs rely on this,
434: * so just change it to MAP_SHARED.
435: */
436: if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
1.1 mrg 437: #if defined(DIAGNOSTIC)
1.6 mrg 438: printf("WARNING: converted MAP_PRIVATE device mapping "
439: "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
440: p->p_comm);
1.1 mrg 441: #endif
1.6 mrg 442: flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
443: }
1.1 mrg 444:
1.6 mrg 445: /*
446: * now check protection
447: */
448:
449: maxprot = VM_PROT_EXECUTE;
450:
451: /* check read access */
452: if (fp->f_flag & FREAD)
453: maxprot |= VM_PROT_READ;
454: else if (prot & PROT_READ)
455: return (EACCES);
456:
1.9 mrg 457: /* check write access, shared case first */
1.6 mrg 458: if (flags & MAP_SHARED) {
1.9 mrg 459: /*
460: * if the file is writable, only add PROT_WRITE to
461: * maxprot if the file is not immutable, append-only.
462: * otherwise, if we have asked for PROT_WRITE, return
463: * EPERM.
464: */
465: if (fp->f_flag & FWRITE) {
466: if ((error =
467: VOP_GETATTR(vp, &va, p->p_ucred, p)))
468: return (error);
469: if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
470: maxprot |= VM_PROT_WRITE;
471: else if (prot & PROT_WRITE)
472: return (EPERM);
473: }
1.6 mrg 474: else if (prot & PROT_WRITE)
475: return (EACCES);
476: } else {
477: /* MAP_PRIVATE mappings can always write to */
478: maxprot |= VM_PROT_WRITE;
479: }
480:
481: /*
482: * set handle to vnode
483: */
1.1 mrg 484:
1.6 mrg 485: handle = (caddr_t)vp;
1.1 mrg 486:
1.6 mrg 487: } else { /* MAP_ANON case */
1.24 thorpej 488: /*
489: * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
490: */
1.6 mrg 491: if (fd != -1)
492: return (EINVAL);
1.1 mrg 493:
1.24 thorpej 494: is_anon: /* label for SunOS style /dev/zero */
1.6 mrg 495: handle = NULL;
496: maxprot = VM_PROT_ALL;
497: pos = 0;
1.28 cgd 498: }
499:
500: /*
501: * XXX (in)sanity check. We don't do proper datasize checking
502: * XXX for anonymous (or private writable) mmap(). However,
503: * XXX know that if we're trying to allocate more than the amount
504: * XXX remaining under our current data size limit, _that_ should
505: * XXX be disallowed.
506: */
507: if ((flags & MAP_ANON) != 0 ||
508: ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
509: if (size >
510: (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) {
511: return (ENOMEM);
512: }
1.6 mrg 513: }
514:
515: /*
516: * now let kernel internal function uvm_mmap do the work.
517: */
518:
519: error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
1.25 thorpej 520: flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.6 mrg 521:
522: if (error == 0)
523: /* remember to add offset */
524: *retval = (register_t)(addr + pageoff);
1.1 mrg 525:
1.6 mrg 526: return (error);
1.1 mrg 527: }
528:
529: /*
530: * sys___msync13: the msync system call (a front-end for flush)
531: */
532:
1.6 mrg 533: int
534: sys___msync13(p, v, retval)
535: struct proc *p;
536: void *v;
537: register_t *retval;
538: {
539: struct sys___msync13_args /* {
540: syscallarg(caddr_t) addr;
541: syscallarg(size_t) len;
542: syscallarg(int) flags;
543: } */ *uap = v;
1.12 eeh 544: vaddr_t addr;
545: vsize_t size, pageoff;
1.6 mrg 546: vm_map_t map;
547: int rv, flags, uvmflags;
548:
549: /*
550: * extract syscall args from the uap
551: */
552:
1.12 eeh 553: addr = (vaddr_t)SCARG(uap, addr);
554: size = (vsize_t)SCARG(uap, len);
1.6 mrg 555: flags = SCARG(uap, flags);
556:
557: /* sanity check flags */
558: if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
559: (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
560: (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
1.1 mrg 561: return (EINVAL);
1.6 mrg 562: if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
1.1 mrg 563: flags |= MS_SYNC;
564:
1.6 mrg 565: /*
566: * align the address to a page boundary, and adjust the size accordingly
567: */
568:
569: pageoff = (addr & PAGE_MASK);
570: addr -= pageoff;
571: size += pageoff;
1.12 eeh 572: size = (vsize_t) round_page(size);
1.6 mrg 573:
574: /* disallow wrap-around. */
575: if (addr + size < addr)
576: return (EINVAL);
577:
578: /*
579: * get map
580: */
581:
582: map = &p->p_vmspace->vm_map;
583:
584: /*
585: * XXXCDC: do we really need this semantic?
586: *
587: * XXX Gak! If size is zero we are supposed to sync "all modified
588: * pages with the region containing addr". Unfortunately, we
589: * don't really keep track of individual mmaps so we approximate
590: * by flushing the range of the map entry containing addr.
591: * This can be incorrect if the region splits or is coalesced
592: * with a neighbor.
593: */
594: if (size == 0) {
595: vm_map_entry_t entry;
596:
597: vm_map_lock_read(map);
598: rv = uvm_map_lookup_entry(map, addr, &entry);
599: if (rv == TRUE) {
600: addr = entry->start;
601: size = entry->end - entry->start;
602: }
603: vm_map_unlock_read(map);
604: if (rv == FALSE)
605: return (EINVAL);
606: }
607:
608: /*
609: * translate MS_ flags into PGO_ flags
610: */
1.34 thorpej 611: uvmflags = PGO_CLEANIT;
612: if (flags & MS_INVALIDATE)
613: uvmflags |= PGO_FREE;
1.6 mrg 614: if (flags & MS_SYNC)
615: uvmflags |= PGO_SYNCIO;
616: else
617: uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */
618:
619: /*
620: * doit!
621: */
622: rv = uvm_map_clean(map, addr, addr+size, uvmflags);
623:
624: /*
625: * and return...
626: */
627: switch (rv) {
628: case KERN_SUCCESS:
629: return(0);
630: case KERN_INVALID_ADDRESS:
631: return (ENOMEM);
632: case KERN_FAILURE:
633: return (EIO);
634: case KERN_PAGES_LOCKED: /* XXXCDC: uvm doesn't return this */
635: return (EBUSY);
636: default:
637: return (EINVAL);
638: }
639: /*NOTREACHED*/
1.1 mrg 640: }
641:
642: /*
643: * sys_munmap: unmap a users memory
644: */
645:
1.6 mrg 646: int
647: sys_munmap(p, v, retval)
1.35.2.1! bouyer 648: struct proc *p;
1.6 mrg 649: void *v;
650: register_t *retval;
651: {
1.35.2.1! bouyer 652: struct sys_munmap_args /* {
1.6 mrg 653: syscallarg(caddr_t) addr;
654: syscallarg(size_t) len;
655: } */ *uap = v;
1.12 eeh 656: vaddr_t addr;
657: vsize_t size, pageoff;
1.6 mrg 658: vm_map_t map;
1.12 eeh 659: vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.6 mrg 660: struct vm_map_entry *dead_entries;
661:
662: /*
663: * get syscall args...
664: */
665:
1.12 eeh 666: addr = (vaddr_t) SCARG(uap, addr);
667: size = (vsize_t) SCARG(uap, len);
1.6 mrg 668:
669: /*
670: * align the address to a page boundary, and adjust the size accordingly
671: */
672:
673: pageoff = (addr & PAGE_MASK);
674: addr -= pageoff;
675: size += pageoff;
1.12 eeh 676: size = (vsize_t) round_page(size);
1.6 mrg 677:
678: if ((int)size < 0)
679: return (EINVAL);
680: if (size == 0)
681: return (0);
682:
683: /*
684: * Check for illegal addresses. Watch out for address wrap...
685: * Note that VM_*_ADDRESS are not constants due to casts (argh).
686: */
687: if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
688: return (EINVAL);
689: if (vm_min_address > 0 && addr < vm_min_address)
690: return (EINVAL);
691: if (addr > addr + size)
692: return (EINVAL);
693: map = &p->p_vmspace->vm_map;
694:
695:
696: vm_map_lock(map); /* lock map so we can checkprot */
697:
698: /*
699: * interesting system call semantic: make sure entire range is
700: * allocated before allowing an unmap.
701: */
702:
703: if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
704: vm_map_unlock(map);
705: return (EINVAL);
706: }
707:
708: /*
709: * doit!
710: */
1.15 chuck 711: (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);
1.1 mrg 712:
1.6 mrg 713: vm_map_unlock(map); /* and unlock */
1.1 mrg 714:
1.6 mrg 715: if (dead_entries != NULL)
716: uvm_unmap_detach(dead_entries, 0);
1.1 mrg 717:
1.6 mrg 718: return (0);
1.1 mrg 719: }
720:
721: /*
722: * sys_mprotect: the mprotect system call
723: */
724:
1.6 mrg 725: int
726: sys_mprotect(p, v, retval)
727: struct proc *p;
728: void *v;
729: register_t *retval;
730: {
731: struct sys_mprotect_args /* {
732: syscallarg(caddr_t) addr;
733: syscallarg(int) len;
734: syscallarg(int) prot;
735: } */ *uap = v;
1.12 eeh 736: vaddr_t addr;
737: vsize_t size, pageoff;
1.6 mrg 738: vm_prot_t prot;
739: int rv;
740:
741: /*
742: * extract syscall args from uap
743: */
744:
1.12 eeh 745: addr = (vaddr_t)SCARG(uap, addr);
746: size = (vsize_t)SCARG(uap, len);
1.6 mrg 747: prot = SCARG(uap, prot) & VM_PROT_ALL;
748:
749: /*
750: * align the address to a page boundary, and adjust the size accordingly
751: */
752: pageoff = (addr & PAGE_MASK);
753: addr -= pageoff;
754: size += pageoff;
1.12 eeh 755: size = (vsize_t) round_page(size);
1.6 mrg 756: if ((int)size < 0)
757: return (EINVAL);
758:
759: /*
760: * doit
761: */
1.1 mrg 762:
1.6 mrg 763: rv = uvm_map_protect(&p->p_vmspace->vm_map,
1.1 mrg 764: addr, addr+size, prot, FALSE);
765:
1.6 mrg 766: if (rv == KERN_SUCCESS)
767: return (0);
768: if (rv == KERN_PROTECTION_FAILURE)
769: return (EACCES);
770: return (EINVAL);
1.1 mrg 771: }
772:
773: /*
774: * sys_minherit: the minherit system call
775: */
776:
1.6 mrg 777: int
778: sys_minherit(p, v, retval)
779: struct proc *p;
780: void *v;
781: register_t *retval;
782: {
783: struct sys_minherit_args /* {
784: syscallarg(caddr_t) addr;
785: syscallarg(int) len;
786: syscallarg(int) inherit;
787: } */ *uap = v;
1.12 eeh 788: vaddr_t addr;
789: vsize_t size, pageoff;
1.35.2.1! bouyer 790: vm_inherit_t inherit;
1.6 mrg 791:
1.12 eeh 792: addr = (vaddr_t)SCARG(uap, addr);
793: size = (vsize_t)SCARG(uap, len);
1.6 mrg 794: inherit = SCARG(uap, inherit);
795: /*
796: * align the address to a page boundary, and adjust the size accordingly
797: */
798:
799: pageoff = (addr & PAGE_MASK);
800: addr -= pageoff;
801: size += pageoff;
1.12 eeh 802: size = (vsize_t) round_page(size);
1.6 mrg 803:
804: if ((int)size < 0)
805: return (EINVAL);
806:
807: switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
1.1 mrg 808: inherit)) {
1.21 mrg 809: case KERN_SUCCESS:
810: return (0);
811: case KERN_PROTECTION_FAILURE:
812: return (EACCES);
813: }
814: return (EINVAL);
815: }
816:
817: /*
818: * sys_madvise: give advice about memory usage.
819: */
820:
821: /* ARGSUSED */
822: int
823: sys_madvise(p, v, retval)
824: struct proc *p;
825: void *v;
826: register_t *retval;
827: {
828: struct sys_madvise_args /* {
829: syscallarg(caddr_t) addr;
830: syscallarg(size_t) len;
831: syscallarg(int) behav;
832: } */ *uap = v;
833: vaddr_t addr;
834: vsize_t size, pageoff;
1.29 thorpej 835: int advice, rv;;
1.21 mrg 836:
837: addr = (vaddr_t)SCARG(uap, addr);
838: size = (vsize_t)SCARG(uap, len);
839: advice = SCARG(uap, behav);
840:
841: /*
842: * align the address to a page boundary, and adjust the size accordingly
843: */
844: pageoff = (addr & PAGE_MASK);
845: addr -= pageoff;
846: size += pageoff;
847: size = (vsize_t) round_page(size);
848:
1.29 thorpej 849: if ((ssize_t)size <= 0)
850: return (EINVAL);
851:
852: switch (advice) {
853: case MADV_NORMAL:
854: case MADV_RANDOM:
855: case MADV_SEQUENTIAL:
856: rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
857: advice);
858: break;
859:
860: case MADV_WILLNEED:
861: /*
862: * Activate all these pages, pre-faulting them in if
863: * necessary.
864: */
865: /*
866: * XXX IMPLEMENT ME.
867: * Should invent a "weak" mode for uvm_fault()
868: * which would only do the PGO_LOCKED pgo_get().
869: */
870: return (0);
871:
872: case MADV_DONTNEED:
873: /*
874: * Deactivate all these pages. We don't need them
875: * any more. We don't, however, toss the data in
876: * the pages.
877: */
878: rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
879: PGO_DEACTIVATE);
880: break;
881:
882: case MADV_FREE:
883: /*
884: * These pages contain no valid data, and may be
885: * grbage-collected. Toss all resources, including
1.30 thorpej 886: * any swap space in use.
1.29 thorpej 887: */
888: rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
889: PGO_FREE);
890: break;
891:
892: case MADV_SPACEAVAIL:
893: /*
894: * XXXMRG What is this? I think it's:
895: *
896: * Ensure that we have allocated backing-store
897: * for these pages.
898: *
899: * This is going to require changes to the page daemon,
900: * as it will free swap space allocated to pages in core.
901: * There's also what to do for device/file/anonymous memory.
902: */
903: return (EINVAL);
904:
905: default:
1.21 mrg 906: return (EINVAL);
1.29 thorpej 907: }
908:
909: switch (rv) {
1.6 mrg 910: case KERN_SUCCESS:
911: return (0);
1.29 thorpej 912: case KERN_NO_SPACE:
913: return (EAGAIN);
914: case KERN_INVALID_ADDRESS:
915: return (ENOMEM);
916: case KERN_FAILURE:
917: return (EIO);
1.6 mrg 918: }
1.29 thorpej 919:
1.6 mrg 920: return (EINVAL);
1.1 mrg 921: }
922:
923: /*
924: * sys_mlock: memory lock
925: */
926:
1.6 mrg 927: int
928: sys_mlock(p, v, retval)
929: struct proc *p;
930: void *v;
931: register_t *retval;
932: {
933: struct sys_mlock_args /* {
1.10 kleink 934: syscallarg(const void *) addr;
1.6 mrg 935: syscallarg(size_t) len;
936: } */ *uap = v;
1.12 eeh 937: vaddr_t addr;
938: vsize_t size, pageoff;
1.6 mrg 939: int error;
940:
941: /*
942: * extract syscall args from uap
943: */
1.12 eeh 944: addr = (vaddr_t)SCARG(uap, addr);
945: size = (vsize_t)SCARG(uap, len);
1.6 mrg 946:
947: /*
948: * align the address to a page boundary and adjust the size accordingly
949: */
950: pageoff = (addr & PAGE_MASK);
951: addr -= pageoff;
952: size += pageoff;
1.12 eeh 953: size = (vsize_t) round_page(size);
1.6 mrg 954:
955: /* disallow wrap-around. */
956: if (addr + (int)size < addr)
957: return (EINVAL);
1.1 mrg 958:
1.6 mrg 959: if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
960: return (EAGAIN);
1.1 mrg 961:
962: #ifdef pmap_wired_count
1.6 mrg 963: if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
964: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
965: return (EAGAIN);
1.1 mrg 966: #else
1.6 mrg 967: if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
968: return (error);
1.1 mrg 969: #endif
970:
1.25 thorpej 971: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
1.35 thorpej 972: 0);
1.6 mrg 973: return (error == KERN_SUCCESS ? 0 : ENOMEM);
1.1 mrg 974: }
975:
976: /*
977: * sys_munlock: unlock wired pages
978: */
979:
1.6 mrg 980: int
981: sys_munlock(p, v, retval)
982: struct proc *p;
983: void *v;
984: register_t *retval;
985: {
986: struct sys_munlock_args /* {
1.10 kleink 987: syscallarg(const void *) addr;
1.6 mrg 988: syscallarg(size_t) len;
989: } */ *uap = v;
1.12 eeh 990: vaddr_t addr;
991: vsize_t size, pageoff;
1.6 mrg 992: int error;
993:
994: /*
995: * extract syscall args from uap
996: */
997:
1.12 eeh 998: addr = (vaddr_t)SCARG(uap, addr);
999: size = (vsize_t)SCARG(uap, len);
1.6 mrg 1000:
1001: /*
1002: * align the address to a page boundary, and adjust the size accordingly
1003: */
1004: pageoff = (addr & PAGE_MASK);
1005: addr -= pageoff;
1006: size += pageoff;
1.12 eeh 1007: size = (vsize_t) round_page(size);
1.6 mrg 1008:
1009: /* disallow wrap-around. */
1010: if (addr + (int)size < addr)
1011: return (EINVAL);
1.1 mrg 1012:
1013: #ifndef pmap_wired_count
1.6 mrg 1014: if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1015: return (error);
1.1 mrg 1016: #endif
1017:
1.25 thorpej 1018: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
1.35 thorpej 1019: 0);
1.6 mrg 1020: return (error == KERN_SUCCESS ? 0 : ENOMEM);
1.22 thorpej 1021: }
1022:
1023: /*
1024: * sys_mlockall: lock all pages mapped into an address space.
1025: */
1026:
1027: int
1028: sys_mlockall(p, v, retval)
1029: struct proc *p;
1030: void *v;
1031: register_t *retval;
1032: {
1033: struct sys_mlockall_args /* {
1034: syscallarg(int) flags;
1035: } */ *uap = v;
1036: int error, flags;
1037:
1038: flags = SCARG(uap, flags);
1039:
1040: if (flags == 0 ||
1041: (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
1042: return (EINVAL);
1043:
1.25 thorpej 1044: #ifndef pmap_wired_count
1.22 thorpej 1045: if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1046: return (error);
1047: #endif
1048:
1.25 thorpej 1049: error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
1050: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.22 thorpej 1051: switch (error) {
1052: case KERN_SUCCESS:
1053: error = 0;
1054: break;
1055:
1056: case KERN_NO_SPACE: /* XXX overloaded */
1057: error = ENOMEM;
1058: break;
1059:
1060: default:
1061: /*
1062: * "Some or all of the memory could not be locked when
1063: * the call was made."
1064: */
1065: error = EAGAIN;
1066: }
1067:
1068: return (error);
1069: }
1070:
1071: /*
1072: * sys_munlockall: unlock all pages mapped into an address space.
1073: */
1074:
1075: int
1076: sys_munlockall(p, v, retval)
1077: struct proc *p;
1078: void *v;
1079: register_t *retval;
1080: {
1081:
1082: (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
1083: return (0);
1.1 mrg 1084: }
1085:
1086: /*
1087: * uvm_mmap: internal version of mmap
1088: *
1089: * - used by sys_mmap, exec, and sysv shm
1090: * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
1091: * sysv shm uses "named anonymous memory")
1092: * - caller must page-align the file offset
1093: */
1094:
1.6 mrg 1095: int
1.25 thorpej 1096: uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
1.6 mrg 1097: vm_map_t map;
1.12 eeh 1098: vaddr_t *addr;
1099: vsize_t size;
1.6 mrg 1100: vm_prot_t prot, maxprot;
1101: int flags;
1102: caddr_t handle; /* XXX: VNODE? */
1.35.2.1! bouyer 1103: voff_t foff;
1.25 thorpej 1104: vsize_t locklimit;
1.6 mrg 1105: {
1106: struct uvm_object *uobj;
1107: struct vnode *vp;
1108: int retval;
1109: int advice = UVM_ADV_NORMAL;
1110: uvm_flag_t uvmflag = 0;
1111:
1112: /*
1113: * check params
1114: */
1115:
1116: if (size == 0)
1117: return(0);
1118: if (foff & PAGE_MASK)
1119: return(EINVAL);
1120: if ((prot & maxprot) != prot)
1121: return(EINVAL);
1122:
1123: /*
1124: * for non-fixed mappings, round off the suggested address.
1125: * for fixed mappings, check alignment and zap old mappings.
1126: */
1127:
1128: if ((flags & MAP_FIXED) == 0) {
1129: *addr = round_page(*addr); /* round */
1130: } else {
1131:
1132: if (*addr & PAGE_MASK)
1133: return(EINVAL);
1134: uvmflag |= UVM_FLAG_FIXED;
1.15 chuck 1135: (void) uvm_unmap(map, *addr, *addr + size); /* zap! */
1.6 mrg 1136: }
1137:
1138: /*
1139: * handle anon vs. non-anon mappings. for non-anon mappings attach
1140: * to underlying vm object.
1141: */
1142:
1143: if (flags & MAP_ANON) {
1.35.2.1! bouyer 1144: foff = UVM_UNKNOWN_OFFSET;
1.6 mrg 1145: uobj = NULL;
1146: if ((flags & MAP_SHARED) == 0)
1147: /* XXX: defer amap create */
1148: uvmflag |= UVM_FLAG_COPYONW;
1149: else
1150: /* shared: create amap now */
1151: uvmflag |= UVM_FLAG_OVERLAY;
1152:
1153: } else {
1154:
1155: vp = (struct vnode *) handle; /* get vnode */
1156: if (vp->v_type != VCHR) {
1157: uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
1158: maxprot : (maxprot & ~VM_PROT_WRITE));
1159:
1160: /*
1161: * XXXCDC: hack from old code
1162: * don't allow vnodes which have been mapped
1163: * shared-writeable to persist [forces them to be
1164: * flushed out when last reference goes].
1165: * XXXCDC: interesting side effect: avoids a bug.
1166: * note that in WRITE [ufs_readwrite.c] that we
1167: * allocate buffer, uncache, and then do the write.
1168: * the problem with this is that if the uncache causes
1169: * VM data to be flushed to the same area of the file
1170: * we are writing to... in that case we've got the
1171: * buffer locked and our process goes to sleep forever.
1172: *
1173: * XXXCDC: checking maxprot protects us from the
1174: * "persistbug" program but this is not a long term
1175: * solution.
1176: *
1177: * XXXCDC: we don't bother calling uncache with the vp
1178: * VOP_LOCKed since we know that we are already
1179: * holding a valid reference to the uvn (from the
1180: * uvn_attach above), and thus it is impossible for
1181: * the uncache to kill the uvn and trigger I/O.
1182: */
1183: if (flags & MAP_SHARED) {
1184: if ((prot & VM_PROT_WRITE) ||
1185: (maxprot & VM_PROT_WRITE)) {
1186: uvm_vnp_uncache(vp);
1187: }
1188: }
1189:
1190: } else {
1191: uobj = udv_attach((void *) &vp->v_rdev,
1192: (flags & MAP_SHARED) ?
1.18 cgd 1193: maxprot : (maxprot & ~VM_PROT_WRITE), foff, size);
1.6 mrg 1194: advice = UVM_ADV_RANDOM;
1195: }
1196:
1197: if (uobj == NULL)
1.11 thorpej 1198: return((vp->v_type == VREG) ? ENOMEM : EINVAL);
1.6 mrg 1199:
1200: if ((flags & MAP_SHARED) == 0)
1201: uvmflag |= UVM_FLAG_COPYONW;
1202: }
1203:
1204: /*
1205: * set up mapping flags
1206: */
1.1 mrg 1207:
1.6 mrg 1208: uvmflag = UVM_MAPFLAG(prot, maxprot,
1.1 mrg 1209: (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
1210: advice, uvmflag);
1211:
1.6 mrg 1212: /*
1213: * do it!
1214: */
1215:
1.35.2.1! bouyer 1216: retval = uvm_map(map, addr, size, uobj, foff, 0, uvmflag);
1.6 mrg 1217:
1.25 thorpej 1218: if (retval == KERN_SUCCESS) {
1219: /*
1220: * POSIX 1003.1b -- if our address space was configured
1221: * to lock all future mappings, wire the one we just made.
1222: */
1223: if (prot == VM_PROT_NONE) {
1224: /*
1225: * No more work to do in this case.
1226: */
1227: return (0);
1228: }
1229:
1230: vm_map_lock(map);
1231:
1232: if (map->flags & VM_MAP_WIREFUTURE) {
1233: if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
1234: #ifdef pmap_wired_count
1.26 thorpej 1235: || (locklimit != 0 && (size +
1.25 thorpej 1236: ptoa(pmap_wired_count(vm_map_pmap(map)))) >
1.26 thorpej 1237: locklimit)
1.25 thorpej 1238: #endif
1239: ) {
1240: retval = KERN_RESOURCE_SHORTAGE;
1.35.2.1! bouyer 1241: vm_map_unlock(map);
1.25 thorpej 1242: /* unmap the region! */
1243: (void) uvm_unmap(map, *addr, *addr + size);
1244: goto bad;
1245: }
1.35.2.1! bouyer 1246: /*
! 1247: * uvm_map_pageable() always returns the map
! 1248: * unlocked.
! 1249: */
1.25 thorpej 1250: retval = uvm_map_pageable(map, *addr, *addr + size,
1.35 thorpej 1251: FALSE, UVM_LK_ENTER);
1.25 thorpej 1252: if (retval != KERN_SUCCESS) {
1253: /* unmap the region! */
1254: (void) uvm_unmap(map, *addr, *addr + size);
1255: goto bad;
1256: }
1257: return (0);
1258: }
1259:
1260: vm_map_unlock(map);
1261:
1262: return (0);
1263: }
1.6 mrg 1264:
1265: /*
1266: * errors: first detach from the uobj, if any.
1267: */
1268:
1269: if (uobj)
1270: uobj->pgops->pgo_detach(uobj);
1271:
1.25 thorpej 1272: bad:
1.6 mrg 1273: switch (retval) {
1274: case KERN_INVALID_ADDRESS:
1275: case KERN_NO_SPACE:
1276: return(ENOMEM);
1.25 thorpej 1277: case KERN_RESOURCE_SHORTAGE:
1278: return (EAGAIN);
1.6 mrg 1279: case KERN_PROTECTION_FAILURE:
1280: return(EACCES);
1281: }
1282: return(EINVAL);
1.1 mrg 1283: }
CVSweb <webmaster@jp.NetBSD.org>