
Annotation of src/sys/uvm/uvm_mmap.c, Revision 1.35.2.1

1.35.2.1! bouyer      1: /*     $NetBSD: uvm_mmap.c,v 1.44 2000/09/13 15:00:25 thorpej Exp $    */
1.1       mrg         2:
                      3: /*
                      4:  * Copyright (c) 1997 Charles D. Cranor and Washington University.
                      5:  * Copyright (c) 1991, 1993 The Regents of the University of California.
                      6:  * Copyright (c) 1988 University of Utah.
                      7:  *
                      8:  * All rights reserved.
                      9:  *
                     10:  * This code is derived from software contributed to Berkeley by
                     11:  * the Systems Programming Group of the University of Utah Computer
                     12:  * Science Department.
                     13:  *
                     14:  * Redistribution and use in source and binary forms, with or without
                     15:  * modification, are permitted provided that the following conditions
                     16:  * are met:
                     17:  * 1. Redistributions of source code must retain the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer.
                     19:  * 2. Redistributions in binary form must reproduce the above copyright
                     20:  *    notice, this list of conditions and the following disclaimer in the
                     21:  *    documentation and/or other materials provided with the distribution.
                     22:  * 3. All advertising materials mentioning features or use of this software
                     23:  *    must display the following acknowledgement:
                     24:  *      This product includes software developed by Charles D. Cranor,
                     25:  *     Washington University, University of California, Berkeley and
                     26:  *     its contributors.
                     27:  * 4. Neither the name of the University nor the names of its contributors
                     28:  *    may be used to endorse or promote products derived from this software
                     29:  *    without specific prior written permission.
                     30:  *
                     31:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     32:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     33:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     34:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     35:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     36:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     37:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     38:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     39:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     40:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     41:  * SUCH DAMAGE.
                     42:  *
                     43:  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
                     44:  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
1.3       mrg        45:  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
1.1       mrg        46:  */
                     47:
                     48: /*
                     49:  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
                     50:  * function.
                     51:  */
                     52: #include <sys/param.h>
                     53: #include <sys/systm.h>
                     54: #include <sys/file.h>
                     55: #include <sys/filedesc.h>
                     56: #include <sys/resourcevar.h>
                     57: #include <sys/mman.h>
                     58: #include <sys/mount.h>
                     59: #include <sys/proc.h>
                     60: #include <sys/malloc.h>
                     61: #include <sys/vnode.h>
                     62: #include <sys/conf.h>
1.9       mrg        63: #include <sys/stat.h>
1.1       mrg        64:
                     65: #include <miscfs/specfs/specdev.h>
                     66:
                     67: #include <sys/syscallargs.h>
                     68:
                     69: #include <uvm/uvm.h>
                     70: #include <uvm/uvm_device.h>
                     71: #include <uvm/uvm_vnode.h>
                     72:
                     73:
                     74: /*
                     75:  * unimplemented VM system calls:
                     76:  */
                     77:
                     78: /*
                     79:  * sys_sbrk: sbrk system call.
                     80:  */
                     81:
                     82: /* ARGSUSED */
1.6       mrg        83: int
                     84: sys_sbrk(p, v, retval)
                     85:        struct proc *p;
                     86:        void *v;
                     87:        register_t *retval;
1.1       mrg        88: {
                     89: #if 0
1.6       mrg        90:        struct sys_sbrk_args /* {
1.33      kleink     91:                syscallarg(intptr_t) incr;
1.20      mrg        92:        } */ *uap = v;
1.1       mrg        93: #endif
1.6       mrg        94:
1.17      kleink     95:        return (ENOSYS);
1.1       mrg        96: }
                     97:
                     98: /*
                     99:  * sys_sstk: sstk system call.
                    100:  */
                    101:
                    102: /* ARGSUSED */
1.6       mrg       103: int
                    104: sys_sstk(p, v, retval)
                    105:        struct proc *p;
                    106:        void *v;
                    107:        register_t *retval;
1.1       mrg       108: {
                    109: #if 0
1.6       mrg       110:        struct sys_sstk_args /* {
1.20      mrg       111:                syscallarg(int) incr;
                    112:        } */ *uap = v;
1.1       mrg       113: #endif
1.6       mrg       114:
1.17      kleink    115:        return (ENOSYS);
1.1       mrg       116: }
                    117:
                    118: /*
                    119:  * sys_mincore: determine if pages are in core or not.
                    120:  */
                    121:
                    122: /* ARGSUSED */
1.6       mrg       123: int
                    124: sys_mincore(p, v, retval)
                    125:        struct proc *p;
                    126:        void *v;
                    127:        register_t *retval;
1.1       mrg       128: {
1.6       mrg       129:        struct sys_mincore_args /* {
1.22      thorpej   130:                syscallarg(void *) addr;
1.20      mrg       131:                syscallarg(size_t) len;
                    132:                syscallarg(char *) vec;
                    133:        } */ *uap = v;
1.22      thorpej   134:        vm_page_t m;
                    135:        char *vec, pgi;
                    136:        struct uvm_object *uobj;
                    137:        struct vm_amap *amap;
                    138:        struct vm_anon *anon;
                    139:        vm_map_entry_t entry;
                    140:        vaddr_t start, end, lim;
                    141:        vm_map_t map;
                    142:        vsize_t len;
                    143:        int error = 0, npgs;
                    144:
                    145:        map = &p->p_vmspace->vm_map;
                    146:
                    147:        start = (vaddr_t)SCARG(uap, addr);
                    148:        len = SCARG(uap, len);
                    149:        vec = SCARG(uap, vec);
                    150:
                    151:        if (start & PAGE_MASK)
                    152:                return (EINVAL);
                    153:        len = round_page(len);
                    154:        end = start + len;
                    155:        if (end <= start)
                    156:                return (EINVAL);
                    157:
                    158:        npgs = len >> PAGE_SHIFT;
                    159:
                    160:        if (uvm_useracc(vec, npgs, B_WRITE) == FALSE)
                    161:                return (EFAULT);
                    162:
                    163:        /*
                    164:         * Lock down vec, so our returned status isn't outdated by
                    165:         * storing the status byte for a page.
                    166:         */
                    167:        uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
                    168:
                    169:        vm_map_lock_read(map);
                    170:
                    171:        if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
                    172:                error = ENOMEM;
                    173:                goto out;
                    174:        }
                    175:
                    176:        for (/* nothing */;
                    177:             entry != &map->header && entry->start < end;
                    178:             entry = entry->next) {
                    179: #ifdef DIAGNOSTIC
                    180:                if (UVM_ET_ISSUBMAP(entry))
                    181:                        panic("mincore: user map has submap");
                    182:                if (start < entry->start)
                    183:                        panic("mincore: hole");
1.1       mrg       184: #endif
1.22      thorpej   185:                /* Make sure there are no holes. */
                    186:                if (entry->end < end &&
                    187:                     (entry->next == &map->header ||
                    188:                      entry->next->start > entry->end)) {
                    189:                        error = ENOMEM;
                    190:                        goto out;
                    191:                }
1.6       mrg       192:
1.22      thorpej   193:                lim = end < entry->end ? end : entry->end;
                    194:
                    195:                /*
1.31      thorpej   196:                 * Special case for objects with no "real" pages.  Those
                    197:                 * are always considered resident (mapped devices).
1.22      thorpej   198:                 */
                    199:                if (UVM_ET_ISOBJ(entry)) {
                    200: #ifdef DIAGNOSTIC
                    201:                        if (UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj))
                    202:                                panic("mincore: user map has kernel object");
                    203: #endif
1.31      thorpej   204:                        if (entry->object.uvm_obj->pgops->pgo_releasepg
                    205:                            == NULL) {
1.22      thorpej   206:                                for (/* nothing */; start < lim;
                    207:                                     start += PAGE_SIZE, vec++)
                    208:                                        subyte(vec, 1);
                    209:                                continue;
                    210:                        }
                    211:                }
                    212:
1.32      thorpej   213:                amap = entry->aref.ar_amap;     /* top layer */
                    214:                uobj = entry->object.uvm_obj;   /* bottom layer */
1.22      thorpej   215:
                    216:                if (amap != NULL)
                    217:                        amap_lock(amap);
                    218:                if (uobj != NULL)
                    219:                        simple_lock(&uobj->vmobjlock);
                    220:
                    221:                for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
                    222:                        pgi = 0;
                    223:                        if (amap != NULL) {
                    224:                                /* Check the top layer first. */
                    225:                                anon = amap_lookup(&entry->aref,
                    226:                                    start - entry->start);
                    227:                                /* Don't need to lock anon here. */
                    228:                                if (anon != NULL && anon->u.an_page != NULL) {
                    229:                                        /*
                    230:                                         * Anon has the page for this entry
                    231:                                         * offset.
                    232:                                         */
                    233:                                        pgi = 1;
                    234:                                }
                    235:                        }
                    236:
                    237:                        if (uobj != NULL && pgi == 0) {
                    238:                                /* Check the bottom layer. */
                    239:                                m = uvm_pagelookup(uobj,
                    240:                                    entry->offset + (start - entry->start));
                    241:                                if (m != NULL) {
                    242:                                        /*
                    243:                                         * Object has the page for this entry
                    244:                                         * offset.
                    245:                                         */
                    246:                                        pgi = 1;
                    247:                                }
                    248:                        }
                    249:
                    250:                        (void) subyte(vec, pgi);
                    251:                }
                    252:
                    253:                if (uobj != NULL)
1.27      thorpej   254:                        simple_unlock(&uobj->vmobjlock);
1.22      thorpej   255:                if (amap != NULL)
                    256:                        amap_unlock(amap);
                    257:        }
                    258:
                    259:  out:
                    260:        vm_map_unlock_read(map);
                    261:        uvm_vsunlock(p, SCARG(uap, vec), npgs);
                    262:        return (error);
1.1       mrg       263: }
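
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * Userland view of the semantics implemented above: addr must be page
 * aligned (EINVAL otherwise), vec must be writable (EFAULT otherwise),
 * and one status byte per page comes back, nonzero when resident.
 */
#if 0	/* example userland program, following this file's #if 0 convention */
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	size_t npgs = 4, i;
	char *p, *vec;

	p = mmap(NULL, npgs * pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 1;			/* fault in the first page only */

	if ((vec = malloc(npgs)) == NULL)
		err(1, "malloc");
	if (mincore(p, npgs * pgsz, vec) == -1)
		err(1, "mincore");
	for (i = 0; i < npgs; i++)
		printf("page %zu: %sresident\n", i, vec[i] ? "" : "not ");
	return (0);
}
#endif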
                    264:
                    265: #if 0
                    266: /*
                    267:  * munmapfd: unmap file descriptor
                    268:  *
                    269:  * XXX: is this actually a useful function?   could it be useful?
                    270:  */
                    271:
1.6       mrg       272: void
                    273: munmapfd(p, fd)
                    274:        struct proc *p;
                    275:        int fd;
                    276: {
1.1       mrg       277:
1.6       mrg       278:        /*
                    279:         * XXX should vm_deallocate any regions mapped to this file
                    280:         */
                    281:        p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
1.1       mrg       282: }
                    283: #endif
                    284:
                    285: /*
                    286:  * sys_mmap: mmap system call.
                    287:  *
                    288:  * => file offset and address may not be page aligned
                    289:  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
                    290:  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
                    291:  *      and the return value is adjusted up by the page offset.
                    292:  */
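
/*
 * [Editor's worked example of the adjustment above; assumes
 * PAGE_SIZE == 4096.]  A call of mmap(0, 100, ..., fd, 0x1234)
 * computes pageoff = 0x1234 & PAGE_MASK = 0x234, maps from file
 * offset 0x1000 at some page-aligned address addr, and returns
 * addr + 0x234, so the returned pointer lands exactly on file
 * byte 0x1234.
 */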
                    293:
1.6       mrg       294: int
                    295: sys_mmap(p, v, retval)
                    296:        struct proc *p;
                    297:        void *v;
                    298:        register_t *retval;
                    299: {
1.35.2.1! bouyer    300:        struct sys_mmap_args /* {
1.6       mrg       301:                syscallarg(caddr_t) addr;
                    302:                syscallarg(size_t) len;
                    303:                syscallarg(int) prot;
                    304:                syscallarg(int) flags;
                    305:                syscallarg(int) fd;
                    306:                syscallarg(long) pad;
                    307:                syscallarg(off_t) pos;
                    308:        } */ *uap = v;
1.12      eeh       309:        vaddr_t addr;
1.9       mrg       310:        struct vattr va;
1.6       mrg       311:        off_t pos;
1.12      eeh       312:        vsize_t size, pageoff;
1.6       mrg       313:        vm_prot_t prot, maxprot;
                    314:        int flags, fd;
1.12      eeh       315:        vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.35.2.1! bouyer    316:        struct filedesc *fdp = p->p_fd;
        !           317:        struct file *fp;
1.6       mrg       318:        struct vnode *vp;
                    319:        caddr_t handle;
                    320:        int error;
                    321:
                    322:        /*
                    323:         * first, extract syscall args from the uap.
                    324:         */
                    325:
1.12      eeh       326:        addr = (vaddr_t) SCARG(uap, addr);
                    327:        size = (vsize_t) SCARG(uap, len);
1.6       mrg       328:        prot = SCARG(uap, prot) & VM_PROT_ALL;
                    329:        flags = SCARG(uap, flags);
                    330:        fd = SCARG(uap, fd);
                    331:        pos = SCARG(uap, pos);
                    332:
                    333:        /*
1.24      thorpej   334:         * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
                    335:         * validate the flags.
                    336:         */
                    337:        if (flags & MAP_COPY)
                    338:                flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
                    339:        if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
                    340:                return (EINVAL);
                    341:
                    342:        /*
1.6       mrg       343:         * align file position and save offset.  adjust size.
                    344:         */
                    345:
                    346:        pageoff = (pos & PAGE_MASK);
                    347:        pos  -= pageoff;
                    348:        size += pageoff;                        /* add offset */
1.12      eeh       349:        size = (vsize_t) round_page(size);      /* round up */
1.6       mrg       350:        if ((ssize_t) size < 0)
                    351:                return (EINVAL);                        /* don't allow wrap */
                    352:
                    353:        /*
                    354:         * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
                    355:         */
                    356:
                    357:        if (flags & MAP_FIXED) {
                    358:
                    359:                /* ensure address and file offset are aligned properly */
                    360:                addr -= pageoff;
                    361:                if (addr & PAGE_MASK)
                    362:                        return (EINVAL);
                    363:
                    364:                if (VM_MAXUSER_ADDRESS > 0 &&
                    365:                    (addr + size) > VM_MAXUSER_ADDRESS)
                    366:                        return (EINVAL);
                    367:                if (vm_min_address > 0 && addr < vm_min_address)
                    368:                        return (EINVAL);
                    369:                if (addr > addr + size)
                    370:                        return (EINVAL);                /* no wrapping! */
                    371:
                    372:        } else {
                    373:
                    374:                /*
                    375:                 * not fixed: make sure we skip over the largest possible heap.
                    376:                 * we will refine our guess later (e.g. to account for VAC, etc)
                    377:                 */
1.35.2.1! bouyer    378:                if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr+MAXDSIZ))
        !           379:                        addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
        !           380:                            MAXDSIZ);
1.6       mrg       381:        }
                    382:
                    383:        /*
                    384:         * check for file mappings (i.e. not anonymous) and verify file.
                    385:         */
                    386:
                    387:        if ((flags & MAP_ANON) == 0) {
                    388:
                    389:                if (fd < 0 || fd >= fdp->fd_nfiles)
                    390:                        return(EBADF);          /* failed range check? */
                    391:                fp = fdp->fd_ofiles[fd];        /* convert to file pointer */
                    392:                if (fp == NULL)
                    393:                        return(EBADF);
                    394:
                    395:                if (fp->f_type != DTYPE_VNODE)
1.7       kleink    396:                        return (ENODEV);                /* only mmap vnodes! */
1.6       mrg       397:                vp = (struct vnode *)fp->f_data;        /* convert to vnode */
                    398:
1.11      thorpej   399:                if (vp->v_type != VREG && vp->v_type != VCHR &&
                    400:                    vp->v_type != VBLK)
                    401:                        return (ENODEV);  /* only REG/CHR/BLK support mmap */
1.6       mrg       402:
1.35.2.1! bouyer    403:                if (vp->v_type == VREG && (pos + size) < pos)
        !           404:                        return (EOVERFLOW);             /* no offset wrapping */
        !           405:
1.6       mrg       406:                /* special case: catch SunOS style /dev/zero */
                    407:                if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
                    408:                        flags |= MAP_ANON;
                    409:                        goto is_anon;
                    410:                }
                    411:
                    412:                /*
                    413:                 * Old programs may not select a specific sharing type, so
                    414:                 * default to an appropriate one.
                    415:                 *
                    416:                 * XXX: how does MAP_ANON fit in the picture?
                    417:                 */
1.24      thorpej   418:                if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
1.8       tv        419: #if defined(DEBUG)
1.6       mrg       420:                        printf("WARNING: defaulted mmap() share type to "
                    421:                           "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
                    422:                           "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
                    423:                            p->p_comm);
1.1       mrg       424: #endif
1.6       mrg       425:                        if (vp->v_type == VCHR)
                    426:                                flags |= MAP_SHARED;    /* for a device */
                    427:                        else
                    428:                                flags |= MAP_PRIVATE;   /* for a file */
                    429:                }
                    430:
                    431:                /*
                    432:                 * MAP_PRIVATE device mappings don't make sense (and aren't
                    433:                 * supported anyway).  However, some programs rely on this,
                    434:                 * so just change it to MAP_SHARED.
                    435:                 */
                    436:                if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
1.1       mrg       437: #if defined(DIAGNOSTIC)
1.6       mrg       438:                        printf("WARNING: converted MAP_PRIVATE device mapping "
                    439:                            "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
                    440:                            p->p_comm);
1.1       mrg       441: #endif
1.6       mrg       442:                        flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
                    443:                }
1.1       mrg       444:
1.6       mrg       445:                /*
                    446:                 * now check protection
                    447:                 */
                    448:
                    449:                maxprot = VM_PROT_EXECUTE;
                    450:
                    451:                /* check read access */
                    452:                if (fp->f_flag & FREAD)
                    453:                        maxprot |= VM_PROT_READ;
                    454:                else if (prot & PROT_READ)
                    455:                        return (EACCES);
                    456:
1.9       mrg       457:                /* check write access, shared case first */
1.6       mrg       458:                if (flags & MAP_SHARED) {
1.9       mrg       459:                        /*
                    460:                         * if the file is writable, only add PROT_WRITE to
                    461:                         * maxprot if the file is not immutable, append-only.
                    462:                         * otherwise, if we have asked for PROT_WRITE, return
                    463:                         * EPERM.
                    464:                         */
                    465:                        if (fp->f_flag & FWRITE) {
                    466:                                if ((error =
                    467:                                    VOP_GETATTR(vp, &va, p->p_ucred, p)))
                    468:                                        return (error);
                    469:                                if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
                    470:                                        maxprot |= VM_PROT_WRITE;
                    471:                                else if (prot & PROT_WRITE)
                    472:                                        return (EPERM);
                    473:                        }
1.6       mrg       474:                        else if (prot & PROT_WRITE)
                    475:                                return (EACCES);
                    476:                } else {
                    477:                        /* MAP_PRIVATE mappings can always be written to */
                    478:                        maxprot |= VM_PROT_WRITE;
                    479:                }
                    480:
                    481:                /*
                    482:                 * set handle to vnode
                    483:                 */
1.1       mrg       484:
1.6       mrg       485:                handle = (caddr_t)vp;
1.1       mrg       486:
1.6       mrg       487:        } else {                /* MAP_ANON case */
1.24      thorpej   488:                /*
                    489:                 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
                    490:                 */
1.6       mrg       491:                if (fd != -1)
                    492:                        return (EINVAL);
1.1       mrg       493:
1.24      thorpej   494:  is_anon:              /* label for SunOS style /dev/zero */
1.6       mrg       495:                handle = NULL;
                    496:                maxprot = VM_PROT_ALL;
                    497:                pos = 0;
1.28      cgd       498:        }
                    499:
                    500:        /*
                    501:         * XXX (in)sanity check.  We don't do proper datasize checking
                    502:  * XXX for anonymous (or private writable) mmap().  However, we
                    503:         * XXX know that if we're trying to allocate more than the amount
                    504:         * XXX remaining under our current data size limit, _that_ should
                    505:         * XXX be disallowed.
                    506:         */
                    507:        if ((flags & MAP_ANON) != 0 ||
                    508:            ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
                    509:                if (size >
                    510:                    (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) {
                    511:                        return (ENOMEM);
                    512:                }
1.6       mrg       513:        }
                    514:
                    515:        /*
                    516:         * now let kernel internal function uvm_mmap do the work.
                    517:         */
                    518:
                    519:        error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
1.25      thorpej   520:            flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.6       mrg       521:
                    522:        if (error == 0)
                    523:                /* remember to add offset */
                    524:                *retval = (register_t)(addr + pageoff);
1.1       mrg       525:
1.6       mrg       526:        return (error);
1.1       mrg       527: }
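
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * Minimal file mapping through the path above; assumes the named file
 * exists and is at least one byte long.  Omitting both MAP_SHARED and
 * MAP_PRIVATE would trip the "defaulted mmap() share type" warning in
 * DEBUG kernels, so the share type is always spelled out here.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	int fd;
	char *p;

	if (argc != 2)
		errx(1, "usage: %s file", argv[0]);
	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(1, "open");
	/* read-only fd: FREAD gives VM_PROT_READ; MAP_PRIVATE adds write */
	p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	printf("first byte: 0x%02x\n", (unsigned char)p[0]);
	munmap(p, 4096);
	close(fd);
	return (0);
}
#endif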
                    528:
                    529: /*
                    530:  * sys___msync13: the msync system call (a front-end for flush)
                    531:  */
                    532:
1.6       mrg       533: int
                    534: sys___msync13(p, v, retval)
                    535:        struct proc *p;
                    536:        void *v;
                    537:        register_t *retval;
                    538: {
                    539:        struct sys___msync13_args /* {
                    540:                syscallarg(caddr_t) addr;
                    541:                syscallarg(size_t) len;
                    542:                syscallarg(int) flags;
                    543:        } */ *uap = v;
1.12      eeh       544:        vaddr_t addr;
                    545:        vsize_t size, pageoff;
1.6       mrg       546:        vm_map_t map;
                    547:        int rv, flags, uvmflags;
                    548:
                    549:        /*
                    550:         * extract syscall args from the uap
                    551:         */
                    552:
1.12      eeh       553:        addr = (vaddr_t)SCARG(uap, addr);
                    554:        size = (vsize_t)SCARG(uap, len);
1.6       mrg       555:        flags = SCARG(uap, flags);
                    556:
                    557:        /* sanity check flags */
                    558:        if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
                    559:                        (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
                    560:                        (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
1.1       mrg       561:                return (EINVAL);
1.6       mrg       562:        if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
1.1       mrg       563:                flags |= MS_SYNC;
                    564:
1.6       mrg       565:        /*
                    566:         * align the address to a page boundary, and adjust the size accordingly
                    567:         */
                    568:
                    569:        pageoff = (addr & PAGE_MASK);
                    570:        addr -= pageoff;
                    571:        size += pageoff;
1.12      eeh       572:        size = (vsize_t) round_page(size);
1.6       mrg       573:
                    574:        /* disallow wrap-around. */
                    575:        if (addr + size < addr)
                    576:                return (EINVAL);
                    577:
                    578:        /*
                    579:         * get map
                    580:         */
                    581:
                    582:        map = &p->p_vmspace->vm_map;
                    583:
                    584:        /*
                    585:         * XXXCDC: do we really need this semantic?
                    586:         *
                    587:         * XXX Gak!  If size is zero we are supposed to sync "all modified
                    588:         * pages with the region containing addr".  Unfortunately, we
                    589:         * don't really keep track of individual mmaps so we approximate
                    590:         * by flushing the range of the map entry containing addr.
                    591:         * This can be incorrect if the region splits or is coalesced
                    592:         * with a neighbor.
                    593:         */
                    594:        if (size == 0) {
                    595:                vm_map_entry_t entry;
                    596:
                    597:                vm_map_lock_read(map);
                    598:                rv = uvm_map_lookup_entry(map, addr, &entry);
                    599:                if (rv == TRUE) {
                    600:                        addr = entry->start;
                    601:                        size = entry->end - entry->start;
                    602:                }
                    603:                vm_map_unlock_read(map);
                    604:                if (rv == FALSE)
                    605:                        return (EINVAL);
                    606:        }
                    607:
                    608:        /*
                    609:         * translate MS_ flags into PGO_ flags
                    610:         */
1.34      thorpej   611:        uvmflags = PGO_CLEANIT;
                    612:        if (flags & MS_INVALIDATE)
                    613:                uvmflags |= PGO_FREE;
1.6       mrg       614:        if (flags & MS_SYNC)
                    615:                uvmflags |= PGO_SYNCIO;
                    616:        else
                    617:                uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */
                    618:
                    619:        /*
                    620:         * doit!
                    621:         */
                    622:        rv = uvm_map_clean(map, addr, addr+size, uvmflags);
                    623:
                    624:        /*
                    625:         * and return...
                    626:         */
                    627:        switch (rv) {
                    628:        case KERN_SUCCESS:
                    629:                return(0);
                    630:        case KERN_INVALID_ADDRESS:
                    631:                return (ENOMEM);
                    632:        case KERN_FAILURE:
                    633:                return (EIO);
                    634:        case KERN_PAGES_LOCKED: /* XXXCDC: uvm doesn't return this */
                    635:                return (EBUSY);
                    636:        default:
                    637:                return (EINVAL);
                    638:        }
                    639:        /*NOTREACHED*/
1.1       mrg       640: }
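
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * Flushing a shared file mapping.  Two quirks of this revision are
 * worth noting: MS_ASYNC is currently forced synchronous (the XXXCDC
 * comment above), and len == 0 flushes the whole map entry containing
 * addr.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fd;
	char *p;

	if ((fd = open("data.tmp", O_RDWR | O_CREAT, 0644)) == -1)
		err(1, "open");
	if (ftruncate(fd, 4096) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memcpy(p, "hello", 5);
	if (msync(p, 4096, MS_SYNC) == -1)	/* wait for the write-back */
		err(1, "msync");
	if (msync(p, 0, MS_SYNC) == -1)		/* whole entry containing p */
		err(1, "msync len==0");
	return (0);
}
#endif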
                    641:
                    642: /*
                    643:  * sys_munmap: unmap a users memory
                    644:  */
                    645:
1.6       mrg       646: int
                    647: sys_munmap(p, v, retval)
1.35.2.1! bouyer    648:        struct proc *p;
1.6       mrg       649:        void *v;
                    650:        register_t *retval;
                    651: {
1.35.2.1! bouyer    652:        struct sys_munmap_args /* {
1.6       mrg       653:                syscallarg(caddr_t) addr;
                    654:                syscallarg(size_t) len;
                    655:        } */ *uap = v;
1.12      eeh       656:        vaddr_t addr;
                    657:        vsize_t size, pageoff;
1.6       mrg       658:        vm_map_t map;
1.12      eeh       659:        vaddr_t vm_min_address = VM_MIN_ADDRESS;
1.6       mrg       660:        struct vm_map_entry *dead_entries;
                    661:
                    662:        /*
                    663:         * get syscall args...
                    664:         */
                    665:
1.12      eeh       666:        addr = (vaddr_t) SCARG(uap, addr);
                    667:        size = (vsize_t) SCARG(uap, len);
1.6       mrg       668:
                    669:        /*
                    670:         * align the address to a page boundary, and adjust the size accordingly
                    671:         */
                    672:
                    673:        pageoff = (addr & PAGE_MASK);
                    674:        addr -= pageoff;
                    675:        size += pageoff;
1.12      eeh       676:        size = (vsize_t) round_page(size);
1.6       mrg       677:
                    678:        if ((int)size < 0)
                    679:                return (EINVAL);
                    680:        if (size == 0)
                    681:                return (0);
                    682:
                    683:        /*
                    684:         * Check for illegal addresses.  Watch out for address wrap...
                    685:         * Note that VM_*_ADDRESS are not constants due to casts (argh).
                    686:         */
                    687:        if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
                    688:                return (EINVAL);
                    689:        if (vm_min_address > 0 && addr < vm_min_address)
                    690:                return (EINVAL);
                    691:        if (addr > addr + size)
                    692:                return (EINVAL);
                    693:        map = &p->p_vmspace->vm_map;
                    694:
                    695:
                    696:        vm_map_lock(map);       /* lock map so we can checkprot */
                    697:
                    698:        /*
                    699:         * interesting system call semantic: make sure entire range is
                    700:         * allocated before allowing an unmap.
                    701:         */
                    702:
                    703:        if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
                    704:                vm_map_unlock(map);
                    705:                return (EINVAL);
                    706:        }
                    707:
                    708:        /*
                    709:         * doit!
                    710:         */
1.15      chuck     711:        (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);
1.1       mrg       712:
1.6       mrg       713:        vm_map_unlock(map);     /* and unlock */
1.1       mrg       714:
1.6       mrg       715:        if (dead_entries != NULL)
                    716:                uvm_unmap_detach(dead_entries, 0);
1.1       mrg       717:
1.6       mrg       718:        return (0);
1.1       mrg       719: }
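
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * The "entire range must be allocated" semantic enforced by the
 * uvm_map_checkprot() call above: unmapping across a hole fails with
 * EINVAL instead of unmapping the allocated parts.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, 3 * pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	if (munmap(p + pgsz, pgsz) == -1)	/* punch a hole: fine */
		err(1, "munmap");
	if (munmap(p, 3 * pgsz) == -1)		/* spans the hole: EINVAL */
		warn("munmap across hole rejected as expected");
	return (0);
}
#endif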
                    720:
                    721: /*
                    722:  * sys_mprotect: the mprotect system call
                    723:  */
                    724:
1.6       mrg       725: int
                    726: sys_mprotect(p, v, retval)
                    727:        struct proc *p;
                    728:        void *v;
                    729:        register_t *retval;
                    730: {
                    731:        struct sys_mprotect_args /* {
                    732:                syscallarg(caddr_t) addr;
                    733:                syscallarg(int) len;
                    734:                syscallarg(int) prot;
                    735:        } */ *uap = v;
1.12      eeh       736:        vaddr_t addr;
                    737:        vsize_t size, pageoff;
1.6       mrg       738:        vm_prot_t prot;
                    739:        int rv;
                    740:
                    741:        /*
                    742:         * extract syscall args from uap
                    743:         */
                    744:
1.12      eeh       745:        addr = (vaddr_t)SCARG(uap, addr);
                    746:        size = (vsize_t)SCARG(uap, len);
1.6       mrg       747:        prot = SCARG(uap, prot) & VM_PROT_ALL;
                    748:
                    749:        /*
                    750:         * align the address to a page boundary, and adjust the size accordingly
                    751:         */
                    752:        pageoff = (addr & PAGE_MASK);
                    753:        addr -= pageoff;
                    754:        size += pageoff;
1.12      eeh       755:        size = (vsize_t) round_page(size);
1.6       mrg       756:        if ((int)size < 0)
                    757:                return (EINVAL);
                    758:
                    759:        /*
                    760:         * doit
                    761:         */
1.1       mrg       762:
1.6       mrg       763:        rv = uvm_map_protect(&p->p_vmspace->vm_map,
1.1       mrg       764:                           addr, addr+size, prot, FALSE);
                    765:
1.6       mrg       766:        if (rv == KERN_SUCCESS)
                    767:                return (0);
                    768:        if (rv == KERN_PROTECTION_FAILURE)
                    769:                return (EACCES);
                    770:        return (EINVAL);
1.1       mrg       771: }
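
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * Dropping write permission on an anonymous page.  Raising protection
 * beyond maxprot instead fails with EACCES (the KERN_PROTECTION_FAILURE
 * case above).
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 1;			/* still writable here */
	if (mprotect(p, pgsz, PROT_READ) == -1)
		err(1, "mprotect");
	/* a store through p now faults with SIGSEGV */
	return (0);
}
#endif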
                    772:
                    773: /*
                    774:  * sys_minherit: the minherit system call
                    775:  */
                    776:
1.6       mrg       777: int
                    778: sys_minherit(p, v, retval)
                    779:        struct proc *p;
                    780:        void *v;
                    781:        register_t *retval;
                    782: {
                    783:        struct sys_minherit_args /* {
                    784:                syscallarg(caddr_t) addr;
                    785:                syscallarg(int) len;
                    786:                syscallarg(int) inherit;
                    787:        } */ *uap = v;
1.12      eeh       788:        vaddr_t addr;
                    789:        vsize_t size, pageoff;
1.35.2.1! bouyer    790:        vm_inherit_t inherit;
1.6       mrg       791:
1.12      eeh       792:        addr = (vaddr_t)SCARG(uap, addr);
                    793:        size = (vsize_t)SCARG(uap, len);
1.6       mrg       794:        inherit = SCARG(uap, inherit);
                    795:        /*
                    796:         * align the address to a page boundary, and adjust the size accordingly
                    797:         */
                    798:
                    799:        pageoff = (addr & PAGE_MASK);
                    800:        addr -= pageoff;
                    801:        size += pageoff;
1.12      eeh       802:        size = (vsize_t) round_page(size);
1.6       mrg       803:
                    804:        if ((int)size < 0)
                    805:                return (EINVAL);
                    806:
                    807:        switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
1.1       mrg       808:                         inherit)) {
1.21      mrg       809:        case KERN_SUCCESS:
                    810:                return (0);
                    811:        case KERN_PROTECTION_FAILURE:
                    812:                return (EACCES);
                    813:        }
                    814:        return (EINVAL);
                    815: }
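
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * minherit(2) controls what a child gets at fork() time.  The
 * MAP_INHERIT_SHARE constant name is taken from later NetBSD
 * <sys/mman.h> and is an assumption for this revision.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* share this page with children instead of copy-on-write */
	if (minherit(p, pgsz, MAP_INHERIT_SHARE) == -1)
		err(1, "minherit");
	return (0);
}
#endif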
                    816:
                    817: /*
                    818:  * sys_madvise: give advice about memory usage.
                    819:  */
                    820:
                    821: /* ARGSUSED */
                    822: int
                    823: sys_madvise(p, v, retval)
                    824:        struct proc *p;
                    825:        void *v;
                    826:        register_t *retval;
                    827: {
                    828:        struct sys_madvise_args /* {
                    829:                syscallarg(caddr_t) addr;
                    830:                syscallarg(size_t) len;
                    831:                syscallarg(int) behav;
                    832:        } */ *uap = v;
                    833:        vaddr_t addr;
                    834:        vsize_t size, pageoff;
1.29      thorpej   835:        int advice, rv;
1.21      mrg       836:
                    837:        addr = (vaddr_t)SCARG(uap, addr);
                    838:        size = (vsize_t)SCARG(uap, len);
                    839:        advice = SCARG(uap, behav);
                    840:
                    841:        /*
                    842:         * align the address to a page boundary, and adjust the size accordingly
                    843:         */
                    844:        pageoff = (addr & PAGE_MASK);
                    845:        addr -= pageoff;
                    846:        size += pageoff;
                    847:        size = (vsize_t) round_page(size);
                    848:
1.29      thorpej   849:        if ((ssize_t)size <= 0)
                    850:                return (EINVAL);
                    851:
                    852:        switch (advice) {
                    853:        case MADV_NORMAL:
                    854:        case MADV_RANDOM:
                    855:        case MADV_SEQUENTIAL:
                    856:                rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
                    857:                    advice);
                    858:                break;
                    859:
                    860:        case MADV_WILLNEED:
                    861:                /*
                    862:                 * Activate all these pages, pre-faulting them in if
                    863:                 * necessary.
                    864:                 */
                    865:                /*
                    866:                 * XXX IMPLEMENT ME.
                    867:                 * Should invent a "weak" mode for uvm_fault()
                    868:                 * which would only do the PGO_LOCKED pgo_get().
                    869:                 */
                    870:                return (0);
                    871:
                    872:        case MADV_DONTNEED:
                    873:                /*
                    874:                 * Deactivate all these pages.  We don't need them
                    875:                 * any more.  We don't, however, toss the data in
                    876:                 * the pages.
                    877:                 */
                    878:                rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    879:                    PGO_DEACTIVATE);
                    880:                break;
                    881:
                    882:        case MADV_FREE:
                    883:                /*
                    884:                 * These pages contain no valid data, and may be
                    885:  * garbage-collected.  Toss all resources, including
1.30      thorpej   886:                 * any swap space in use.
1.29      thorpej   887:                 */
                    888:                rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    889:                    PGO_FREE);
                    890:                break;
                    891:
                    892:        case MADV_SPACEAVAIL:
                    893:                /*
                    894:                 * XXXMRG What is this?  I think it's:
                    895:                 *
                    896:                 *      Ensure that we have allocated backing-store
                    897:                 *      for these pages.
                    898:                 *
                    899:                 * This is going to require changes to the page daemon,
                    900:                 * as it will free swap space allocated to pages in core.
                    901:                 * There's also what to do for device/file/anonymous memory.
                    902:                 */
                    903:                return (EINVAL);
                    904:
                    905:        default:
1.21      mrg       906:                return (EINVAL);
1.29      thorpej   907:        }
                    908:
                    909:        switch (rv) {
1.6       mrg       910:        case KERN_SUCCESS:
                    911:                return (0);
1.29      thorpej   912:        case KERN_NO_SPACE:
                    913:                return (EAGAIN);
                    914:        case KERN_INVALID_ADDRESS:
                    915:                return (ENOMEM);
                    916:        case KERN_FAILURE:
                    917:                return (EIO);
1.6       mrg       918:        }
1.29      thorpej   919:
1.6       mrg       920:        return (EINVAL);
1.1       mrg       921: }
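
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * The advice values handled above, from userland.  In this revision
 * MADV_WILLNEED is accepted but is a no-op (the XXX IMPLEMENT ME case
 * returns 0 without pre-faulting anything).
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, 16 * pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* we will walk the buffer once, front to back */
	if (madvise(p, 16 * pgsz, MADV_SEQUENTIAL) == -1)
		err(1, "madvise");
	/* ... use p ... */
	/* contents no longer valid; let UVM reclaim pages and swap */
	if (madvise(p, 16 * pgsz, MADV_FREE) == -1)
		err(1, "madvise MADV_FREE");
	return (0);
}
#endif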
                    922:
                    923: /*
                    924:  * sys_mlock: memory lock
                    925:  */
                    926:
1.6       mrg       927: int
                    928: sys_mlock(p, v, retval)
                    929:        struct proc *p;
                    930:        void *v;
                    931:        register_t *retval;
                    932: {
                    933:        struct sys_mlock_args /* {
1.10      kleink    934:                syscallarg(const void *) addr;
1.6       mrg       935:                syscallarg(size_t) len;
                    936:        } */ *uap = v;
1.12      eeh       937:        vaddr_t addr;
                    938:        vsize_t size, pageoff;
1.6       mrg       939:        int error;
                    940:
                    941:        /*
                    942:         * extract syscall args from uap
                    943:         */
1.12      eeh       944:        addr = (vaddr_t)SCARG(uap, addr);
                    945:        size = (vsize_t)SCARG(uap, len);
1.6       mrg       946:
                    947:        /*
                    948:         * align the address to a page boundary and adjust the size accordingly
                    949:         */
                    950:        pageoff = (addr & PAGE_MASK);
                    951:        addr -= pageoff;
                    952:        size += pageoff;
1.12      eeh       953:        size = (vsize_t) round_page(size);
1.6       mrg       954:
                    955:        /* disallow wrap-around. */
                    956:        if (addr + (int)size < addr)
                    957:                return (EINVAL);
1.1       mrg       958:
1.6       mrg       959:        if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
                    960:                return (EAGAIN);
1.1       mrg       961:
                    962: #ifdef pmap_wired_count
1.6       mrg       963:        if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
                    964:                        p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
                    965:                return (EAGAIN);
1.1       mrg       966: #else
1.6       mrg       967:        if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
                    968:                return (error);
1.1       mrg       969: #endif
                    970:
1.25      thorpej   971:        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
1.35      thorpej   972:            0);
1.6       mrg       973:        return (error == KERN_SUCCESS ? 0 : ENOMEM);
1.1       mrg       974: }
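
/*
 * [Editor's illustrative sketch, not part of the original source.]
 * Wiring a buffer so it cannot be paged to swap.  Both checks above
 * apply: the global uvmexp.wiredmax, and either RLIMIT_MEMLOCK (when
 * the pmap can count wired pages) or root privilege (when it cannot).
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	static char key[64];

	/* keep secret material resident while it is in use */
	if (mlock(key, sizeof(key)) == -1)
		err(1, "mlock");
	/* ... use key ... */
	memset(key, 0, sizeof(key));
	if (munlock(key, sizeof(key)) == -1)
		err(1, "munlock");
	return (0);
}
#endif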
                    975:
                    976: /*
                    977:  * sys_munlock: unlock wired pages
                    978:  */
                    979:
1.6       mrg       980: int
                    981: sys_munlock(p, v, retval)
                    982:        struct proc *p;
                    983:        void *v;
                    984:        register_t *retval;
                    985: {
                    986:        struct sys_munlock_args /* {
1.10      kleink    987:                syscallarg(const void *) addr;
1.6       mrg       988:                syscallarg(size_t) len;
                    989:        } */ *uap = v;
1.12      eeh       990:        vaddr_t addr;
                    991:        vsize_t size, pageoff;
1.6       mrg       992:        int error;
                    993:
                    994:        /*
                    995:         * extract syscall args from uap
                    996:         */
                    997:
1.12      eeh       998:        addr = (vaddr_t)SCARG(uap, addr);
                    999:        size = (vsize_t)SCARG(uap, len);
1.6       mrg      1000:
                   1001:        /*
                   1002:         * align the address to a page boundary, and adjust the size accordingly
                   1003:         */
                   1004:        pageoff = (addr & PAGE_MASK);
                   1005:        addr -= pageoff;
                   1006:        size += pageoff;
1.12      eeh      1007:        size = (vsize_t) round_page(size);
1.6       mrg      1008:
                   1009:        /* disallow wrap-around. */
                   1010:        if (addr + (int)size < addr)
                   1011:                return (EINVAL);
1.1       mrg      1012:
                   1013: #ifndef pmap_wired_count
1.6       mrg      1014:        if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
                   1015:                return (error);
1.1       mrg      1016: #endif
                   1017:
1.25      thorpej  1018:        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
1.35      thorpej  1019:            0);
1.6       mrg      1020:        return (error == KERN_SUCCESS ? 0 : ENOMEM);
1.22      thorpej  1021: }
                   1022:
                   1023: /*
                   1024:  * sys_mlockall: lock all pages mapped into an address space.
                   1025:  */
                   1026:
                   1027: int
                   1028: sys_mlockall(p, v, retval)
                   1029:        struct proc *p;
                   1030:        void *v;
                   1031:        register_t *retval;
                   1032: {
                   1033:        struct sys_mlockall_args /* {
                   1034:                syscallarg(int) flags;
                   1035:        } */ *uap = v;
                   1036:        int error, flags;
                   1037:
                   1038:        flags = SCARG(uap, flags);
                   1039:
                   1040:        if (flags == 0 ||
                   1041:            (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
                   1042:                return (EINVAL);
                   1043:
1.25      thorpej  1044: #ifndef pmap_wired_count
1.22      thorpej  1045:        if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
                   1046:                return (error);
                   1047: #endif
                   1048:
1.25      thorpej  1049:        error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
                   1050:            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1.22      thorpej  1051:        switch (error) {
                   1052:        case KERN_SUCCESS:
                   1053:                error = 0;
                   1054:                break;
                   1055:
                   1056:        case KERN_NO_SPACE:     /* XXX overloaded */
                   1057:                error = ENOMEM;
                   1058:                break;
                   1059:
                   1060:        default:
                   1061:                /*
                   1062:                 * "Some or all of the memory could not be locked when
                   1063:                 * the call was made."
                   1064:                 */
                   1065:                error = EAGAIN;
                   1066:        }
                   1067:
                   1068:        return (error);
                   1069: }
                   1070:
                   1071: /*
                   1072:  * sys_munlockall: unlock all pages mapped into an address space.
                   1073:  */
                   1074:
                   1075: int
                   1076: sys_munlockall(p, v, retval)
                   1077:        struct proc *p;
                   1078:        void *v;
                   1079:        register_t *retval;
                   1080: {
                   1081:
                   1082:        (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
                   1083:        return (0);
1.1       mrg      1084: }
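
                          /*
                           * Editorial sketch (not part of the original file): typical
                           * userland use of the two calls above.  mlockall() with
                           * MCL_CURRENT | MCL_FUTURE wires all current mappings and sets
                           * VM_MAP_WIREFUTURE so later mappings are wired as they are
                           * made (see uvm_mmap() below); munlockall() undoes both.
                           * Guarded by #if 0 so it is never compiled.
                           */
                          #if 0
                          #include <sys/mman.h>

                          int
                          enter_realtime_section(void)
                          {
                                  /* May fail with EAGAIN or ENOMEM (or EPERM
                                     without privilege on some configurations). */
                                  return (mlockall(MCL_CURRENT | MCL_FUTURE));
                          }

                          void
                          leave_realtime_section(void)
                          {
                                  (void) munlockall();    /* always succeeds */
                          }
                          #endif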
                   1085:
                   1086: /*
                   1087:  * uvm_mmap: internal version of mmap
                   1088:  *
                   1089:  * - used by sys_mmap, exec, and sysv shm
                   1090:  * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
                   1091:  *     sysv shm uses "named anonymous memory")
                   1092:  * - caller must page-align the file offset
                   1093:  */
                   1094:
1.6       mrg      1095: int
1.25      thorpej  1096: uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
1.6       mrg      1097:        vm_map_t map;
1.12      eeh      1098:        vaddr_t *addr;
                   1099:        vsize_t size;
1.6       mrg      1100:        vm_prot_t prot, maxprot;
                   1101:        int flags;
                   1102:        caddr_t handle;         /* XXX: VNODE? */
1.35.2.1! bouyer   1103:        voff_t foff;
1.25      thorpej  1104:        vsize_t locklimit;
1.6       mrg      1105: {
                   1106:        struct uvm_object *uobj;
                   1107:        struct vnode *vp;
                   1108:        int retval;
                   1109:        int advice = UVM_ADV_NORMAL;
                   1110:        uvm_flag_t uvmflag = 0;
                   1111:
                   1112:        /*
                   1113:         * check params
                   1114:         */
                   1115:
                   1116:        if (size == 0)
                   1117:                return(0);
                   1118:        if (foff & PAGE_MASK)
                   1119:                return(EINVAL);
                   1120:        if ((prot & maxprot) != prot)
                   1121:                return(EINVAL);
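                                 /*
                                  * Editorial note: the (prot & maxprot) != prot test
                                  * above rejects any requested permission outside
                                  * maxprot, e.g. prot = VM_PROT_READ|VM_PROT_WRITE
                                  * against maxprot = VM_PROT_READ leaves
                                  * prot & maxprot == VM_PROT_READ != prot, so the
                                  * request fails with EINVAL.
                                  */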
                   1122:
                   1123:        /*
                   1124:         * for non-fixed mappings, round off the suggested address.
                   1125:         * for fixed mappings, check alignment and zap old mappings.
                   1126:         */
                   1127:
                   1128:        if ((flags & MAP_FIXED) == 0) {
                   1129:                *addr = round_page(*addr);      /* round */
                   1130:        } else {
                   1131:
                   1132:                if (*addr & PAGE_MASK)
                   1133:                        return(EINVAL);
                   1134:                uvmflag |= UVM_FLAG_FIXED;
1.15      chuck    1135:                (void) uvm_unmap(map, *addr, *addr + size);     /* zap! */
1.6       mrg      1136:        }
                   1137:
                   1138:        /*
                   1139:         * handle anon vs. non-anon mappings.   for non-anon mappings attach
                   1140:         * to underlying vm object.
                   1141:         */
                   1142:
                   1143:        if (flags & MAP_ANON) {
1.35.2.1! bouyer   1144:                foff = UVM_UNKNOWN_OFFSET;
1.6       mrg      1145:                uobj = NULL;
                   1146:                if ((flags & MAP_SHARED) == 0)
                   1147:                        /* XXX: defer amap create */
                   1148:                        uvmflag |= UVM_FLAG_COPYONW;
                   1149:                else
                   1150:                        /* shared: create amap now */
                   1151:                        uvmflag |= UVM_FLAG_OVERLAY;
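                                         /*
                                          * Editorial note: a typical private anonymous
                                          * mapping, e.g. mmap(NULL, len,
                                          * PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE,
                                          * -1, 0), takes the COPYONW branch above and
                                          * thus defers amap creation, while the
                                          * MAP_SHARED case allocates its amap
                                          * immediately via UVM_FLAG_OVERLAY.
                                          */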
                   1152:
                   1153:        } else {
                   1154:
                   1155:                vp = (struct vnode *) handle;   /* get vnode */
                   1156:                if (vp->v_type != VCHR) {
                   1157:                        uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
                   1158:                           maxprot : (maxprot & ~VM_PROT_WRITE));
                   1159:
                   1160:                        /*
                   1161:                         * XXXCDC: hack from old code
                   1162:                         * don't allow vnodes which have been mapped
                   1163:                         * shared-writeable to persist [forces them to be
                   1164:                         * flushed out when last reference goes].
                   1165:                         * XXXCDC: interesting side effect: avoids a bug.
                    1166:                         * note that in WRITE [ufs_readwrite.c] we
                    1167:                         * allocate a buffer, uncache, and then do the write.
                    1168:                         * the problem with this is that if the uncache
                    1169:                         * causes VM data to be flushed to the same area of
                    1170:                         * the file we are writing to, we've got the buffer
                    1171:                         * locked and our process sleeps forever.
                   1172:                         *
                   1173:                         * XXXCDC: checking maxprot protects us from the
                   1174:                         * "persistbug" program but this is not a long term
                   1175:                         * solution.
                   1176:                         *
                   1177:                         * XXXCDC: we don't bother calling uncache with the vp
                   1178:                         * VOP_LOCKed since we know that we are already
                   1179:                         * holding a valid reference to the uvn (from the
                   1180:                         * uvn_attach above), and thus it is impossible for
                   1181:                         * the uncache to kill the uvn and trigger I/O.
                   1182:                         */
                   1183:                        if (flags & MAP_SHARED) {
                   1184:                                if ((prot & VM_PROT_WRITE) ||
                   1185:                                    (maxprot & VM_PROT_WRITE)) {
                   1186:                                        uvm_vnp_uncache(vp);
                   1187:                                }
                   1188:                        }
                   1189:
                   1190:                } else {
                   1191:                        uobj = udv_attach((void *) &vp->v_rdev,
                   1192:                            (flags & MAP_SHARED) ?
1.18      cgd      1193:                            maxprot : (maxprot & ~VM_PROT_WRITE), foff, size);
1.6       mrg      1194:                        advice = UVM_ADV_RANDOM;
                   1195:                }
                   1196:
                   1197:                if (uobj == NULL)
1.11      thorpej  1198:                        return((vp->v_type == VREG) ? ENOMEM : EINVAL);
1.6       mrg      1199:
                   1200:                if ((flags & MAP_SHARED) == 0)
                   1201:                        uvmflag |= UVM_FLAG_COPYONW;
                   1202:        }
                   1203:
                   1204:        /*
                   1205:         * set up mapping flags
                   1206:         */
1.1       mrg      1207:
1.6       mrg      1208:        uvmflag = UVM_MAPFLAG(prot, maxprot,
1.1       mrg      1209:                        (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
                   1210:                        advice, uvmflag);
                   1211:
1.6       mrg      1212:        /*
                   1213:         * do it!
                   1214:         */
                   1215:
1.35.2.1! bouyer   1216:        retval = uvm_map(map, addr, size, uobj, foff, 0, uvmflag);
1.6       mrg      1217:
1.25      thorpej  1218:        if (retval == KERN_SUCCESS) {
                   1219:                /*
                   1220:                 * POSIX 1003.1b -- if our address space was configured
                   1221:                 * to lock all future mappings, wire the one we just made.
                   1222:                 */
                   1223:                if (prot == VM_PROT_NONE) {
                   1224:                        /*
                   1225:                         * No more work to do in this case.
                   1226:                         */
                   1227:                        return (0);
                   1228:                }
                   1229:
                   1230:                vm_map_lock(map);
                   1231:
                   1232:                if (map->flags & VM_MAP_WIREFUTURE) {
                   1233:                        if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
                   1234: #ifdef pmap_wired_count
1.26      thorpej  1235:                            || (locklimit != 0 && (size +
1.25      thorpej  1236:                                 ptoa(pmap_wired_count(vm_map_pmap(map)))) >
1.26      thorpej  1237:                                locklimit)
1.25      thorpej  1238: #endif
                   1239:                        ) {
                   1240:                                retval = KERN_RESOURCE_SHORTAGE;
1.35.2.1! bouyer   1241:                                vm_map_unlock(map);
1.25      thorpej  1242:                                /* unmap the region! */
                   1243:                                (void) uvm_unmap(map, *addr, *addr + size);
                   1244:                                goto bad;
                   1245:                        }
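                                                 /*
                                                  * Editorial note: a worked example of
                                                  * the limit check above, assuming 4KB
                                                  * pages.  Wiring a 32KB mapping adds
                                                  * atop(0x8000) = 8 pages; if this pmap
                                                  * already has 10 pages wired and
                                                  * locklimit is 64KB, then size +
                                                  * ptoa(10) = 0x8000 + 0xa000 = 0x12000
                                                  * exceeds 0x10000, the mapping is torn
                                                  * down, and EAGAIN
                                                  * (KERN_RESOURCE_SHORTAGE) is returned.
                                                  */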
1.35.2.1! bouyer   1246:                        /*
        !          1247:                         * uvm_map_pageable() always returns the map
        !          1248:                         * unlocked.
        !          1249:                         */
1.25      thorpej  1250:                        retval = uvm_map_pageable(map, *addr, *addr + size,
1.35      thorpej  1251:                            FALSE, UVM_LK_ENTER);
1.25      thorpej  1252:                        if (retval != KERN_SUCCESS) {
                   1253:                                /* unmap the region! */
                   1254:                                (void) uvm_unmap(map, *addr, *addr + size);
                   1255:                                goto bad;
                   1256:                        }
                   1257:                        return (0);
                   1258:                }
                   1259:
                   1260:                vm_map_unlock(map);
                   1261:
                   1262:                return (0);
                   1263:        }
1.6       mrg      1264:
                   1265:        /*
                   1266:         * errors: first detach from the uobj, if any.
                   1267:         */
                   1268:
                   1269:        if (uobj)
                   1270:                uobj->pgops->pgo_detach(uobj);
                   1271:
1.25      thorpej  1272:  bad:
1.6       mrg      1273:        switch (retval) {
                   1274:        case KERN_INVALID_ADDRESS:
                   1275:        case KERN_NO_SPACE:
                   1276:                return(ENOMEM);
1.25      thorpej  1277:        case KERN_RESOURCE_SHORTAGE:
                   1278:                return (EAGAIN);
1.6       mrg      1279:        case KERN_PROTECTION_FAILURE:
                   1280:                return(EACCES);
                   1281:        }
                   1282:        return(EINVAL);
1.1       mrg      1283: }
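
                          /*
                           * Editorial sketch (not part of the original file): the shape
                           * of a uvm_mmap() call as a caller such as sys_mmap() might
                           * make it once the arguments have been validated; the local
                           * names here are hypothetical.  Guarded by #if 0 so it is
                           * never compiled.
                           */
                          #if 0
                                  error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
                                      prot, maxprot, flags, (caddr_t)vp, pos,
                                      p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
                          #endif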
