
Annotation of src/sys/uvm/uvm_map.c, Revision 1.331.2.6

1.331.2.6! skrll       1: /*     $NetBSD: uvm_map.c,v 1.331.2.5 2016/07/09 20:25:25 skrll Exp $  */
1.1       mrg         2:
1.98      chs         3: /*
1.1       mrg         4:  * Copyright (c) 1997 Charles D. Cranor and Washington University.
1.98      chs         5:  * Copyright (c) 1991, 1993, The Regents of the University of California.
1.1       mrg         6:  *
                      7:  * All rights reserved.
                      8:  *
                      9:  * This code is derived from software contributed to Berkeley by
                     10:  * The Mach Operating System project at Carnegie-Mellon University.
                     11:  *
                     12:  * Redistribution and use in source and binary forms, with or without
                     13:  * modification, are permitted provided that the following conditions
                     14:  * are met:
                     15:  * 1. Redistributions of source code must retain the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer.
                     17:  * 2. Redistributions in binary form must reproduce the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer in the
                     19:  *    documentation and/or other materials provided with the distribution.
1.295     chuck      20:  * 3. Neither the name of the University nor the names of its contributors
1.1       mrg        21:  *    may be used to endorse or promote products derived from this software
                     22:  *    without specific prior written permission.
                     23:  *
                     24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     34:  * SUCH DAMAGE.
                     35:  *
                     36:  *     @(#)vm_map.c    8.3 (Berkeley) 1/12/94
1.3       mrg        37:  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
1.1       mrg        38:  *
                     39:  *
                     40:  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
                     41:  * All rights reserved.
1.98      chs        42:  *
1.1       mrg        43:  * Permission to use, copy, modify and distribute this software and
                     44:  * its documentation is hereby granted, provided that both the copyright
                     45:  * notice and this permission notice appear in all copies of the
                     46:  * software, derivative works or modified versions, and any portions
                     47:  * thereof, and that both notices appear in supporting documentation.
1.98      chs        48:  *
                     49:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     50:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
1.1       mrg        51:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
1.98      chs        52:  *
1.1       mrg        53:  * Carnegie Mellon requests users of this software to return to
                     54:  *
                     55:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     56:  *  School of Computer Science
                     57:  *  Carnegie Mellon University
                     58:  *  Pittsburgh PA 15213-3890
                     59:  *
                     60:  * any improvements or extensions that they make and grant Carnegie the
                     61:  * rights to redistribute these changes.
                     62:  */
                     63:
1.114     lukem      64: /*
                     65:  * uvm_map.c: uvm map operations
                     66:  */
                     67:
                     68: #include <sys/cdefs.h>
1.331.2.6! skrll      69: __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.331.2.5 2016/07/09 20:25:25 skrll Exp $");
1.114     lukem      70:
1.21      jonathan   71: #include "opt_ddb.h"
1.6       mrg        72: #include "opt_uvmhist.h"
1.169     petrov     73: #include "opt_uvm.h"
1.31      tron       74: #include "opt_sysv.h"
1.1       mrg        75:
                     76: #include <sys/param.h>
                     77: #include <sys/systm.h>
                     78: #include <sys/mman.h>
                     79: #include <sys/proc.h>
1.25      thorpej    80: #include <sys/pool.h>
1.104     chs        81: #include <sys/kernel.h>
1.112     thorpej    82: #include <sys/mount.h>
1.109     thorpej    83: #include <sys/vnode.h>
1.331.2.3  skrll      84: #include <sys/filedesc.h>
1.244     yamt       85: #include <sys/lockdebug.h>
1.248     ad         86: #include <sys/atomic.h>
1.288     drochner   87: #include <sys/sysctl.h>
1.331.2.3  skrll      88: #ifndef __USER_VA0_IS_SAFE
1.288     drochner   89: #include <sys/kauth.h>
1.290     drochner   90: #include "opt_user_va0_disable_default.h"
1.288     drochner   91: #endif
1.1       mrg        92:
                     93: #include <sys/shm.h>
                     94:
                     95: #include <uvm/uvm.h>
1.271     yamt       96: #include <uvm/uvm_readahead.h>
1.21      jonathan   97:
1.270     pooka      98: #if defined(DDB) || defined(DEBUGPRINT)
1.21      jonathan   99: #include <uvm/uvm_ddb.h>
                    100: #endif
                    101:
1.318     matt      102: #ifdef UVMHIST
1.328     matt      103: static struct kern_history_ent maphistbuf[100];
                    104: UVMHIST_DEFINE(maphist) = UVMHIST_INITIALIZER(maphist, maphistbuf);
1.318     matt      105: #endif
                    106:
1.258     ad        107: #if !defined(UVMMAP_COUNTERS)
1.207     yamt      108:
                    109: #define        UVMMAP_EVCNT_DEFINE(name)       /* nothing */
                    110: #define UVMMAP_EVCNT_INCR(ev)          /* nothing */
                    111: #define UVMMAP_EVCNT_DECR(ev)          /* nothing */
                    112:
                    113: #else /* defined(UVMMAP_COUNTERS) */
                    114:
1.228     yamt      115: #include <sys/evcnt.h>
1.207     yamt      116: #define        UVMMAP_EVCNT_DEFINE(name) \
                    117: struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
                    118:     "uvmmap", #name); \
                    119: EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
                    120: #define        UVMMAP_EVCNT_INCR(ev)           uvmmap_evcnt_##ev.ev_count++
                    121: #define        UVMMAP_EVCNT_DECR(ev)           uvmmap_evcnt_##ev.ev_count--
                    122:
                    123: #endif /* defined(UVMMAP_COUNTERS) */
                    124:
                    125: UVMMAP_EVCNT_DEFINE(ubackmerge)
                    126: UVMMAP_EVCNT_DEFINE(uforwmerge)
                    127: UVMMAP_EVCNT_DEFINE(ubimerge)
                    128: UVMMAP_EVCNT_DEFINE(unomerge)
                    129: UVMMAP_EVCNT_DEFINE(kbackmerge)
                    130: UVMMAP_EVCNT_DEFINE(kforwmerge)
                    131: UVMMAP_EVCNT_DEFINE(kbimerge)
                    132: UVMMAP_EVCNT_DEFINE(knomerge)
                    133: UVMMAP_EVCNT_DEFINE(map_call)
                    134: UVMMAP_EVCNT_DEFINE(mlk_call)
                    135: UVMMAP_EVCNT_DEFINE(mlk_hint)
1.263     matt      136: UVMMAP_EVCNT_DEFINE(mlk_list)
                    137: UVMMAP_EVCNT_DEFINE(mlk_tree)
                    138: UVMMAP_EVCNT_DEFINE(mlk_treeloop)
                    139: UVMMAP_EVCNT_DEFINE(mlk_listloop)
1.169     petrov    140:
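/*
 * Illustrative note (not part of the original source): with UVMMAP_COUNTERS
 * defined, each UVMMAP_EVCNT_DEFINE(name) above pastes "name" into a
 * statically attached struct evcnt, and the INCR/DECR macros bump it, e.g.
 *
 *	UVMMAP_EVCNT_INCR(ubackmerge);
 *
 * expands to uvmmap_evcnt_ubackmerge.ev_count++, and the counter shows up
 * under the "uvmmap" group in event-counter listings (vmstat -e).  Without
 * UVMMAP_COUNTERS the macros expand to nothing.
 */
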
1.87      enami     141: const char vmmapbsy[] = "vmmapbsy";
1.1       mrg       142:
                    143: /*
1.248     ad        144:  * cache for vmspace structures.
1.25      thorpej   145:  */
                    146:
1.248     ad        147: static struct pool_cache uvm_vmspace_cache;
1.25      thorpej   148:
1.26      thorpej   149: /*
1.248     ad        150:  * cache for dynamically-allocated map entries.
1.26      thorpej   151:  */
                    152:
1.248     ad        153: static struct pool_cache uvm_map_entry_cache;
1.130     thorpej   154:
1.40      thorpej   155: #ifdef PMAP_GROWKERNEL
                    156: /*
                    157:  * This global represents the end of the kernel virtual address
                    158:  * space.  If we want to exceed this, we must grow the kernel
                    159:  * virtual address space dynamically.
                    160:  *
                    161:  * Note, this variable is locked by kernel_map's lock.
                    162:  */
                    163: vaddr_t uvm_maxkaddr;
                    164: #endif
                    165:
1.288     drochner  166: #ifndef __USER_VA0_IS_SAFE
1.290     drochner  167: #ifndef __USER_VA0_DISABLE_DEFAULT
                    168: #define __USER_VA0_DISABLE_DEFAULT 1
1.288     drochner  169: #endif
1.290     drochner  170: #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */
                    171: #undef __USER_VA0_DISABLE_DEFAULT
                    172: #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT
1.288     drochner  173: #endif
1.331.2.6! skrll     174: int user_va0_disable = __USER_VA0_DISABLE_DEFAULT;
1.288     drochner  175: #endif
                    176:
1.25      thorpej   177: /*
1.1       mrg       178:  * macros
                    179:  */
                    180:
                    181: /*
1.194     yamt      182:  * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
                    183:  */
1.311     para      184: extern struct vm_map *pager_map;
1.194     yamt      185:
                    186: #define        UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
                    187:     prot, maxprot, inh, adv, wire) \
                    188:        ((ent)->etype == (type) && \
1.311     para      189:        (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \
1.194     yamt      190:        (ent)->object.uvm_obj == (uobj) && \
                    191:        (ent)->protection == (prot) && \
                    192:        (ent)->max_protection == (maxprot) && \
                    193:        (ent)->inheritance == (inh) && \
                    194:        (ent)->advice == (adv) && \
                    195:        (ent)->wired_count == (wire))
                    196:
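/*
 * Illustrative sketch (not part of the original source): a caller deciding
 * whether a new mapping can be back-merged into the previous entry would
 * test something along these lines (the variable names are hypothetical):
 *
 *	if (prev_entry->end == start &&
 *	    UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0,
 *	      prot, maxprot, inherit, advice, 0)) {
 *		... extend prev_entry->end instead of adding a new entry ...
 *	}
 *
 * i.e. everything that affects how the range behaves (entry type, backing
 * object, protections, inheritance, advice, wiring) must match for a merge.
 */
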
                    197: /*
1.1       mrg       198:  * uvm_map_entry_link: insert entry into a map
                    199:  *
                    200:  * => map must be locked
                    201:  */
1.10      mrg       202: #define uvm_map_entry_link(map, after_where, entry) do { \
1.218     yamt      203:        uvm_mapent_check(entry); \
1.10      mrg       204:        (map)->nentries++; \
                    205:        (entry)->prev = (after_where); \
                    206:        (entry)->next = (after_where)->next; \
                    207:        (entry)->prev->next = (entry); \
                    208:        (entry)->next->prev = (entry); \
1.144     yamt      209:        uvm_rb_insert((map), (entry)); \
1.124     perry     210: } while (/*CONSTCOND*/ 0)
1.10      mrg       211:
1.1       mrg       212: /*
                    213:  * uvm_map_entry_unlink: remove entry from a map
                    214:  *
                    215:  * => map must be locked
                    216:  */
1.10      mrg       217: #define uvm_map_entry_unlink(map, entry) do { \
1.221     yamt      218:        KASSERT((entry) != (map)->first_free); \
                    219:        KASSERT((entry) != (map)->hint); \
1.218     yamt      220:        uvm_mapent_check(entry); \
1.10      mrg       221:        (map)->nentries--; \
                    222:        (entry)->next->prev = (entry)->prev; \
                    223:        (entry)->prev->next = (entry)->next; \
1.144     yamt      224:        uvm_rb_remove((map), (entry)); \
1.124     perry     225: } while (/*CONSTCOND*/ 0)
1.1       mrg       226:
                    227: /*
                    228:  * SAVE_HINT: saves the specified entry as the hint for future lookups.
                    229:  *
1.248     ad        230:  * => map need not be locked.
1.1       mrg       231:  */
1.248     ad        232: #define SAVE_HINT(map, check, value) do { \
1.258     ad        233:        if ((map)->hint == (check)) \
                    234:                (map)->hint = (value); \
1.124     perry     235: } while (/*CONSTCOND*/ 0)
1.1       mrg       236:
                    237: /*
1.221     yamt      238:  * clear_hints: ensure that hints don't point to the entry.
                    239:  *
                    240:  * => map must be write-locked.
                    241:  */
                    242: static void
                    243: clear_hints(struct vm_map *map, struct vm_map_entry *ent)
                    244: {
                    245:
                    246:        SAVE_HINT(map, ent, ent->prev);
                    247:        if (map->first_free == ent) {
                    248:                map->first_free = ent->prev;
                    249:        }
                    250: }
                    251:
                    252: /*
1.1       mrg       253:  * VM_MAP_RANGE_CHECK: check and correct range
                    254:  *
                    255:  * => map must at least be read locked
                    256:  */
                    257:
1.10      mrg       258: #define VM_MAP_RANGE_CHECK(map, start, end) do { \
1.139     enami     259:        if (start < vm_map_min(map))            \
                    260:                start = vm_map_min(map);        \
                    261:        if (end > vm_map_max(map))              \
                    262:                end = vm_map_max(map);          \
                    263:        if (start > end)                        \
                    264:                start = end;                    \
1.124     perry     265: } while (/*CONSTCOND*/ 0)
1.1       mrg       266:
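/*
 * Worked example (not part of the original source, hypothetical numbers):
 * for a map whose valid range is [0x1000, 0xffffe000), VM_MAP_RANGE_CHECK
 * clamps caller-supplied ranges as follows:
 *
 *	start = 0x0,    end = 0x4000  ->  start = 0x1000, end = 0x4000
 *	start = 0x8000, end = 0x2000  ->  start = 0x2000, end = 0x2000 (empty)
 *
 * so the code that follows may assume
 * vm_map_min(map) <= start <= end <= vm_map_max(map).
 */
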
                    267: /*
                    268:  * local prototypes
                    269:  */
                    270:
1.138     enami     271: static struct vm_map_entry *
                    272:                uvm_mapent_alloc(struct vm_map *, int);
                    273: static void    uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
                    274: static void    uvm_mapent_free(struct vm_map_entry *);
1.218     yamt      275: #if defined(DEBUG)
                    276: static void    _uvm_mapent_check(const struct vm_map_entry *, const char *,
                    277:                    int);
                    278: #define        uvm_mapent_check(map)   _uvm_mapent_check(map, __FILE__, __LINE__)
                    279: #else /* defined(DEBUG) */
                    280: #define        uvm_mapent_check(e)     /* nothing */
                    281: #endif /* defined(DEBUG) */
1.219     yamt      282:
1.138     enami     283: static void    uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
                    284: static void    uvm_map_reference_amap(struct vm_map_entry *, int);
1.140     enami     285: static int     uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
1.304     matt      286:                    int, struct vm_map_entry *);
1.138     enami     287: static void    uvm_map_unreference_amap(struct vm_map_entry *, int);
1.1       mrg       288:
1.222     yamt      289: int _uvm_map_sanity(struct vm_map *);
                    290: int _uvm_tree_sanity(struct vm_map *);
1.263     matt      291: static vsize_t uvm_rb_maxgap(const struct vm_map_entry *);
1.144     yamt      292:
1.263     matt      293: #define        ROOT_ENTRY(map)         ((struct vm_map_entry *)(map)->rb_tree.rbt_root)
                    294: #define        LEFT_ENTRY(entry)       ((struct vm_map_entry *)(entry)->rb_node.rb_left)
                    295: #define        RIGHT_ENTRY(entry)      ((struct vm_map_entry *)(entry)->rb_node.rb_right)
                    296: #define        PARENT_ENTRY(map, entry) \
                    297:        (ROOT_ENTRY(map) == (entry) \
1.293     rmind     298:            ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))
1.263     matt      299:
1.331.2.3  skrll     300: /*
                    301:  * These get filled in if/when the SYSVSHM shared memory code is loaded.
                    302:  *
                    303:  * We do this with function pointers rather than #ifdef SYSVSHM so that the
                    304:  * SYSVSHM code can be loaded and unloaded.
                    305:  */
                    306: void (*uvm_shmexit)(struct vmspace *) = NULL;
                    307: void (*uvm_shmfork)(struct vmspace *, struct vmspace *) = NULL;
                    308:
1.263     matt      309: static int
1.293     rmind     310: uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey)
1.144     yamt      311: {
1.293     rmind     312:        const struct vm_map_entry *eparent = nparent;
                    313:        const struct vm_map_entry *ekey = nkey;
1.144     yamt      314:
1.263     matt      315:        KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
                    316:        KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);
1.164     junyoung  317:
1.293     rmind     318:        if (eparent->start < ekey->start)
1.263     matt      319:                return -1;
1.293     rmind     320:        if (eparent->end >= ekey->start)
1.263     matt      321:                return 1;
                    322:        return 0;
1.144     yamt      323: }
                    324:
1.263     matt      325: static int
1.293     rmind     326: uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey)
1.144     yamt      327: {
1.293     rmind     328:        const struct vm_map_entry *eparent = nparent;
1.263     matt      329:        const vaddr_t va = *(const vaddr_t *) vkey;
1.144     yamt      330:
1.293     rmind     331:        if (eparent->start < va)
1.263     matt      332:                return -1;
1.293     rmind     333:        if (eparent->end >= va)
1.263     matt      334:                return 1;
                    335:        return 0;
1.144     yamt      336: }
                    337:
1.293     rmind     338: static const rb_tree_ops_t uvm_map_tree_ops = {
1.263     matt      339:        .rbto_compare_nodes = uvm_map_compare_nodes,
                    340:        .rbto_compare_key = uvm_map_compare_key,
1.293     rmind     341:        .rbto_node_offset = offsetof(struct vm_map_entry, rb_node),
                    342:        .rbto_context = NULL
1.263     matt      343: };
1.144     yamt      344:
1.293     rmind     345: /*
                    346:  * uvm_rb_gap: return the gap size between our entry and next entry.
                    347:  */
1.206     perry     348: static inline vsize_t
1.263     matt      349: uvm_rb_gap(const struct vm_map_entry *entry)
1.144     yamt      350: {
1.293     rmind     351:
1.144     yamt      352:        KASSERT(entry->next != NULL);
                    353:        return entry->next->start - entry->end;
                    354: }
                    355:
                    356: static vsize_t
1.263     matt      357: uvm_rb_maxgap(const struct vm_map_entry *entry)
1.144     yamt      358: {
1.263     matt      359:        struct vm_map_entry *child;
                    360:        vsize_t maxgap = entry->gap;
1.144     yamt      361:
1.263     matt      362:        /*
                    363:         * We need maxgap to be the largest gap of us or any of our
                    364:         * descendants.  Since each of our children's maxgap is the
                    365:         * cached value of their largest gap of themselves or their
                    366:         * descendants, we can just use that value and avoid recursing
                    367:         * down the tree to calculate it.
                    368:         */
                    369:        if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
                    370:                maxgap = child->maxgap;
1.144     yamt      371:
1.263     matt      372:        if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
                    373:                maxgap = child->maxgap;
1.144     yamt      374:
1.263     matt      375:        return maxgap;
1.144     yamt      376: }
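
/*
 * Worked example (not part of the original source, hypothetical numbers):
 * if an entry has gap = 0x2000, a left child whose cached maxgap is 0x10000
 * and a right child whose cached maxgap is 0x4000, uvm_rb_maxgap() returns
 * 0x10000 without visiting any grandchildren, because each child's maxgap
 * already summarizes its entire subtree.
 */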
                    377:
1.263     matt      378: static void
1.144     yamt      379: uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
                    380: {
1.263     matt      381:        struct vm_map_entry *parent;
                    382:
                    383:        KASSERT(entry->gap == uvm_rb_gap(entry));
                    384:        entry->maxgap = uvm_rb_maxgap(entry);
                    385:
                    386:        while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
                    387:                struct vm_map_entry *brother;
                    388:                vsize_t maxgap = parent->gap;
1.293     rmind     389:                unsigned int which;
1.263     matt      390:
                    391:                KDASSERT(parent->gap == uvm_rb_gap(parent));
                    392:                if (maxgap < entry->maxgap)
                    393:                        maxgap = entry->maxgap;
                    394:                /*
1.293     rmind     395:                 * Since we work towards the root, we know entry's maxgap
                    396:                 * value is OK, but its brothers may now be out-of-date due
                    397:                 * to rebalancing.  So refresh it.
1.263     matt      398:                 */
1.293     rmind     399:                which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER;
                    400:                brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which];
1.263     matt      401:                if (brother != NULL) {
                    402:                        KDASSERT(brother->gap == uvm_rb_gap(brother));
                    403:                        brother->maxgap = uvm_rb_maxgap(brother);
                    404:                        if (maxgap < brother->maxgap)
                    405:                                maxgap = brother->maxgap;
                    406:                }
                    407:
                    408:                parent->maxgap = maxgap;
                    409:                entry = parent;
                    410:        }
1.144     yamt      411: }
                    412:
1.203     thorpej   413: static void
1.144     yamt      414: uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
                    415: {
1.326     martin    416:        struct vm_map_entry *ret __diagused;
1.293     rmind     417:
1.263     matt      418:        entry->gap = entry->maxgap = uvm_rb_gap(entry);
                    419:        if (entry->prev != &map->header)
                    420:                entry->prev->gap = uvm_rb_gap(entry->prev);
1.144     yamt      421:
1.293     rmind     422:        ret = rb_tree_insert_node(&map->rb_tree, entry);
                    423:        KASSERTMSG(ret == entry,
1.305     jym       424:            "uvm_rb_insert: map %p: duplicate entry %p", map, ret);
1.263     matt      425:
                    426:        /*
                    427:         * If the previous entry is not our immediate left child, then it's an
                    428:         * ancestor and will be fixed up on the way to the root.  We don't
                    429:         * have to check entry->prev against &map->header since &map->header
                    430:         * will never be in the tree.
                    431:         */
                    432:        uvm_rb_fixup(map,
                    433:            LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry);
1.144     yamt      434: }
                    435:
1.203     thorpej   436: static void
1.144     yamt      437: uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
                    438: {
1.263     matt      439:        struct vm_map_entry *prev_parent = NULL, *next_parent = NULL;
1.144     yamt      440:
1.263     matt      441:        /*
                    442:         * If we are removing an interior node, then an adjacent node will
                    443:         * be used to replace its position in the tree.  Therefore we will
                    444:         * need to fixup the tree starting at the parent of the replacement
                    445:         * node.  So record their parents for later use.
                    446:         */
1.144     yamt      447:        if (entry->prev != &map->header)
1.263     matt      448:                prev_parent = PARENT_ENTRY(map, entry->prev);
                    449:        if (entry->next != &map->header)
                    450:                next_parent = PARENT_ENTRY(map, entry->next);
                    451:
1.293     rmind     452:        rb_tree_remove_node(&map->rb_tree, entry);
1.263     matt      453:
                    454:        /*
                    455:         * If the previous node has a new parent, fixup the tree starting
                    456:         * at the previous node's old parent.
                    457:         */
                    458:        if (entry->prev != &map->header) {
                    459:                /*
                    460:                 * Update the previous entry's gap due to our absence.
                    461:                 */
                    462:                entry->prev->gap = uvm_rb_gap(entry->prev);
1.144     yamt      463:                uvm_rb_fixup(map, entry->prev);
1.263     matt      464:                if (prev_parent != NULL
                    465:                    && prev_parent != entry
                    466:                    && prev_parent != PARENT_ENTRY(map, entry->prev))
                    467:                        uvm_rb_fixup(map, prev_parent);
                    468:        }
                    469:
                    470:        /*
                    471:         * If the next node has a new parent, fixup the tree starting
                    472:         * at the next node's old parent.
                    473:         */
                    474:        if (entry->next != &map->header) {
                    475:                uvm_rb_fixup(map, entry->next);
                    476:                if (next_parent != NULL
                    477:                    && next_parent != entry
                    478:                    && next_parent != PARENT_ENTRY(map, entry->next))
                    479:                        uvm_rb_fixup(map, next_parent);
                    480:        }
1.144     yamt      481: }
                    482:
1.222     yamt      483: #if defined(DEBUG)
                    484: int uvm_debug_check_map = 0;
1.159     yamt      485: int uvm_debug_check_rbtree = 0;
1.222     yamt      486: #define uvm_map_check(map, name) \
                    487:        _uvm_map_check((map), (name), __FILE__, __LINE__)
                    488: static void
                    489: _uvm_map_check(struct vm_map *map, const char *name,
                    490:     const char *file, int line)
                    491: {
                    492:
                    493:        if ((uvm_debug_check_map && _uvm_map_sanity(map)) ||
                    494:            (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) {
                    495:                panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)",
                    496:                    name, map, file, line);
                    497:        }
                    498: }
                    499: #else /* defined(DEBUG) */
                    500: #define uvm_map_check(map, name)       /* nothing */
                    501: #endif /* defined(DEBUG) */
                    502:
                    503: #if defined(DEBUG) || defined(DDB)
                    504: int
                    505: _uvm_map_sanity(struct vm_map *map)
                    506: {
1.234     thorpej   507:        bool first_free_found = false;
                    508:        bool hint_found = false;
1.222     yamt      509:        const struct vm_map_entry *e;
1.272     yamt      510:        struct vm_map_entry *hint = map->hint;
1.222     yamt      511:
1.331.2.2  skrll     512:        e = &map->header;
1.222     yamt      513:        for (;;) {
                    514:                if (map->first_free == e) {
1.234     thorpej   515:                        first_free_found = true;
1.222     yamt      516:                } else if (!first_free_found && e->next->start > e->end) {
                    517:                        printf("first_free %p should be %p\n",
                    518:                            map->first_free, e);
                    519:                        return -1;
                    520:                }
1.272     yamt      521:                if (hint == e) {
1.234     thorpej   522:                        hint_found = true;
1.222     yamt      523:                }
                    524:
                    525:                e = e->next;
                    526:                if (e == &map->header) {
                    527:                        break;
                    528:                }
                    529:        }
                    530:        if (!first_free_found) {
                    531:                printf("stale first_free\n");
                    532:                return -1;
                    533:        }
                    534:        if (!hint_found) {
                    535:                printf("stale hint\n");
                    536:                return -1;
                    537:        }
                    538:        return 0;
                    539: }
1.144     yamt      540:
                    541: int
1.222     yamt      542: _uvm_tree_sanity(struct vm_map *map)
1.144     yamt      543: {
                    544:        struct vm_map_entry *tmp, *trtmp;
                    545:        int n = 0, i = 1;
                    546:
1.263     matt      547:        for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
                    548:                if (tmp->gap != uvm_rb_gap(tmp)) {
1.331.2.2  skrll     549:                        printf("%d/%d gap %#lx != %#lx %s\n",
1.222     yamt      550:                            n + 1, map->nentries,
1.263     matt      551:                            (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp),
1.144     yamt      552:                            tmp->next == &map->header ? "(last)" : "");
                    553:                        goto error;
                    554:                }
1.263     matt      555:                /*
                    556:                 * If any entries are out of order, tmp->gap will be unsigned
                    557:                 * and will likely exceed the size of the map.
                    558:                 */
1.273     yamt      559:                if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) {
                    560:                        printf("too large gap %zu\n", (size_t)tmp->gap);
                    561:                        goto error;
                    562:                }
1.263     matt      563:                n++;
                    564:        }
                    565:
                    566:        if (n != map->nentries) {
                    567:                printf("nentries: %d vs %d\n", n, map->nentries);
                    568:                goto error;
1.144     yamt      569:        }
1.263     matt      570:
1.144     yamt      571:        trtmp = NULL;
1.263     matt      572:        for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
                    573:                if (tmp->maxgap != uvm_rb_maxgap(tmp)) {
1.331.2.2  skrll     574:                        printf("maxgap %#lx != %#lx\n",
1.263     matt      575:                            (ulong)tmp->maxgap,
                    576:                            (ulong)uvm_rb_maxgap(tmp));
1.144     yamt      577:                        goto error;
                    578:                }
                    579:                if (trtmp != NULL && trtmp->start >= tmp->start) {
1.285     matt      580:                        printf("corrupt: 0x%"PRIxVADDR" >= 0x%"PRIxVADDR"\n",
1.222     yamt      581:                            trtmp->start, tmp->start);
1.144     yamt      582:                        goto error;
                    583:                }
                    584:
                    585:                trtmp = tmp;
                    586:        }
                    587:
1.263     matt      588:        for (tmp = map->header.next; tmp != &map->header;
1.144     yamt      589:            tmp = tmp->next, i++) {
1.293     rmind     590:                trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT);
1.263     matt      591:                if (trtmp == NULL)
                    592:                        trtmp = &map->header;
                    593:                if (tmp->prev != trtmp) {
                    594:                        printf("lookup: %d: %p->prev=%p: %p\n",
                    595:                            i, tmp, tmp->prev, trtmp);
                    596:                        goto error;
                    597:                }
1.293     rmind     598:                trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT);
1.263     matt      599:                if (trtmp == NULL)
                    600:                        trtmp = &map->header;
                    601:                if (tmp->next != trtmp) {
                    602:                        printf("lookup: %d: %p->next=%p: %p\n",
                    603:                            i, tmp, tmp->next, trtmp);
                    604:                        goto error;
                    605:                }
1.293     rmind     606:                trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start);
1.144     yamt      607:                if (trtmp != tmp) {
1.222     yamt      608:                        printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp,
1.263     matt      609:                            PARENT_ENTRY(map, tmp));
1.144     yamt      610:                        goto error;
                    611:                }
                    612:        }
                    613:
                    614:        return (0);
                    615:  error:
                    616:        return (-1);
                    617: }
1.222     yamt      618: #endif /* defined(DEBUG) || defined(DDB) */
1.144     yamt      619:
1.1       mrg       620: /*
1.238     ad        621:  * vm_map_lock: acquire an exclusive (write) lock on a map.
                    622:  *
                    623:  * => The locking protocol provides for guaranteed upgrade from shared ->
                    624:  *    exclusive by whichever thread currently has the map marked busy.
                    625:  *    See "LOCKING PROTOCOL NOTES" in uvm_map.h.  This is horrible; among
                    626:  *    other problems, it defeats any fairness guarantees provided by RW
                    627:  *    locks.
                    628:  */
                    629:
                    630: void
                    631: vm_map_lock(struct vm_map *map)
                    632: {
                    633:
                    634:        for (;;) {
                    635:                rw_enter(&map->lock, RW_WRITER);
1.314     rmind     636:                if (map->busy == NULL || map->busy == curlwp) {
1.249     yamt      637:                        break;
1.314     rmind     638:                }
1.238     ad        639:                mutex_enter(&map->misc_lock);
                    640:                rw_exit(&map->lock);
1.314     rmind     641:                if (map->busy != NULL) {
1.248     ad        642:                        cv_wait(&map->cv, &map->misc_lock);
1.314     rmind     643:                }
1.238     ad        644:                mutex_exit(&map->misc_lock);
                    645:        }
                    646:        map->timestamp++;
                    647: }
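
/*
 * Illustrative sketch (not part of the original source) of how the "busy"
 * mark interacts with vm_map_lock() above: the thread that marked the map
 * busy may drop the lock, sleep, and later re-acquire it, because the loop
 * in vm_map_lock() lets the busy owner through while holding everyone else
 * off.  Roughly:
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);		map->busy = curlwp
 *	vm_map_unlock(map);
 *	...sleep, do I/O, allocate memory...
 *	vm_map_lock(map);		succeeds for the busy owner
 *	vm_map_unbusy(map);		wake threads waiting in vm_map_lock()
 *	vm_map_unlock(map);
 */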
                    648:
                    649: /*
                    650:  * vm_map_lock_try: try to lock a map, failing if it is already locked.
                    651:  */
                    652:
                    653: bool
                    654: vm_map_lock_try(struct vm_map *map)
                    655: {
                    656:
1.314     rmind     657:        if (!rw_tryenter(&map->lock, RW_WRITER)) {
1.238     ad        658:                return false;
1.314     rmind     659:        }
1.238     ad        660:        if (map->busy != NULL) {
                    661:                rw_exit(&map->lock);
                    662:                return false;
                    663:        }
                    664:        map->timestamp++;
                    665:        return true;
                    666: }
                    667:
                    668: /*
                    669:  * vm_map_unlock: release an exclusive lock on a map.
                    670:  */
                    671:
                    672: void
                    673: vm_map_unlock(struct vm_map *map)
                    674: {
                    675:
1.314     rmind     676:        KASSERT(rw_write_held(&map->lock));
                    677:        KASSERT(map->busy == NULL || map->busy == curlwp);
                    678:        rw_exit(&map->lock);
1.238     ad        679: }
                    680:
                    681: /*
                    682:  * vm_map_unbusy: mark the map as unbusy, and wake any waiters that
                    683:  *     want an exclusive lock.
                    684:  */
                    685:
                    686: void
                    687: vm_map_unbusy(struct vm_map *map)
                    688: {
                    689:
                    690:        KASSERT(map->busy == curlwp);
                    691:
                    692:        /*
                    693:         * Safe to clear 'busy' and 'waiters' with only a read lock held:
                    694:         *
                    695:         * o they can only be set with a write lock held
                    696:         * o writers are blocked out with a read or write hold
                    697:         * o at any time, only one thread owns the set of values
                    698:         */
1.248     ad        699:        mutex_enter(&map->misc_lock);
1.238     ad        700:        map->busy = NULL;
                    701:        cv_broadcast(&map->cv);
                    702:        mutex_exit(&map->misc_lock);
                    703: }
                    704:
                    705: /*
1.248     ad        706:  * vm_map_lock_read: acquire a shared (read) lock on a map.
                    707:  */
                    708:
                    709: void
                    710: vm_map_lock_read(struct vm_map *map)
                    711: {
                    712:
                    713:        rw_enter(&map->lock, RW_READER);
                    714: }
                    715:
                    716: /*
                    717:  * vm_map_unlock_read: release a shared lock on a map.
                    718:  */
1.314     rmind     719:
1.248     ad        720: void
                    721: vm_map_unlock_read(struct vm_map *map)
                    722: {
                    723:
                    724:        rw_exit(&map->lock);
                    725: }
                    726:
                    727: /*
                    728:  * vm_map_busy: mark a map as busy.
                    729:  *
                    730:  * => the caller must hold the map write locked
                    731:  */
                    732:
                    733: void
                    734: vm_map_busy(struct vm_map *map)
                    735: {
                    736:
                    737:        KASSERT(rw_write_held(&map->lock));
                    738:        KASSERT(map->busy == NULL);
                    739:
                    740:        map->busy = curlwp;
                    741: }
                    742:
                    743: /*
                    744:  * vm_map_locked_p: return true if the map is write locked.
1.269     yamt      745:  *
                    746:  * => only for debug purposes like KASSERTs.
                    747:  * => should not be used to verify that a map is not locked.
1.248     ad        748:  */
                    749:
                    750: bool
                    751: vm_map_locked_p(struct vm_map *map)
                    752: {
                    753:
1.314     rmind     754:        return rw_write_held(&map->lock);
1.248     ad        755: }
                    756:
                    757: /*
1.1       mrg       758:  * uvm_mapent_alloc: allocate a map entry
                    759:  */
                    760:
1.203     thorpej   761: static struct vm_map_entry *
1.138     enami     762: uvm_mapent_alloc(struct vm_map *map, int flags)
1.10      mrg       763: {
1.99      chs       764:        struct vm_map_entry *me;
1.127     thorpej   765:        int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
1.104     chs       766:        UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
1.1       mrg       767:
1.311     para      768:        me = pool_cache_get(&uvm_map_entry_cache, pflags);
1.314     rmind     769:        if (__predict_false(me == NULL)) {
1.311     para      770:                return NULL;
1.314     rmind     771:        }
1.311     para      772:        me->flags = 0;
                    773:
1.331.2.2  skrll     774:        UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%d]", me,
1.314     rmind     775:            (map == kernel_map), 0, 0);
                    776:        return me;
1.1       mrg       777: }
                    778:
                    779: /*
                    780:  * uvm_mapent_free: free map entry
                    781:  */
                    782:
1.203     thorpej   783: static void
1.138     enami     784: uvm_mapent_free(struct vm_map_entry *me)
1.1       mrg       785: {
1.104     chs       786:        UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
                    787:
1.331.2.2  skrll     788:        UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%d]",
1.1       mrg       789:                me, me->flags, 0, 0);
1.311     para      790:        pool_cache_put(&uvm_map_entry_cache, me);
1.1       mrg       791: }
                    792:
                    793: /*
                    794:  * uvm_mapent_copy: copy a map entry, preserving flags
                    795:  */
                    796:
1.206     perry     797: static inline void
1.138     enami     798: uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
1.10      mrg       799: {
1.139     enami     800:
1.106     chs       801:        memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
1.139     enami     802:            ((char *)src));
1.1       mrg       803: }
                    804:
1.218     yamt      805: #if defined(DEBUG)
                    806: static void
                    807: _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line)
                    808: {
                    809:
                    810:        if (entry->start >= entry->end) {
                    811:                goto bad;
                    812:        }
                    813:        if (UVM_ET_ISOBJ(entry)) {
                    814:                if (entry->object.uvm_obj == NULL) {
                    815:                        goto bad;
                    816:                }
                    817:        } else if (UVM_ET_ISSUBMAP(entry)) {
                    818:                if (entry->object.sub_map == NULL) {
                    819:                        goto bad;
                    820:                }
                    821:        } else {
                    822:                if (entry->object.uvm_obj != NULL ||
                    823:                    entry->object.sub_map != NULL) {
                    824:                        goto bad;
                    825:                }
                    826:        }
                    827:        if (!UVM_ET_ISOBJ(entry)) {
                    828:                if (entry->offset != 0) {
                    829:                        goto bad;
                    830:                }
                    831:        }
                    832:
                    833:        return;
                    834:
                    835: bad:
                    836:        panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line);
                    837: }
                    838: #endif /* defined(DEBUG) */
                    839:
1.1       mrg       840: /*
                    841:  * uvm_map_entry_unwire: unwire a map entry
                    842:  *
                    843:  * => map should be locked by caller
                    844:  */
                    845:
1.206     perry     846: static inline void
1.138     enami     847: uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
1.10      mrg       848: {
1.139     enami     849:
1.10      mrg       850:        entry->wired_count = 0;
1.57      thorpej   851:        uvm_fault_unwire_locked(map, entry->start, entry->end);
1.1       mrg       852: }
                    853:
1.85      chs       854:
                    855: /*
                    856:  * wrapper for calling amap_ref()
                    857:  */
1.206     perry     858: static inline void
1.138     enami     859: uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
1.85      chs       860: {
1.139     enami     861:
1.99      chs       862:        amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
1.139     enami     863:            (entry->end - entry->start) >> PAGE_SHIFT, flags);
1.85      chs       864: }
                    865:
                    866:
                    867: /*
1.98      chs       868:  * wrapper for calling amap_unref()
1.85      chs       869:  */
1.206     perry     870: static inline void
1.138     enami     871: uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
1.85      chs       872: {
1.139     enami     873:
1.99      chs       874:        amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
1.139     enami     875:            (entry->end - entry->start) >> PAGE_SHIFT, flags);
1.85      chs       876: }
                    877:
                    878:
1.1       mrg       879: /*
1.248     ad        880:  * uvm_map_init: init mapping system at boot time.
1.1       mrg       881:  */
                    882:
1.10      mrg       883: void
1.138     enami     884: uvm_map_init(void)
1.1       mrg       885: {
                    886: #if defined(UVMHIST)
1.297     mrg       887:        static struct kern_history_ent pdhistbuf[100];
1.1       mrg       888: #endif
1.10      mrg       889:
                    890:        /*
                    891:         * first, init logging system.
                    892:         */
1.1       mrg       893:
1.10      mrg       894:        UVMHIST_FUNC("uvm_map_init");
1.328     matt      895:        UVMHIST_LINK_STATIC(maphist);
1.10      mrg       896:        UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
                    897:        UVMHIST_CALLED(maphist);
                    898:        UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
                    899:
                    900:        /*
1.174     yamt      901:         * initialize the global lock for kernel map entry.
1.10      mrg       902:         */
                    903:
1.238     ad        904:        mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM);
1.311     para      905: }
1.248     ad        906:
1.311     para      907: /*
                    908:  * uvm_map_init_caches: init mapping system caches.
                    909:  */
                    910: void
                    911: uvm_map_init_caches(void)
1.312     rmind     912: {
1.248     ad        913:        /*
                    914:         * initialize caches.
                    915:         */
                    916:
                    917:        pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry),
                    918:            0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL);
                    919:        pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace),
                    920:            0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL);
1.1       mrg       921: }
                    922:
                    923: /*
                    924:  * clippers
                    925:  */
                    926:
                    927: /*
1.218     yamt      928:  * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy.
                    929:  */
                    930:
                    931: static void
                    932: uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2,
                    933:     vaddr_t splitat)
                    934: {
                    935:        vaddr_t adj;
                    936:
                    937:        KASSERT(entry1->start < splitat);
                    938:        KASSERT(splitat < entry1->end);
                    939:
                    940:        adj = splitat - entry1->start;
                    941:        entry1->end = entry2->start = splitat;
                    942:
                    943:        if (entry1->aref.ar_amap) {
                    944:                amap_splitref(&entry1->aref, &entry2->aref, adj);
                    945:        }
                    946:        if (UVM_ET_ISSUBMAP(entry1)) {
                    947:                /* ... unlikely to happen, but play it safe */
                    948:                 uvm_map_reference(entry1->object.sub_map);
                    949:        } else if (UVM_ET_ISOBJ(entry1)) {
                    950:                KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
                    951:                entry2->offset += adj;
                    952:                if (entry1->object.uvm_obj->pgops &&
                    953:                    entry1->object.uvm_obj->pgops->pgo_reference)
                    954:                        entry1->object.uvm_obj->pgops->pgo_reference(
                    955:                            entry1->object.uvm_obj);
                    956:        }
                    957: }
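
/*
 * Worked example (not part of the original source, hypothetical addresses):
 * splitting an object-backed entry covering [0x1000, 0x5000) with offset 0
 * at splitat = 0x3000 gives adj = 0x2000, so entry1 becomes [0x1000, 0x3000)
 * with offset 0, entry2 becomes [0x3000, 0x5000) with offset 0x2000, and the
 * backing object gains one reference via its pgo_reference hook.
 */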
                    958:
                    959: /*
1.1       mrg       960:  * uvm_map_clip_start: ensure that the entry begins at or after
                    961:  *     the starting address, if it doesn't we split the entry.
1.98      chs       962:  *
1.1       mrg       963:  * => caller should use UVM_MAP_CLIP_START macro rather than calling
                    964:  *    this directly
                    965:  * => map must be locked by caller
                    966:  */
                    967:
1.99      chs       968: void
1.138     enami     969: uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
1.311     para      970:     vaddr_t start)
1.1       mrg       971: {
1.99      chs       972:        struct vm_map_entry *new_entry;
1.1       mrg       973:
                    974:        /* uvm_map_simplify_entry(map, entry); */ /* XXX */
                    975:
1.222     yamt      976:        uvm_map_check(map, "clip_start entry");
1.218     yamt      977:        uvm_mapent_check(entry);
1.144     yamt      978:
1.10      mrg       979:        /*
                    980:         * Split off the front portion.  note that we must insert the new
                    981:         * entry BEFORE this one, so that this entry has the specified
1.1       mrg       982:         * starting address.
1.10      mrg       983:         */
1.311     para      984:        new_entry = uvm_mapent_alloc(map, 0);
1.1       mrg       985:        uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1.218     yamt      986:        uvm_mapent_splitadj(new_entry, entry, start);
1.10      mrg       987:        uvm_map_entry_link(map, entry->prev, new_entry);
1.85      chs       988:
1.222     yamt      989:        uvm_map_check(map, "clip_start leave");
1.1       mrg       990: }
                    991:
                    992: /*
                    993:  * uvm_map_clip_end: ensure that the entry ends at or before
                    994:  *     the ending address, if it doesn't we split the entry
1.98      chs       995:  *
1.1       mrg       996:  * => caller should use UVM_MAP_CLIP_END macro rather than calling
                    997:  *    this directly
                    998:  * => map must be locked by caller
                    999:  */
                   1000:
1.10      mrg      1001: void
1.311     para     1002: uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
1.1       mrg      1003: {
1.218     yamt     1004:        struct vm_map_entry *new_entry;
1.1       mrg      1005:
1.222     yamt     1006:        uvm_map_check(map, "clip_end entry");
1.218     yamt     1007:        uvm_mapent_check(entry);
1.174     yamt     1008:
1.1       mrg      1009:        /*
                   1010:         *      Create a new entry and insert it
                   1011:         *      AFTER the specified entry
                   1012:         */
1.311     para     1013:        new_entry = uvm_mapent_alloc(map, 0);
1.1       mrg      1014:        uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1.218     yamt     1015:        uvm_mapent_splitadj(entry, new_entry, end);
1.1       mrg      1016:        uvm_map_entry_link(map, entry, new_entry);
                   1017:
1.222     yamt     1018:        uvm_map_check(map, "clip_end leave");
1.1       mrg      1019: }
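
/*
 * Illustrative sketch (not part of the original source): callers that
 * operate on an arbitrary [start, end) range typically clip the first and
 * last entries so that entry boundaries line up with the range, roughly:
 *
 *	if (entry->start < start)
 *		UVM_MAP_CLIP_START(map, entry, start);
 *	...
 *	if (entry->end > end)
 *		UVM_MAP_CLIP_END(map, entry, end);
 *
 * after which whole entries can be unmapped, protected or wired without
 * touching anything outside the requested range.
 */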
                   1020:
                   1021: /*
                   1022:  *   M A P   -   m a i n   e n t r y   p o i n t
                   1023:  */
                   1024: /*
                   1025:  * uvm_map: establish a valid mapping in a map
                   1026:  *
                   1027:  * => assume startp is page aligned.
                   1028:  * => assume size is a multiple of PAGE_SIZE.
                   1029:  * => assume sys_mmap provides enough of a "hint" to have us skip
                   1030:  *     over text/data/bss area.
                   1031:  * => map must be unlocked (we will lock it)
                   1032:  * => <uobj,uoffset> value meanings (4 cases):
1.139     enami    1033:  *      [1] <NULL,uoffset>             == uoffset is a hint for PMAP_PREFER
1.1       mrg      1034:  *      [2] <NULL,UVM_UNKNOWN_OFFSET>  == don't PMAP_PREFER
                   1035:  *      [3] <uobj,uoffset>             == normal mapping
                   1036:  *      [4] <uobj,UVM_UNKNOWN_OFFSET>  == uvm_map finds offset based on VA
1.98      chs      1037:  *
1.1       mrg      1038:  *    case [4] is for kernel mappings where we don't know the offset until
1.8       chuck    1039:  *    we've found a virtual address.   note that kernel object offsets are
                   1040:  *    always relative to vm_map_min(kernel_map).
1.81      thorpej  1041:  *
1.165     yamt     1042:  * => if `align' is non-zero, we align the virtual address to the specified
                   1043:  *     alignment.
                   1044:  *     this is provided as a mechanism for large pages.
1.81      thorpej  1045:  *
1.1       mrg      1046:  * => XXXCDC: need way to map in external amap?
                   1047:  */
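                         /*
                          * Editor's sketch (illustrative, not from the original source):
                          * a plain anonymous copy-on-write request, i.e. case [2] above,
                          * might be issued as
                          *
                          *	error = uvm_map(map, &va, size, NULL, UVM_UNKNOWN_OFFSET, 0,
                          *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_ALL, UVM_INH_COPY,
                          *	    UVM_ADV_RANDOM, UVM_FLAG_COPYONW));
                          *
                          * where the NULL uobj together with UVM_UNKNOWN_OFFSET means
                          * "no PMAP_PREFER hint"; the exact protection/inherit/advice
                          * values depend entirely on the caller.
                          */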
                   1048:
1.10      mrg      1049: int
1.138     enami    1050: uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
                   1051:     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
1.10      mrg      1052: {
1.174     yamt     1053:        struct uvm_map_args args;
                   1054:        struct vm_map_entry *new_entry;
                   1055:        int error;
                   1056:
1.187     yamt     1057:        KASSERT((size & PAGE_MASK) == 0);
1.174     yamt     1058:
1.288     drochner 1059: #ifndef __USER_VA0_IS_SAFE
                   1060:        if ((flags & UVM_FLAG_FIXED) && *startp == 0 &&
1.290     drochner 1061:            !VM_MAP_IS_KERNEL(map) && user_va0_disable)
1.288     drochner 1062:                return EACCES;
                   1063: #endif
                   1064:
1.174     yamt     1065:        /*
                   1066:         * for pager_map, allocate the new entry first to avoid sleeping
                   1067:         * for memory while we have the map locked.
                   1068:         */
                   1069:
                   1070:        new_entry = NULL;
1.311     para     1071:        if (map == pager_map) {
1.174     yamt     1072:                new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
                   1073:                if (__predict_false(new_entry == NULL))
                   1074:                        return ENOMEM;
                   1075:        }
                   1076:        if (map == pager_map)
                   1077:                flags |= UVM_FLAG_NOMERGE;
                   1078:
                   1079:        error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
                   1080:            flags, &args);
                   1081:        if (!error) {
                   1082:                error = uvm_map_enter(map, &args, new_entry);
                   1083:                *startp = args.uma_start;
1.189     yamt     1084:        } else if (new_entry) {
                   1085:                uvm_mapent_free(new_entry);
1.174     yamt     1086:        }
                   1087:
1.187     yamt     1088: #if defined(DEBUG)
1.331.2.1  skrll    1089:        if (!error && VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
1.264     ad       1090:                uvm_km_check_empty(map, *startp, *startp + size);
1.187     yamt     1091:        }
                   1092: #endif /* defined(DEBUG) */
                   1093:
1.174     yamt     1094:        return error;
                   1095: }
                   1096:
1.307     yamt     1097: /*
                   1098:  * uvm_map_prepare:
                   1099:  *
                   1100:  * called with map unlocked.
                   1101:  * on success, returns the map locked.
                   1102:  */
                   1103:
1.174     yamt     1104: int
                   1105: uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
                   1106:     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
                   1107:     struct uvm_map_args *args)
                   1108: {
                   1109:        struct vm_map_entry *prev_entry;
                   1110:        vm_prot_t prot = UVM_PROTECTION(flags);
                   1111:        vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
                   1112:
                   1113:        UVMHIST_FUNC("uvm_map_prepare");
1.10      mrg      1114:        UVMHIST_CALLED(maphist);
1.1       mrg      1115:
1.331.2.2  skrll    1116:        UVMHIST_LOG(maphist, "(map=%p, start=%#lx, size=%lu, flags=%#x)",
1.174     yamt     1117:            map, start, size, flags);
1.331.2.2  skrll    1118:        UVMHIST_LOG(maphist, "  uobj/offset %p/%ld", uobj, uoffset,0,0);
1.107     chs      1119:
                   1120:        /*
                   1121:         * detect a popular device driver bug.
                   1122:         */
                   1123:
1.314     rmind    1124:        KASSERT(doing_shutdown || curlwp != NULL);
1.1       mrg      1125:
1.10      mrg      1126:        /*
1.144     yamt     1127:         * zero-sized mapping doesn't make any sense.
                   1128:         */
                   1129:        KASSERT(size > 0);
                   1130:
1.180     yamt     1131:        KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);
                   1132:
1.222     yamt     1133:        uvm_map_check(map, "map entry");
1.144     yamt     1134:
                   1135:        /*
1.106     chs      1136:         * check sanity of protection code
1.10      mrg      1137:         */
1.1       mrg      1138:
1.10      mrg      1139:        if ((prot & maxprot) != prot) {
1.331.2.2  skrll    1140:                UVMHIST_LOG(maphist, "<- prot. failure:  prot=%#x, max=%#x",
1.10      mrg      1141:                prot, maxprot,0,0);
1.94      chs      1142:                return EACCES;
1.10      mrg      1143:        }
1.1       mrg      1144:
1.10      mrg      1145:        /*
1.106     chs      1146:         * figure out where to put new VM range
1.10      mrg      1147:         */
1.180     yamt     1148: retry:
1.234     thorpej  1149:        if (vm_map_lock_try(map) == false) {
1.314     rmind    1150:                if ((flags & UVM_FLAG_TRYLOCK) != 0) {
1.94      chs      1151:                        return EAGAIN;
1.106     chs      1152:                }
1.10      mrg      1153:                vm_map_lock(map); /* could sleep here */
                   1154:        }
1.226     yamt     1155:        prev_entry = uvm_map_findspace(map, start, size, &start,
                   1156:            uobj, uoffset, align, flags);
                   1157:        if (prev_entry == NULL) {
1.180     yamt     1158:                unsigned int timestamp;
                   1159:
                   1160:                timestamp = map->timestamp;
1.331.2.2  skrll    1161:                UVMHIST_LOG(maphist,"waiting va timestamp=%#x",
1.180     yamt     1162:                            timestamp,0,0,0);
                   1163:                map->flags |= VM_MAP_WANTVA;
1.10      mrg      1164:                vm_map_unlock(map);
1.180     yamt     1165:
                   1166:                /*
1.226     yamt     1167:                 * try to reclaim kva and wait until someone does unmap.
1.238     ad       1168:                 * fragile locking here, so we awaken every second to
                   1169:                 * recheck the condition.
1.180     yamt     1170:                 */
                   1171:
1.238     ad       1172:                mutex_enter(&map->misc_lock);
1.180     yamt     1173:                while ((map->flags & VM_MAP_WANTVA) != 0 &&
                   1174:                   map->timestamp == timestamp) {
1.226     yamt     1175:                        if ((flags & UVM_FLAG_WAITVA) == 0) {
1.238     ad       1176:                                mutex_exit(&map->misc_lock);
1.226     yamt     1177:                                UVMHIST_LOG(maphist,
                   1178:                                    "<- uvm_map_findspace failed!", 0,0,0,0);
                   1179:                                return ENOMEM;
                   1180:                        } else {
1.238     ad       1181:                                cv_timedwait(&map->cv, &map->misc_lock, hz);
1.226     yamt     1182:                        }
1.180     yamt     1183:                }
1.238     ad       1184:                mutex_exit(&map->misc_lock);
1.180     yamt     1185:                goto retry;
1.10      mrg      1186:        }
1.1       mrg      1187:
1.40      thorpej  1188: #ifdef PMAP_GROWKERNEL
1.152     simonb   1189:        /*
                   1190:         * If the kernel pmap can't map the requested space,
                   1191:         * then allocate more resources for it.
                   1192:         */
1.229     yamt     1193:        if (map == kernel_map && uvm_maxkaddr < (start + size))
                   1194:                uvm_maxkaddr = pmap_growkernel(start + size);
1.10      mrg      1195: #endif
                   1196:
1.207     yamt     1197:        UVMMAP_EVCNT_INCR(map_call);
1.10      mrg      1198:
                   1199:        /*
                   1200:         * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
1.98      chs      1201:         * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
                   1202:         * either case we want to zero it  before storing it in the map entry
1.10      mrg      1203:         * (because it looks strange and confusing when debugging...)
1.98      chs      1204:         *
                   1205:         * if uobj is not null
1.10      mrg      1206:         *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
                   1207:         *      and we do not need to change uoffset.
                   1208:         *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
                   1209:         *      now (based on the starting address of the map).   this case is
                   1210:         *      for kernel object mappings where we don't know the offset until
                   1211:         *      the virtual address is found (with uvm_map_findspace).   the
                   1212:         *      offset is the distance we are from the start of the map.
                   1213:         */
                   1214:
                   1215:        if (uobj == NULL) {
                   1216:                uoffset = 0;
                   1217:        } else {
                   1218:                if (uoffset == UVM_UNKNOWN_OFFSET) {
1.85      chs      1219:                        KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1.174     yamt     1220:                        uoffset = start - vm_map_min(kernel_map);
1.10      mrg      1221:                }
                   1222:        }
                   1223:
1.174     yamt     1224:        args->uma_flags = flags;
                   1225:        args->uma_prev = prev_entry;
                   1226:        args->uma_start = start;
                   1227:        args->uma_size = size;
                   1228:        args->uma_uobj = uobj;
                   1229:        args->uma_uoffset = uoffset;
                   1230:
1.276     matt     1231:        UVMHIST_LOG(maphist, "<- done!", 0,0,0,0);
1.174     yamt     1232:        return 0;
                   1233: }
                   1234:
1.307     yamt     1235: /*
                   1236:  * uvm_map_enter:
                   1237:  *
                   1238:  * called with map locked.
                   1239:  * unlock the map before returning.
                   1240:  */
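                         /*
                          * Editor's note (a sketch mirroring what uvm_map() above already
                          * does, not an additional interface): callers who preallocate the
                          * map entry use the two halves as a pair,
                          *
                          *	error = uvm_map_prepare(map, start, size, uobj, uoffset,
                          *	    align, flags, &args);
                          *	if (error == 0)
                          *		error = uvm_map_enter(map, &args, new_entry);
                          *
                          * uvm_map_prepare() returns with the map locked on success and
                          * uvm_map_enter() always unlocks it before returning, so the
                          * caller never manipulates the map lock on this path.
                          */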
                   1241:
1.174     yamt     1242: int
                   1243: uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
                   1244:     struct vm_map_entry *new_entry)
                   1245: {
                   1246:        struct vm_map_entry *prev_entry = args->uma_prev;
                   1247:        struct vm_map_entry *dead = NULL;
                   1248:
                   1249:        const uvm_flag_t flags = args->uma_flags;
                   1250:        const vm_prot_t prot = UVM_PROTECTION(flags);
                   1251:        const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
                   1252:        const vm_inherit_t inherit = UVM_INHERIT(flags);
                   1253:        const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
                   1254:            AMAP_EXTEND_NOWAIT : 0;
                   1255:        const int advice = UVM_ADVICE(flags);
                   1256:
                   1257:        vaddr_t start = args->uma_start;
                   1258:        vsize_t size = args->uma_size;
                   1259:        struct uvm_object *uobj = args->uma_uobj;
                   1260:        voff_t uoffset = args->uma_uoffset;
                   1261:
                   1262:        const int kmap = (vm_map_pmap(map) == pmap_kernel());
                   1263:        int merged = 0;
                   1264:        int error;
1.176     yamt     1265:        int newetype;
1.174     yamt     1266:
                   1267:        UVMHIST_FUNC("uvm_map_enter");
                   1268:        UVMHIST_CALLED(maphist);
                   1269:
1.331.2.2  skrll    1270:        UVMHIST_LOG(maphist, "(map=%p, start=%#lx, size=%lu, flags=%#x)",
1.174     yamt     1271:            map, start, size, flags);
1.331.2.2  skrll    1272:        UVMHIST_LOG(maphist, "  uobj/offset %p/%ld", uobj, uoffset,0,0);
1.174     yamt     1273:
1.221     yamt     1274:        KASSERT(map->hint == prev_entry); /* bimerge case assumes this */
1.307     yamt     1275:        KASSERT(vm_map_locked_p(map));
1.221     yamt     1276:
1.176     yamt     1277:        if (uobj)
                   1278:                newetype = UVM_ET_OBJ;
                   1279:        else
                   1280:                newetype = 0;
                   1281:
                   1282:        if (flags & UVM_FLAG_COPYONW) {
                   1283:                newetype |= UVM_ET_COPYONWRITE;
                   1284:                if ((flags & UVM_FLAG_OVERLAY) == 0)
                   1285:                        newetype |= UVM_ET_NEEDSCOPY;
                   1286:        }
                   1287:
1.10      mrg      1288:        /*
1.106     chs      1289:         * try and insert in map by extending previous entry, if possible.
1.10      mrg      1290:         * XXX: we don't try and pull back the next entry.   might be useful
                   1291:         * for a stack, but we are currently allocating our stack in advance.
                   1292:         */
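                                 /*
                                  * Editor's illustration (not from the original source) of the
                                  * two merge directions handled below:
                                  *
                                  *	backmerge:                  forwardmerge:
                                  *	[ prev ][ new  ]            [ new  ][ next ]
                                  *	     becomes                     becomes
                                  *	[ prev         ]            [         next ]
                                  *
                                  * a "bimerge" is both at once: prev, the new range and next
                                  * all collapse into a single entry.
                                  */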
                   1293:
1.121     atatat   1294:        if (flags & UVM_FLAG_NOMERGE)
                   1295:                goto nomerge;
                   1296:
1.194     yamt     1297:        if (prev_entry->end == start &&
1.121     atatat   1298:            prev_entry != &map->header &&
1.312     rmind    1299:            UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0,
1.194     yamt     1300:            prot, maxprot, inherit, advice, 0)) {
1.161     matt     1301:
1.10      mrg      1302:                if (uobj && prev_entry->offset +
                   1303:                    (prev_entry->end - prev_entry->start) != uoffset)
1.121     atatat   1304:                        goto forwardmerge;
1.10      mrg      1305:
                   1306:                /*
1.98      chs      1307:                 * can't extend a shared amap.  note: no need to lock amap to
1.34      chuck    1308:                 * look at refs since we don't care about its exact value.
1.10      mrg      1309:                 * if it is one (i.e. we have the only reference) it will stay there.
                   1310:                 */
1.85      chs      1311:
1.10      mrg      1312:                if (prev_entry->aref.ar_amap &&
1.34      chuck    1313:                    amap_refs(prev_entry->aref.ar_amap) != 1) {
1.121     atatat   1314:                        goto forwardmerge;
1.10      mrg      1315:                }
1.85      chs      1316:
1.119     chs      1317:                if (prev_entry->aref.ar_amap) {
1.139     enami    1318:                        error = amap_extend(prev_entry, size,
1.126     bouyer   1319:                            amapwaitflag | AMAP_EXTEND_FORWARDS);
1.174     yamt     1320:                        if (error)
1.191     yamt     1321:                                goto nomerge;
1.119     chs      1322:                }
1.10      mrg      1323:
1.258     ad       1324:                if (kmap) {
1.207     yamt     1325:                        UVMMAP_EVCNT_INCR(kbackmerge);
1.258     ad       1326:                } else {
1.207     yamt     1327:                        UVMMAP_EVCNT_INCR(ubackmerge);
1.258     ad       1328:                }
1.10      mrg      1329:                UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);
                   1330:
                   1331:                /*
                   1332:                 * drop our reference to uobj since we are extending a reference
                   1333:                 * that we already have (the ref count can not drop to zero).
                   1334:                 */
1.119     chs      1335:
1.10      mrg      1336:                if (uobj && uobj->pgops->pgo_detach)
                   1337:                        uobj->pgops->pgo_detach(uobj);
                   1338:
1.263     matt     1339:                /*
                   1340:                 * Now that we've merged the entries, note that we've grown
                   1341:                 * and our gap has shrunk.  Then fix the tree.
                   1342:                 */
1.10      mrg      1343:                prev_entry->end += size;
1.263     matt     1344:                prev_entry->gap -= size;
1.145     yamt     1345:                uvm_rb_fixup(map, prev_entry);
                   1346:
1.222     yamt     1347:                uvm_map_check(map, "map backmerged");
1.10      mrg      1348:
                   1349:                UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
1.121     atatat   1350:                merged++;
1.106     chs      1351:        }
1.10      mrg      1352:
1.121     atatat   1353: forwardmerge:
1.194     yamt     1354:        if (prev_entry->next->start == (start + size) &&
1.121     atatat   1355:            prev_entry->next != &map->header &&
1.312     rmind    1356:            UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0,
1.194     yamt     1357:            prot, maxprot, inherit, advice, 0)) {
1.161     matt     1358:
1.121     atatat   1359:                if (uobj && prev_entry->next->offset != uoffset + size)
                   1360:                        goto nomerge;
                   1361:
                   1362:                /*
                   1363:                 * can't extend a shared amap.  note: no need to lock amap to
                   1364:                 * look at refs since we don't care about its exact value.
1.122     atatat   1365:                 * if it is one (i.e. we have the only reference) it will stay there.
                   1366:                 *
                   1367:                 * note that we also can't merge two amaps, so if we
                   1368:                 * merged with the previous entry which has an amap,
                   1369:                 * and the next entry also has an amap, we give up.
                   1370:                 *
1.125     atatat   1371:                 * Interesting cases:
                   1372:                 * amap, new, amap -> give up second merge (single fwd extend)
                   1373:                 * amap, new, none -> double forward extend (extend again here)
                   1374:                 * none, new, amap -> double backward extend (done here)
                   1375:                 * uobj, new, amap -> single backward extend (done here)
                   1376:                 *
1.122     atatat   1377:                 * XXX should we attempt to deal with someone refilling
                   1378:                 * the deallocated region between two entries that are
                   1379:                 * backed by the same amap (ie, arefs is 2, "prev" and
                   1380:                 * "next" refer to it, and adding this allocation will
                   1381:                 * close the hole, thus restoring arefs to 1 and
                   1382:                 * deallocating the "next" vm_map_entry)?  -- @@@
1.121     atatat   1383:                 */
                   1384:
                   1385:                if (prev_entry->next->aref.ar_amap &&
1.122     atatat   1386:                    (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
                   1387:                     (merged && prev_entry->aref.ar_amap))) {
1.121     atatat   1388:                        goto nomerge;
                   1389:                }
                   1390:
1.122     atatat   1391:                if (merged) {
1.123     atatat   1392:                        /*
                   1393:                         * Try to extend the amap of the previous entry to
                   1394:                         * cover the next entry as well.  If it doesn't work
                   1395:                         * just skip on, don't actually give up, since we've
                   1396:                         * already completed the back merge.
                   1397:                         */
1.125     atatat   1398:                        if (prev_entry->aref.ar_amap) {
                   1399:                                if (amap_extend(prev_entry,
                   1400:                                    prev_entry->next->end -
                   1401:                                    prev_entry->next->start,
1.126     bouyer   1402:                                    amapwaitflag | AMAP_EXTEND_FORWARDS))
1.142     enami    1403:                                        goto nomerge;
1.125     atatat   1404:                        }
                   1405:
                   1406:                        /*
                   1407:                         * Try to extend the amap of the *next* entry
                   1408:                         * back to cover the new allocation *and* the
                   1409:                         * previous entry as well (the previous merge
                   1410:                         * didn't have an amap already otherwise we
                   1411:                         * wouldn't be checking here for an amap).  If
                   1412:                         * it doesn't work just skip on, again, don't
                   1413:                         * actually give up, since we've already
                   1414:                         * completed the back merge.
                   1415:                         */
                   1416:                        else if (prev_entry->next->aref.ar_amap) {
                   1417:                                if (amap_extend(prev_entry->next,
                   1418:                                    prev_entry->end -
1.141     atatat   1419:                                    prev_entry->start,
1.126     bouyer   1420:                                    amapwaitflag | AMAP_EXTEND_BACKWARDS))
1.142     enami    1421:                                        goto nomerge;
1.125     atatat   1422:                        }
                   1423:                } else {
                   1424:                        /*
                   1425:                         * Pull the next entry's amap backwards to cover this
                   1426:                         * new allocation.
                   1427:                         */
                   1428:                        if (prev_entry->next->aref.ar_amap) {
                   1429:                                error = amap_extend(prev_entry->next, size,
1.126     bouyer   1430:                                    amapwaitflag | AMAP_EXTEND_BACKWARDS);
1.174     yamt     1431:                                if (error)
1.191     yamt     1432:                                        goto nomerge;
1.125     atatat   1433:                        }
1.122     atatat   1434:                }
                   1435:
1.121     atatat   1436:                if (merged) {
                   1437:                        if (kmap) {
1.207     yamt     1438:                                UVMMAP_EVCNT_DECR(kbackmerge);
                   1439:                                UVMMAP_EVCNT_INCR(kbimerge);
1.121     atatat   1440:                        } else {
1.207     yamt     1441:                                UVMMAP_EVCNT_DECR(ubackmerge);
                   1442:                                UVMMAP_EVCNT_INCR(ubimerge);
1.121     atatat   1443:                        }
1.122     atatat   1444:                } else {
1.258     ad       1445:                        if (kmap) {
1.207     yamt     1446:                                UVMMAP_EVCNT_INCR(kforwmerge);
1.258     ad       1447:                        } else {
1.207     yamt     1448:                                UVMMAP_EVCNT_INCR(uforwmerge);
1.258     ad       1449:                        }
1.121     atatat   1450:                }
                   1451:                UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);
1.10      mrg      1452:
1.121     atatat   1453:                /*
                   1454:                 * drop our reference to uobj since we are extending a reference
                   1455:                 * that we already have (the ref count can not drop to zero).
                   1456:                 */
1.319     chs      1457:                if (uobj && uobj->pgops->pgo_detach)
1.121     atatat   1458:                        uobj->pgops->pgo_detach(uobj);
1.1       mrg      1459:
1.121     atatat   1460:                if (merged) {
1.174     yamt     1461:                        dead = prev_entry->next;
1.121     atatat   1462:                        prev_entry->end = dead->end;
                   1463:                        uvm_map_entry_unlink(map, dead);
1.125     atatat   1464:                        if (dead->aref.ar_amap != NULL) {
                   1465:                                prev_entry->aref = dead->aref;
                   1466:                                dead->aref.ar_amap = NULL;
                   1467:                        }
1.121     atatat   1468:                } else {
                   1469:                        prev_entry->next->start -= size;
1.263     matt     1470:                        if (prev_entry != &map->header) {
                   1471:                                prev_entry->gap -= size;
                   1472:                                KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry));
1.145     yamt     1473:                                uvm_rb_fixup(map, prev_entry);
1.263     matt     1474:                        }
1.121     atatat   1475:                        if (uobj)
                   1476:                                prev_entry->next->offset = uoffset;
                   1477:                }
1.145     yamt     1478:
1.222     yamt     1479:                uvm_map_check(map, "map forwardmerged");
1.1       mrg      1480:
1.121     atatat   1481:                UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
                   1482:                merged++;
1.106     chs      1483:        }
1.121     atatat   1484:
                   1485: nomerge:
                   1486:        if (!merged) {
                   1487:                UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
1.258     ad       1488:                if (kmap) {
1.207     yamt     1489:                        UVMMAP_EVCNT_INCR(knomerge);
1.258     ad       1490:                } else {
1.207     yamt     1491:                        UVMMAP_EVCNT_INCR(unomerge);
1.258     ad       1492:                }
1.106     chs      1493:
1.10      mrg      1494:                /*
1.121     atatat   1495:                 * allocate new entry and link it in.
1.10      mrg      1496:                 */
1.106     chs      1497:
1.121     atatat   1498:                if (new_entry == NULL) {
1.126     bouyer   1499:                        new_entry = uvm_mapent_alloc(map,
1.127     thorpej  1500:                                (flags & UVM_FLAG_NOWAIT));
1.126     bouyer   1501:                        if (__predict_false(new_entry == NULL)) {
1.174     yamt     1502:                                error = ENOMEM;
                   1503:                                goto done;
1.126     bouyer   1504:                        }
1.121     atatat   1505:                }
1.174     yamt     1506:                new_entry->start = start;
1.121     atatat   1507:                new_entry->end = new_entry->start + size;
                   1508:                new_entry->object.uvm_obj = uobj;
                   1509:                new_entry->offset = uoffset;
                   1510:
1.176     yamt     1511:                new_entry->etype = newetype;
1.121     atatat   1512:
1.161     matt     1513:                if (flags & UVM_FLAG_NOMERGE) {
                   1514:                        new_entry->flags |= UVM_MAP_NOMERGE;
                   1515:                }
1.121     atatat   1516:
                   1517:                new_entry->protection = prot;
                   1518:                new_entry->max_protection = maxprot;
                   1519:                new_entry->inheritance = inherit;
                   1520:                new_entry->wired_count = 0;
                   1521:                new_entry->advice = advice;
                   1522:                if (flags & UVM_FLAG_OVERLAY) {
                   1523:
                   1524:                        /*
                   1525:                         * to_add: for BSS we overallocate a little since we
                   1526:                         * are likely to extend
                   1527:                         */
                   1528:
                   1529:                        vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
                   1530:                                UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
1.126     bouyer   1531:                        struct vm_amap *amap = amap_alloc(size, to_add,
1.227     yamt     1532:                            (flags & UVM_FLAG_NOWAIT));
1.126     bouyer   1533:                        if (__predict_false(amap == NULL)) {
1.174     yamt     1534:                                error = ENOMEM;
                   1535:                                goto done;
1.126     bouyer   1536:                        }
1.121     atatat   1537:                        new_entry->aref.ar_pageoff = 0;
                   1538:                        new_entry->aref.ar_amap = amap;
                   1539:                } else {
                   1540:                        new_entry->aref.ar_pageoff = 0;
                   1541:                        new_entry->aref.ar_amap = NULL;
                   1542:                }
                   1543:                uvm_map_entry_link(map, prev_entry, new_entry);
1.1       mrg      1544:
1.121     atatat   1545:                /*
                   1546:                 * Update the free space hint
                   1547:                 */
1.10      mrg      1548:
1.121     atatat   1549:                if ((map->first_free == prev_entry) &&
                   1550:                    (prev_entry->end >= new_entry->start))
                   1551:                        map->first_free = new_entry;
1.174     yamt     1552:
                   1553:                new_entry = NULL;
1.121     atatat   1554:        }
1.10      mrg      1555:
1.146     yamt     1556:        map->size += size;
                   1557:
1.10      mrg      1558:        UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1.174     yamt     1559:
                   1560:        error = 0;
                   1561: done:
1.311     para     1562:        vm_map_unlock(map);
                   1563:
                   1564:        if (new_entry) {
                   1565:                uvm_mapent_free(new_entry);
1.174     yamt     1566:        }
1.311     para     1567:
1.174     yamt     1568:        if (dead) {
                   1569:                KDASSERT(merged);
1.311     para     1570:                uvm_mapent_free(dead);
1.248     ad       1571:        }
1.311     para     1572:
1.174     yamt     1573:        return error;
1.1       mrg      1574: }
                   1575:
                   1576: /*
1.247     yamt     1577:  * uvm_map_lookup_entry_bytree: lookup an entry in tree
                   1578:  */
                   1579:
1.263     matt     1580: static inline bool
1.247     yamt     1581: uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address,
                   1582:     struct vm_map_entry **entry        /* OUT */)
                   1583: {
                   1584:        struct vm_map_entry *prev = &map->header;
1.263     matt     1585:        struct vm_map_entry *cur = ROOT_ENTRY(map);
1.247     yamt     1586:
                   1587:        while (cur) {
1.263     matt     1588:                UVMMAP_EVCNT_INCR(mlk_treeloop);
1.247     yamt     1589:                if (address >= cur->start) {
                   1590:                        if (address < cur->end) {
                   1591:                                *entry = cur;
                   1592:                                return true;
                   1593:                        }
                   1594:                        prev = cur;
1.263     matt     1595:                        cur = RIGHT_ENTRY(cur);
1.247     yamt     1596:                } else
1.263     matt     1597:                        cur = LEFT_ENTRY(cur);
1.247     yamt     1598:        }
                   1599:        *entry = prev;
                   1600:        return false;
                   1601: }
                   1602:
                   1603: /*
1.1       mrg      1604:  * uvm_map_lookup_entry: find map entry at or before an address
                   1605:  *
                   1606:  * => map must at least be read-locked by caller
                   1607:  * => entry is returned in "entry"
                   1608:  * => return value is true if address is in the returned entry
                   1609:  */
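                         /*
                          * Editor's sketch (illustrative only): a caller asking "is this
                          * address mapped?" typically looks like
                          *
                          *	struct vm_map_entry *entry;
                          *
                          *	vm_map_lock_read(map);
                          *	if (uvm_map_lookup_entry(map, va, &entry)) {
                          *		... va lies within [entry->start, entry->end) ...
                          *	} else {
                          *		... va is unmapped; "entry" is the entry (possibly
                          *		    &map->header) just before the gap holding va ...
                          *	}
                          *	vm_map_unlock_read(map);
                          */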
                   1610:
1.233     thorpej  1611: bool
1.138     enami    1612: uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
                   1613:     struct vm_map_entry **entry        /* OUT */)
1.1       mrg      1614: {
1.99      chs      1615:        struct vm_map_entry *cur;
1.234     thorpej  1616:        bool use_tree = false;
1.1       mrg      1617:        UVMHIST_FUNC("uvm_map_lookup_entry");
                   1618:        UVMHIST_CALLED(maphist);
                   1619:
1.331.2.2  skrll    1620:        UVMHIST_LOG(maphist,"(map=%p,addr=%#lx,ent=%p)",
1.10      mrg      1621:            map, address, entry, 0);
1.1       mrg      1622:
                   1623:        /*
1.10      mrg      1624:         * start looking either from the head of the
                   1625:         * list, or from the hint.
1.1       mrg      1626:         */
                   1627:
                   1628:        cur = map->hint;
                   1629:
                   1630:        if (cur == &map->header)
                   1631:                cur = cur->next;
                   1632:
1.207     yamt     1633:        UVMMAP_EVCNT_INCR(mlk_call);
1.1       mrg      1634:        if (address >= cur->start) {
1.99      chs      1635:
1.139     enami    1636:                /*
1.10      mrg      1637:                 * go from hint to end of list.
1.1       mrg      1638:                 *
1.10      mrg      1639:                 * but first, make a quick check to see if
                   1640:                 * we are already looking at the entry we
                   1641:                 * want (which is usually the case).
                   1642:                 * note also that we don't need to save the hint
                   1643:                 * here... it is the same hint (unless we are
                   1644:                 * at the header, in which case the hint didn't
                   1645:                 * buy us anything anyway).
1.1       mrg      1646:                 */
1.99      chs      1647:
1.144     yamt     1648:                if (cur != &map->header && cur->end > address) {
1.207     yamt     1649:                        UVMMAP_EVCNT_INCR(mlk_hint);
1.1       mrg      1650:                        *entry = cur;
1.331.2.2  skrll    1651:                        UVMHIST_LOG(maphist,"<- got it via hint (%p)",
1.10      mrg      1652:                            cur, 0, 0, 0);
1.218     yamt     1653:                        uvm_mapent_check(*entry);
1.234     thorpej  1654:                        return (true);
1.1       mrg      1655:                }
1.144     yamt     1656:
1.263     matt     1657:                if (map->nentries > 15)
1.234     thorpej  1658:                        use_tree = true;
1.10      mrg      1659:        } else {
1.99      chs      1660:
1.139     enami    1661:                /*
1.144     yamt     1662:                 * invalid hint.  use tree.
1.1       mrg      1663:                 */
1.234     thorpej  1664:                use_tree = true;
1.144     yamt     1665:        }
                   1666:
1.222     yamt     1667:        uvm_map_check(map, __func__);
1.144     yamt     1668:
                   1669:        if (use_tree) {
                   1670:                /*
                   1671:                 * Simple lookup in the tree.  Happens when the hint is
                    1672:                 * invalid, or nentries reaches a threshold.
                   1673:                 */
1.263     matt     1674:                UVMMAP_EVCNT_INCR(mlk_tree);
1.247     yamt     1675:                if (uvm_map_lookup_entry_bytree(map, address, entry)) {
                   1676:                        goto got;
                   1677:                } else {
                   1678:                        goto failed;
1.144     yamt     1679:                }
1.1       mrg      1680:        }
                   1681:
                   1682:        /*
1.10      mrg      1683:         * search linearly
1.1       mrg      1684:         */
                   1685:
1.263     matt     1686:        UVMMAP_EVCNT_INCR(mlk_list);
1.144     yamt     1687:        while (cur != &map->header) {
1.263     matt     1688:                UVMMAP_EVCNT_INCR(mlk_listloop);
1.1       mrg      1689:                if (cur->end > address) {
                   1690:                        if (address >= cur->start) {
1.139     enami    1691:                                /*
1.10      mrg      1692:                                 * save this lookup for future
                   1693:                                 * hints, and return
1.1       mrg      1694:                                 */
                   1695:
                   1696:                                *entry = cur;
1.144     yamt     1697: got:
                   1698:                                SAVE_HINT(map, map->hint, *entry);
1.331.2.2  skrll    1699:                                UVMHIST_LOG(maphist,"<- search got it (%p)",
1.10      mrg      1700:                                        cur, 0, 0, 0);
1.144     yamt     1701:                                KDASSERT((*entry)->start <= address);
                   1702:                                KDASSERT(address < (*entry)->end);
1.218     yamt     1703:                                uvm_mapent_check(*entry);
1.234     thorpej  1704:                                return (true);
1.1       mrg      1705:                        }
                   1706:                        break;
                   1707:                }
                   1708:                cur = cur->next;
                   1709:        }
                   1710:        *entry = cur->prev;
1.144     yamt     1711: failed:
1.82      thorpej  1712:        SAVE_HINT(map, map->hint, *entry);
1.1       mrg      1713:        UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1.147     yamt     1714:        KDASSERT((*entry) == &map->header || (*entry)->end <= address);
1.144     yamt     1715:        KDASSERT((*entry)->next == &map->header ||
                   1716:            address < (*entry)->next->start);
1.234     thorpej  1717:        return (false);
1.1       mrg      1718: }
                   1719:
                   1720: /*
1.140     enami    1721:  * See if the range between start and start + length fits in the gap
                    1722:  * between entry->end and entry->next->start.  Returns 1 if it fits,
                    1723:  * 0 if it doesn't fit, and -1 if the address wraps around.
                   1724:  */
1.203     thorpej  1725: static int
1.232     yamt     1726: uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
1.304     matt     1727:     vsize_t align, int flags, int topdown, struct vm_map_entry *entry)
1.140     enami    1728: {
                   1729:        vaddr_t end;
                   1730:
                   1731: #ifdef PMAP_PREFER
                   1732:        /*
                   1733:         * push start address forward as needed to avoid VAC alias problems.
                   1734:         * we only do this if a valid offset is specified.
                   1735:         */
                   1736:
                   1737:        if (uoffset != UVM_UNKNOWN_OFFSET)
1.182     atatat   1738:                PMAP_PREFER(uoffset, start, length, topdown);
1.140     enami    1739: #endif
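                                 /*
                                  * Editor's note (worked example, not from the original
                                  * source): with UVM_FLAG_COLORMATCH, "align" is a page
                                  * color rather than an address alignment.  E.g. with
                                  * uvmexp.ncolors == 4, align == 1 and *start on page 7
                                  * (color 7 & 3 == 3), the code below rounds the page
                                  * number down to a color-0 boundary (page 4) and then,
                                  * going bottom-up, steps forward one color cycle to
                                  * page 8 before adding the color, giving page 9 --
                                  * which has color 1 and is >= the original hint.
                                  */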
1.304     matt     1740:        if ((flags & UVM_FLAG_COLORMATCH) != 0) {
                   1741:                KASSERT(align < uvmexp.ncolors);
                   1742:                if (uvmexp.ncolors > 1) {
                   1743:                        const u_int colormask = uvmexp.colormask;
                   1744:                        const u_int colorsize = colormask + 1;
                   1745:                        vaddr_t hint = atop(*start);
                   1746:                        const u_int color = hint & colormask;
                   1747:                        if (color != align) {
                   1748:                                hint -= color;  /* adjust to color boundary */
                   1749:                                KASSERT((hint & colormask) == 0);
                   1750:                                if (topdown) {
                   1751:                                        if (align > color)
                   1752:                                                hint -= colorsize;
                   1753:                                } else {
                   1754:                                        if (align < color)
                   1755:                                                hint += colorsize;
                   1756:                                }
                   1757:                                *start = ptoa(hint + align); /* adjust to color */
                   1758:                        }
                   1759:                }
                   1760:        } else if (align != 0) {
1.140     enami    1761:                if ((*start & (align - 1)) != 0) {
                   1762:                        if (topdown)
                   1763:                                *start &= ~(align - 1);
                   1764:                        else
                   1765:                                *start = roundup(*start, align);
                   1766:                }
                   1767:                /*
                   1768:                 * XXX Should we PMAP_PREFER() here again?
1.182     atatat   1769:                 * eh...i think we're okay
1.140     enami    1770:                 */
                   1771:        }
                   1772:
                   1773:        /*
                   1774:         * Find the end of the proposed new region.  Be sure we didn't
                   1775:         * wrap around the address; if so, we lose.  Otherwise, if the
                   1776:         * proposed new region fits before the next entry, we win.
                   1777:         */
                   1778:
                   1779:        end = *start + length;
                   1780:        if (end < *start)
                   1781:                return (-1);
                   1782:
                   1783:        if (entry->next->start >= end && *start >= entry->end)
                   1784:                return (1);
                   1785:
                   1786:        return (0);
                   1787: }
                   1788:
                   1789: /*
1.1       mrg      1790:  * uvm_map_findspace: find "length" sized space in "map".
                   1791:  *
1.167     junyoung 1792:  * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
                   1793:  *     set in "flags" (in which case we insist on using "hint").
1.1       mrg      1794:  * => "result" is VA returned
                   1795:  * => uobj/uoffset are to be used to handle VAC alignment, if required
1.167     junyoung 1796:  * => if "align" is non-zero, we attempt to align to that value.
1.1       mrg      1797:  * => caller must at least have read-locked map
                    1798:  * => returns NULL on failure, or pointer to prev. map entry on success
                   1799:  * => note this is a cross between the old vm_map_findspace and vm_map_find
                   1800:  */
                   1801:
1.99      chs      1802: struct vm_map_entry *
1.138     enami    1803: uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1.232     yamt     1804:     vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
1.138     enami    1805:     vsize_t align, int flags)
1.1       mrg      1806: {
1.140     enami    1807:        struct vm_map_entry *entry;
1.144     yamt     1808:        struct vm_map_entry *child, *prev, *tmp;
1.326     martin   1809:        vaddr_t orig_hint __diagused;
1.131     atatat   1810:        const int topdown = map->flags & VM_MAP_TOPDOWN;
1.1       mrg      1811:        UVMHIST_FUNC("uvm_map_findspace");
                   1812:        UVMHIST_CALLED(maphist);
                   1813:
1.331.2.2  skrll    1814:        UVMHIST_LOG(maphist, "(map=%p, hint=%#lx, len=%lu, flags=%#x)",
1.140     enami    1815:            map, hint, length, flags);
1.304     matt     1816:        KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || (align & (align - 1)) == 0);
                   1817:        KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors);
1.85      chs      1818:        KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1.81      thorpej  1819:
1.222     yamt     1820:        uvm_map_check(map, "map_findspace entry");
1.144     yamt     1821:
1.81      thorpej  1822:        /*
                   1823:         * remember the original hint.  if we are aligning, then we
                   1824:         * may have to try again with no alignment constraint if
                   1825:         * we fail the first time.
                   1826:         */
1.85      chs      1827:
1.81      thorpej  1828:        orig_hint = hint;
1.184     chs      1829:        if (hint < vm_map_min(map)) {   /* check ranges ... */
1.81      thorpej  1830:                if (flags & UVM_FLAG_FIXED) {
1.1       mrg      1831:                        UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
1.139     enami    1832:                        return (NULL);
1.1       mrg      1833:                }
1.184     chs      1834:                hint = vm_map_min(map);
1.1       mrg      1835:        }
1.184     chs      1836:        if (hint > vm_map_max(map)) {
1.331.2.2  skrll    1837:                UVMHIST_LOG(maphist,"<- VA %#lx > range [%#lx->%#lx]",
1.184     chs      1838:                    hint, vm_map_min(map), vm_map_max(map), 0);
1.139     enami    1839:                return (NULL);
1.1       mrg      1840:        }
                   1841:
                   1842:        /*
                   1843:         * Look for the first possible address; if there's already
                   1844:         * something at this address, we have to start after it.
                   1845:         */
                   1846:
1.131     atatat   1847:        /*
                   1848:         * @@@: there are four, no, eight cases to consider.
                   1849:         *
                   1850:         * 0: found,     fixed,     bottom up -> fail
                   1851:         * 1: found,     fixed,     top down  -> fail
1.140     enami    1852:         * 2: found,     not fixed, bottom up -> start after entry->end,
                   1853:         *                                       loop up
                   1854:         * 3: found,     not fixed, top down  -> start before entry->start,
                   1855:         *                                       loop down
                   1856:         * 4: not found, fixed,     bottom up -> check entry->next->start, fail
                   1857:         * 5: not found, fixed,     top down  -> check entry->next->start, fail
                   1858:         * 6: not found, not fixed, bottom up -> check entry->next->start,
                   1859:         *                                       loop up
                   1860:         * 7: not found, not fixed, top down  -> check entry->next->start,
                   1861:         *                                       loop down
1.131     atatat   1862:         *
                    1863:         * as you can see, it reduces to roughly five cases, and
                   1864:         * adding top down mapping only adds one unique case (without
                   1865:         * it, there would be four cases).
                   1866:         */
                   1867:
1.184     chs      1868:        if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
1.140     enami    1869:                entry = map->first_free;
1.1       mrg      1870:        } else {
1.140     enami    1871:                if (uvm_map_lookup_entry(map, hint, &entry)) {
1.1       mrg      1872:                        /* "hint" address already in use ... */
1.81      thorpej  1873:                        if (flags & UVM_FLAG_FIXED) {
1.140     enami    1874:                                UVMHIST_LOG(maphist, "<- fixed & VA in use",
1.10      mrg      1875:                                    0, 0, 0, 0);
1.139     enami    1876:                                return (NULL);
1.1       mrg      1877:                        }
1.140     enami    1878:                        if (topdown)
                   1879:                                /* Start from lower gap. */
                   1880:                                entry = entry->prev;
                   1881:                } else if (flags & UVM_FLAG_FIXED) {
                   1882:                        if (entry->next->start >= hint + length &&
                   1883:                            hint + length > hint)
                   1884:                                goto found;
                   1885:
                    1886:                        /* "hint" address is a gap but too small */
                   1887:                        UVMHIST_LOG(maphist, "<- fixed mapping failed",
                   1888:                            0, 0, 0, 0);
                   1889:                        return (NULL); /* only one shot at it ... */
                   1890:                } else {
                   1891:                        /*
                   1892:                         * See if given hint fits in this gap.
                   1893:                         */
                   1894:                        switch (uvm_map_space_avail(&hint, length,
1.304     matt     1895:                            uoffset, align, flags, topdown, entry)) {
1.140     enami    1896:                        case 1:
                   1897:                                goto found;
                   1898:                        case -1:
                   1899:                                goto wraparound;
                   1900:                        }
                   1901:
1.148     yamt     1902:                        if (topdown) {
1.140     enami    1903:                                /*
                   1904:                                 * Still there is a chance to fit
                   1905:                                 * if hint > entry->end.
                   1906:                                 */
1.148     yamt     1907:                        } else {
1.168     junyoung 1908:                                /* Start from higher gap. */
1.148     yamt     1909:                                entry = entry->next;
                   1910:                                if (entry == &map->header)
                   1911:                                        goto notfound;
1.140     enami    1912:                                goto nextgap;
1.148     yamt     1913:                        }
1.1       mrg      1914:                }
                   1915:        }
                   1916:
                   1917:        /*
1.144     yamt     1918:         * Note that all UVM_FLAGS_FIXED case is already handled.
                   1919:         */
                   1920:        KDASSERT((flags & UVM_FLAG_FIXED) == 0);
                   1921:
                   1922:        /* Try to find the space in the red-black tree */
                   1923:
                   1924:        /* Check slot before any entry */
                   1925:        hint = topdown ? entry->next->start - length : entry->end;
1.304     matt     1926:        switch (uvm_map_space_avail(&hint, length, uoffset, align, flags,
1.144     yamt     1927:            topdown, entry)) {
                   1928:        case 1:
                   1929:                goto found;
                   1930:        case -1:
                   1931:                goto wraparound;
                   1932:        }
                   1933:
                   1934: nextgap:
1.148     yamt     1935:        KDASSERT((flags & UVM_FLAG_FIXED) == 0);
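                                 /*
                                  * Editor's note (assumption drawn from the rbtree code in
                                  * this file): entry->gap is the free space between this
                                  * entry and the next one (tmp->next->start == tmp->end +
                                  * tmp->gap, as asserted in the loop below), and
                                  * entry->maxgap is the largest such gap anywhere in the
                                  * entry's subtree.  So if the root's maxgap is too small,
                                  * no gap in the whole map can hold the request.
                                  */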
1.144     yamt     1936:        /* If there is not enough space in the whole tree, we fail */
1.263     matt     1937:        tmp = ROOT_ENTRY(map);
                   1938:        if (tmp == NULL || tmp->maxgap < length)
1.144     yamt     1939:                goto notfound;
                   1940:
                   1941:        prev = NULL; /* previous candidate */
                   1942:
                   1943:        /* Find an entry close to hint that has enough space */
                   1944:        for (; tmp;) {
1.263     matt     1945:                KASSERT(tmp->next->start == tmp->end + tmp->gap);
1.144     yamt     1946:                if (topdown) {
                   1947:                        if (tmp->next->start < hint + length &&
                   1948:                            (prev == NULL || tmp->end > prev->end)) {
1.263     matt     1949:                                if (tmp->gap >= length)
1.144     yamt     1950:                                        prev = tmp;
1.263     matt     1951:                                else if ((child = LEFT_ENTRY(tmp)) != NULL
                   1952:                                    && child->maxgap >= length)
1.144     yamt     1953:                                        prev = tmp;
                   1954:                        }
                   1955:                } else {
                   1956:                        if (tmp->end >= hint &&
                   1957:                            (prev == NULL || tmp->end < prev->end)) {
1.263     matt     1958:                                if (tmp->gap >= length)
1.144     yamt     1959:                                        prev = tmp;
1.263     matt     1960:                                else if ((child = RIGHT_ENTRY(tmp)) != NULL
                   1961:                                    && child->maxgap >= length)
1.144     yamt     1962:                                        prev = tmp;
                   1963:                        }
                   1964:                }
                   1965:                if (tmp->next->start < hint + length)
1.263     matt     1966:                        child = RIGHT_ENTRY(tmp);
1.144     yamt     1967:                else if (tmp->end > hint)
1.263     matt     1968:                        child = LEFT_ENTRY(tmp);
1.144     yamt     1969:                else {
1.263     matt     1970:                        if (tmp->gap >= length)
1.144     yamt     1971:                                break;
                   1972:                        if (topdown)
1.263     matt     1973:                                child = LEFT_ENTRY(tmp);
1.144     yamt     1974:                        else
1.263     matt     1975:                                child = RIGHT_ENTRY(tmp);
1.144     yamt     1976:                }
1.263     matt     1977:                if (child == NULL || child->maxgap < length)
1.144     yamt     1978:                        break;
                   1979:                tmp = child;
                   1980:        }
                   1981:
1.148     yamt     1982:        if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
1.164     junyoung 1983:                /*
1.144     yamt     1984:                 * Check if the entry that we found satisfies the
                   1985:                 * space requirement
                   1986:                 */
1.148     yamt     1987:                if (topdown) {
1.149     yamt     1988:                        if (hint > tmp->next->start - length)
                   1989:                                hint = tmp->next->start - length;
1.148     yamt     1990:                } else {
1.149     yamt     1991:                        if (hint < tmp->end)
                   1992:                                hint = tmp->end;
1.148     yamt     1993:                }
                   1994:                switch (uvm_map_space_avail(&hint, length, uoffset, align,
1.304     matt     1995:                    flags, topdown, tmp)) {
1.148     yamt     1996:                case 1:
1.144     yamt     1997:                        entry = tmp;
                   1998:                        goto found;
1.148     yamt     1999:                case -1:
                   2000:                        goto wraparound;
1.144     yamt     2001:                }
1.263     matt     2002:                if (tmp->gap >= length)
1.144     yamt     2003:                        goto listsearch;
                   2004:        }
                   2005:        if (prev == NULL)
                   2006:                goto notfound;
                   2007:
1.148     yamt     2008:        if (topdown) {
1.150     yamt     2009:                KASSERT(orig_hint >= prev->next->start - length ||
1.148     yamt     2010:                    prev->next->start - length > prev->next->start);
                   2011:                hint = prev->next->start - length;
                   2012:        } else {
1.150     yamt     2013:                KASSERT(orig_hint <= prev->end);
1.148     yamt     2014:                hint = prev->end;
                   2015:        }
                   2016:        switch (uvm_map_space_avail(&hint, length, uoffset, align,
1.304     matt     2017:            flags, topdown, prev)) {
1.148     yamt     2018:        case 1:
1.144     yamt     2019:                entry = prev;
                   2020:                goto found;
1.148     yamt     2021:        case -1:
                   2022:                goto wraparound;
1.144     yamt     2023:        }
1.263     matt     2024:        if (prev->gap >= length)
1.144     yamt     2025:                goto listsearch;
1.164     junyoung 2026:
1.144     yamt     2027:        if (topdown)
1.263     matt     2028:                tmp = LEFT_ENTRY(prev);
1.144     yamt     2029:        else
1.263     matt     2030:                tmp = RIGHT_ENTRY(prev);
1.144     yamt     2031:        for (;;) {
1.263     matt     2032:                KASSERT(tmp && tmp->maxgap >= length);
1.144     yamt     2033:                if (topdown)
1.263     matt     2034:                        child = RIGHT_ENTRY(tmp);
1.144     yamt     2035:                else
1.263     matt     2036:                        child = LEFT_ENTRY(tmp);
                   2037:                if (child && child->maxgap >= length) {
1.144     yamt     2038:                        tmp = child;
                   2039:                        continue;
                   2040:                }
1.263     matt     2041:                if (tmp->gap >= length)
1.144     yamt     2042:                        break;
                   2043:                if (topdown)
1.263     matt     2044:                        tmp = LEFT_ENTRY(tmp);
1.144     yamt     2045:                else
1.263     matt     2046:                        tmp = RIGHT_ENTRY(tmp);
1.144     yamt     2047:        }
1.164     junyoung 2048:
1.148     yamt     2049:        if (topdown) {
1.150     yamt     2050:                KASSERT(orig_hint >= tmp->next->start - length ||
1.148     yamt     2051:                    tmp->next->start - length > tmp->next->start);
                   2052:                hint = tmp->next->start - length;
                   2053:        } else {
1.150     yamt     2054:                KASSERT(orig_hint <= tmp->end);
1.148     yamt     2055:                hint = tmp->end;
                   2056:        }
1.144     yamt     2057:        switch (uvm_map_space_avail(&hint, length, uoffset, align,
1.304     matt     2058:            flags, topdown, tmp)) {
1.144     yamt     2059:        case 1:
                   2060:                entry = tmp;
                   2061:                goto found;
1.148     yamt     2062:        case -1:
                   2063:                goto wraparound;
1.144     yamt     2064:        }
                   2065:
1.164     junyoung 2066:        /*
1.144     yamt     2067:         * The tree fails to find an entry because of offset or alignment
                   2068:         * restrictions.  Search the list instead.
                   2069:         */
                   2070:  listsearch:
                   2071:        /*
1.1       mrg      2072:         * Look through the rest of the map, trying to fit a new region in
                   2073:         * the gap between existing regions, or after the very last region.
1.140     enami    2074:         * note: entry->end = base VA of current gap,
                   2075:         *       entry->next->start = VA of end of current gap
1.1       mrg      2076:         */
1.99      chs      2077:
1.140     enami    2078:        for (;;) {
                   2079:                /* Update hint for current gap. */
                   2080:                hint = topdown ? entry->next->start - length : entry->end;
                   2081:
                   2082:                /* See if it fits. */
                   2083:                switch (uvm_map_space_avail(&hint, length, uoffset, align,
1.304     matt     2084:                    flags, topdown, entry)) {
1.140     enami    2085:                case 1:
                   2086:                        goto found;
                   2087:                case -1:
                   2088:                        goto wraparound;
                   2089:                }
                   2090:
                   2091:                /* Advance to next/previous gap */
                   2092:                if (topdown) {
                   2093:                        if (entry == &map->header) {
                   2094:                                UVMHIST_LOG(maphist, "<- failed (off start)",
                   2095:                                    0,0,0,0);
                   2096:                                goto notfound;
1.134     matt     2097:                        }
1.140     enami    2098:                        entry = entry->prev;
                   2099:                } else {
                   2100:                        entry = entry->next;
                   2101:                        if (entry == &map->header) {
                   2102:                                UVMHIST_LOG(maphist, "<- failed (off end)",
1.81      thorpej  2103:                                    0,0,0,0);
1.140     enami    2104:                                goto notfound;
1.81      thorpej  2105:                        }
1.1       mrg      2106:                }
                   2107:        }
1.140     enami    2108:
                   2109:  found:
1.82      thorpej  2110:        SAVE_HINT(map, map->hint, entry);
1.1       mrg      2111:        *result = hint;
1.331.2.2  skrll    2112:        UVMHIST_LOG(maphist,"<- got it!  (result=%#lx)", hint, 0,0,0);
1.331.2.5  skrll    2113:        KASSERTMSG( topdown || hint >= orig_hint, "hint: %jx, orig_hint: %jx",
                   2114:            (uintmax_t)hint, (uintmax_t)orig_hint);
                   2115:        KASSERTMSG(!topdown || hint <= orig_hint, "hint: %jx, orig_hint: %jx",
                   2116:            (uintmax_t)hint, (uintmax_t)orig_hint);
1.144     yamt     2117:        KASSERT(entry->end <= hint);
                   2118:        KASSERT(hint + length <= entry->next->start);
1.1       mrg      2119:        return (entry);
1.140     enami    2120:
                   2121:  wraparound:
                   2122:        UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
                   2123:
1.165     yamt     2124:        return (NULL);
                   2125:
1.140     enami    2126:  notfound:
1.165     yamt     2127:        UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);
                   2128:
1.140     enami    2129:        return (NULL);
1.1       mrg      2130: }
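/*
 * Illustrative sketch (not part of the original source): the invariant
 * the tree search above relies on.  "gap" is the free space between an
 * entry and its successor and "maxgap" is the largest gap anywhere in
 * that entry's subtree, so a whole subtree can be skipped as soon as
 * child->maxgap < length.  A hypothetical helper spelling this out:
 */
#if 0	/* example only, not compiled */
static vsize_t
example_subtree_maxgap(struct vm_map_entry *e)
{
	struct vm_map_entry *child;
	vsize_t maxgap = e->gap;	/* == e->next->start - e->end */

	if ((child = LEFT_ENTRY(e)) != NULL && child->maxgap > maxgap)
		maxgap = child->maxgap;
	if ((child = RIGHT_ENTRY(e)) != NULL && child->maxgap > maxgap)
		maxgap = child->maxgap;
	return maxgap;
}
#endif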
                   2131:
                   2132: /*
                   2133:  *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
                   2134:  */
                   2135:
                   2136: /*
                   2137:  * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "end")
                   2138:  *
1.98      chs      2139:  * => caller must check alignment and size
1.1       mrg      2140:  * => map must be locked by caller
                   2141:  * => we return a list of map entries that we've removed from the map
                   2142:  *    in "entry_list"
                   2143:  */
                   2144:
1.94      chs      2145: void
1.138     enami    2146: uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1.311     para     2147:     struct vm_map_entry **entry_list /* OUT */, int flags)
1.10      mrg      2148: {
1.99      chs      2149:        struct vm_map_entry *entry, *first_entry, *next;
1.24      eeh      2150:        vaddr_t len;
1.99      chs      2151:        UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);
1.10      mrg      2152:
1.331.2.2  skrll    2153:        UVMHIST_LOG(maphist,"(map=%p, start=%#lx, end=%#lx)",
1.10      mrg      2154:            map, start, end, 0);
                   2155:        VM_MAP_RANGE_CHECK(map, start, end);
                   2156:
1.222     yamt     2157:        uvm_map_check(map, "unmap_remove entry");
1.144     yamt     2158:
1.10      mrg      2159:        /*
                   2160:         * find first entry
                   2161:         */
1.99      chs      2162:
1.234     thorpej  2163:        if (uvm_map_lookup_entry(map, start, &first_entry) == true) {
1.29      chuck    2164:                /* clip and go... */
1.10      mrg      2165:                entry = first_entry;
1.311     para     2166:                UVM_MAP_CLIP_START(map, entry, start);
1.10      mrg      2167:                /* critical!  prevents stale hint */
1.82      thorpej  2168:                SAVE_HINT(map, entry, entry->prev);
1.10      mrg      2169:        } else {
                   2170:                entry = first_entry->next;
                   2171:        }
                   2172:
                   2173:        /*
                   2174:         * Save the free space hint
                   2175:         */
                   2176:
1.220     yamt     2177:        if (map->first_free != &map->header && map->first_free->start >= start)
1.10      mrg      2178:                map->first_free = entry->prev;
                   2179:
                   2180:        /*
                   2181:         * note: we now re-use first_entry for a different task.  we remove
                   2182:         * a number of map entries from the map and save them in a linked
                   2183:         * list headed by "first_entry".  once we remove them from the map
                   2184:         * the caller should unlock the map and drop the references to the
                   2185:         * backing objects [c.f. uvm_unmap_detach].  the object is to
1.100     wiz      2186:         * separate unmapping from reference dropping.  why?
1.10      mrg      2187:         *   [1] the map has to be locked for unmapping
                   2188:         *   [2] the map need not be locked for reference dropping
                   2189:         *   [3] dropping references may trigger pager I/O, and if we hit
                   2190:         *       a pager that does synchronous I/O we may have to wait for it.
                   2191:         *   [4] we would like all waiting for I/O to occur with maps unlocked
1.98      chs      2192:         *       so that we don't block other threads.
1.10      mrg      2193:         */
1.99      chs      2194:
1.10      mrg      2195:        first_entry = NULL;
1.106     chs      2196:        *entry_list = NULL;
1.10      mrg      2197:
                   2198:        /*
1.98      chs      2199:         * break up the area into map entry sized regions and unmap.  note
1.10      mrg      2200:         * that all mappings have to be removed before we can even consider
                   2201:         * dropping references to amaps or VM objects (otherwise we could end
                   2202:         * up with a mapping to a page on the free list which would be very bad)
                   2203:         */
                   2204:
                   2205:        while ((entry != &map->header) && (entry->start < end)) {
1.311     para     2206:                KASSERT((entry->flags & UVM_MAP_STATIC) == 0);
1.174     yamt     2207:
1.311     para     2208:                UVM_MAP_CLIP_END(map, entry, end);
1.10      mrg      2209:                next = entry->next;
                   2210:                len = entry->end - entry->start;
1.81      thorpej  2211:
1.10      mrg      2212:                /*
                   2213:                 * unwire before removing addresses from the pmap; otherwise
                   2214:                 * unwiring will put the entries back into the pmap (XXX).
                   2215:                 */
1.1       mrg      2216:
1.106     chs      2217:                if (VM_MAPENT_ISWIRED(entry)) {
1.10      mrg      2218:                        uvm_map_entry_unwire(map, entry);
1.106     chs      2219:                }
1.187     yamt     2220:                if (flags & UVM_FLAG_VAONLY) {
                   2221:
                   2222:                        /* nothing */
                   2223:
                   2224:                } else if ((map->flags & VM_MAP_PAGEABLE) == 0) {
1.10      mrg      2225:
1.106     chs      2226:                        /*
                   2227:                         * if the map is non-pageable, any pages mapped there
                   2228:                         * must be wired and entered with pmap_kenter_pa(),
                   2229:                         * and we should free any such pages immediately.
1.287     joerg    2230:                         * this is mostly used for kmem_map.
1.106     chs      2231:                         */
1.292     rmind    2232:                        KASSERT(vm_map_pmap(map) == pmap_kernel());
1.99      chs      2233:
1.323     para     2234:                        uvm_km_pgremove_intrsafe(map, entry->start, entry->end);
1.106     chs      2235:                } else if (UVM_ET_ISOBJ(entry) &&
                   2236:                           UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1.300     yamt     2237:                        panic("%s: kernel object %p %p\n",
                   2238:                            __func__, map, entry);
1.106     chs      2239:                } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
1.29      chuck    2240:                        /*
1.298     rmind    2241:                         * remove mappings the standard way.  lock object
                   2242:                         * and/or amap to ensure vm_page state does not
                   2243:                         * change while in pmap_remove().
1.139     enami    2244:                         */
1.99      chs      2245:
1.298     rmind    2246:                        uvm_map_lock_entry(entry);
1.29      chuck    2247:                        pmap_remove(map->pmap, entry->start, entry->end);
1.298     rmind    2248:                        uvm_map_unlock_entry(entry);
1.10      mrg      2249:                }
                   2250:
1.331     christos 2251: #if defined(UVMDEBUG)
1.323     para     2252:                /*
                   2253:                 * check if there are any remaining mappings,
                   2254:                 * which would indicate a bug in the caller.
                   2255:                 */
1.177     yamt     2256:
1.323     para     2257:                vaddr_t va;
                   2258:                for (va = entry->start; va < entry->end;
                   2259:                    va += PAGE_SIZE) {
                   2260:                        if (pmap_extract(vm_map_pmap(map), va, NULL)) {
                   2261:                                panic("%s: %#"PRIxVADDR" has mapping",
                   2262:                                    __func__, va);
1.177     yamt     2263:                        }
1.323     para     2264:                }
1.187     yamt     2265:
1.331.2.1  skrll    2266:                if (VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
1.323     para     2267:                        uvm_km_check_empty(map, entry->start,
                   2268:                            entry->end);
1.177     yamt     2269:                }
1.331     christos 2270: #endif /* defined(UVMDEBUG) */
1.177     yamt     2271:
1.10      mrg      2272:                /*
1.98      chs      2273:                 * remove entry from map and put it on our list of entries
1.106     chs      2274:                 * that we've nuked.  then go to next entry.
1.10      mrg      2275:                 */
1.99      chs      2276:
1.331.2.2  skrll    2277:                UVMHIST_LOG(maphist, "  removed map entry %p", entry, 0, 0,0);
1.82      thorpej  2278:
                   2279:                /* critical!  prevents stale hint */
                   2280:                SAVE_HINT(map, entry, entry->prev);
                   2281:
1.10      mrg      2282:                uvm_map_entry_unlink(map, entry);
1.146     yamt     2283:                KASSERT(map->size >= len);
1.10      mrg      2284:                map->size -= len;
1.131     atatat   2285:                entry->prev = NULL;
1.10      mrg      2286:                entry->next = first_entry;
                   2287:                first_entry = entry;
1.106     chs      2288:                entry = next;
1.10      mrg      2289:        }
1.292     rmind    2290:
                   2291:        /*
                   2292:         * Note: if map is dying, leave pmap_update() for pmap_destroy(),
                   2293:         * which will be called later.
                   2294:         */
1.120     chs      2295:        if ((map->flags & VM_MAP_DYING) == 0) {
                   2296:                pmap_update(vm_map_pmap(map));
1.292     rmind    2297:        } else {
                   2298:                KASSERT(vm_map_pmap(map) != pmap_kernel());
1.120     chs      2299:        }
1.10      mrg      2300:
1.222     yamt     2301:        uvm_map_check(map, "unmap_remove leave");
1.144     yamt     2302:
1.10      mrg      2303:        /*
                   2304:         * now we've cleaned up the map and are ready for the caller to drop
1.98      chs      2305:         * references to the mapped objects.
1.10      mrg      2306:         */
                   2307:
                   2308:        *entry_list = first_entry;
                   2309:        UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1.180     yamt     2310:
                   2311:        if (map->flags & VM_MAP_WANTVA) {
1.238     ad       2312:                mutex_enter(&map->misc_lock);
1.180     yamt     2313:                map->flags &= ~VM_MAP_WANTVA;
1.238     ad       2314:                cv_broadcast(&map->cv);
                   2315:                mutex_exit(&map->misc_lock);
1.180     yamt     2316:        }
1.1       mrg      2317: }
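/*
 * Illustrative sketch (not part of the original source): the usual
 * two-phase unmap, roughly what uvm_unmap() does -- remove the entries
 * with the map write-locked, then drop object/amap references with the
 * map unlocked via uvm_unmap_detach() below.
 */
#if 0	/* example only, not compiled */
	struct vm_map_entry *dead_entries;

	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
#endif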
                   2318:
                   2319: /*
                   2320:  * uvm_unmap_detach: drop references in a chain of map entries
                   2321:  *
                   2322:  * => we will free the map entries as we traverse the list.
                   2323:  */
                   2324:
1.10      mrg      2325: void
1.138     enami    2326: uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
1.1       mrg      2327: {
1.99      chs      2328:        struct vm_map_entry *next_entry;
1.10      mrg      2329:        UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
1.1       mrg      2330:
1.10      mrg      2331:        while (first_entry) {
1.85      chs      2332:                KASSERT(!VM_MAPENT_ISWIRED(first_entry));
1.10      mrg      2333:                UVMHIST_LOG(maphist,
1.331.2.2  skrll    2334:                    "  detach %p: amap=%p, obj=%p, submap?=%d",
1.98      chs      2335:                    first_entry, first_entry->aref.ar_amap,
1.29      chuck    2336:                    first_entry->object.uvm_obj,
                   2337:                    UVM_ET_ISSUBMAP(first_entry));
1.1       mrg      2338:
1.10      mrg      2339:                /*
                   2340:                 * drop reference to amap, if we've got one
                   2341:                 */
                   2342:
                   2343:                if (first_entry->aref.ar_amap)
1.85      chs      2344:                        uvm_map_unreference_amap(first_entry, flags);
1.10      mrg      2345:
                   2346:                /*
                   2347:                 * drop reference to our backing object, if we've got one
                   2348:                 */
1.85      chs      2349:
1.120     chs      2350:                KASSERT(!UVM_ET_ISSUBMAP(first_entry));
                   2351:                if (UVM_ET_ISOBJ(first_entry) &&
                   2352:                    first_entry->object.uvm_obj->pgops->pgo_detach) {
                   2353:                        (*first_entry->object.uvm_obj->pgops->pgo_detach)
                   2354:                                (first_entry->object.uvm_obj);
1.10      mrg      2355:                }
                   2356:                next_entry = first_entry->next;
                   2357:                uvm_mapent_free(first_entry);
                   2358:                first_entry = next_entry;
                   2359:        }
                   2360:        UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
1.1       mrg      2361: }
                   2362:
                   2363: /*
                   2364:  *   E X T R A C T I O N   F U N C T I O N S
                   2365:  */
                   2366:
1.98      chs      2367: /*
1.1       mrg      2368:  * uvm_map_reserve: reserve space in a vm_map for future use.
                   2369:  *
1.98      chs      2370:  * => we reserve space in a map by putting a dummy map entry in the
1.1       mrg      2371:  *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
                   2372:  * => map should be unlocked (we will write lock it)
                   2373:  * => we return true if we were able to reserve space
                   2374:  * => XXXCDC: should be inline?
                   2375:  */
                   2376:
1.10      mrg      2377: int
1.138     enami    2378: uvm_map_reserve(struct vm_map *map, vsize_t size,
                   2379:     vaddr_t offset     /* hint for pmap_prefer */,
1.243     yamt     2380:     vsize_t align      /* alignment */,
1.210     yamt     2381:     vaddr_t *raddr     /* IN:hint, OUT: reserved VA */,
1.324     matt     2382:     uvm_flag_t flags   /* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */)
1.1       mrg      2383: {
1.98      chs      2384:        UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
1.85      chs      2385:
1.331.2.2  skrll    2386:        UVMHIST_LOG(maphist, "(map=%p, size=%#lx, offset=%#lx, addr=%p)",
1.139     enami    2387:            map,size,offset,raddr);
1.85      chs      2388:
1.10      mrg      2389:        size = round_page(size);
1.85      chs      2390:
1.10      mrg      2391:        /*
                   2392:         * reserve some virtual space.
                   2393:         */
1.85      chs      2394:
1.243     yamt     2395:        if (uvm_map(map, raddr, size, NULL, offset, align,
1.10      mrg      2396:            UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
1.210     yamt     2397:            UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) {
1.10      mrg      2398:            UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
1.234     thorpej  2399:                return (false);
1.98      chs      2400:        }
1.85      chs      2401:
1.331.2.2  skrll    2402:        UVMHIST_LOG(maphist, "<- done (*raddr=%#lx)", *raddr,0,0,0);
1.234     thorpej  2403:        return (true);
1.1       mrg      2404: }
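/*
 * Illustrative sketch (not part of the original source): reserving
 * kernel VA for later use, as uvm_map_extract() does in its step 1.
 * "len" is a placeholder size; error handling is minimal.
 */
#if 0	/* example only, not compiled */
	vaddr_t va = vm_map_min(kernel_map);

	if (!uvm_map_reserve(kernel_map, len, 0, 0, &va, 0))
		return ENOMEM;
	/* "va" now names a blank, VM_PROT_NONE entry of size "len" */
#endif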
                   2405:
                   2406: /*
1.98      chs      2407:  * uvm_map_replace: replace a reserved (blank) area of memory with
1.1       mrg      2408:  * real mappings.
                   2409:  *
1.98      chs      2410:  * => caller must WRITE-LOCK the map
1.234     thorpej  2411:  * => we return true if replacement was a success
1.1       mrg      2412:  * => we expect the newents chain to have nnewents entries on it and
                   2413:  *    we expect newents->prev to point to the last entry on the list
                   2414:  * => note newents is allowed to be NULL
                   2415:  */
                   2416:
1.275     yamt     2417: static int
1.138     enami    2418: uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
1.275     yamt     2419:     struct vm_map_entry *newents, int nnewents, vsize_t nsize,
                   2420:     struct vm_map_entry **oldentryp)
1.10      mrg      2421: {
1.99      chs      2422:        struct vm_map_entry *oldent, *last;
1.1       mrg      2423:
1.222     yamt     2424:        uvm_map_check(map, "map_replace entry");
1.144     yamt     2425:
1.10      mrg      2426:        /*
                   2427:         * first find the blank map entry at the specified address
                   2428:         */
1.85      chs      2429:
1.10      mrg      2430:        if (!uvm_map_lookup_entry(map, start, &oldent)) {
1.234     thorpej  2431:                return (false);
1.10      mrg      2432:        }
1.85      chs      2433:
1.10      mrg      2434:        /*
                   2435:         * check to make sure we have a proper blank entry
                   2436:         */
1.1       mrg      2437:
1.311     para     2438:        if (end < oldent->end) {
                   2439:                UVM_MAP_CLIP_END(map, oldent, end);
1.210     yamt     2440:        }
1.98      chs      2441:        if (oldent->start != start || oldent->end != end ||
1.10      mrg      2442:            oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
1.234     thorpej  2443:                return (false);
1.10      mrg      2444:        }
1.1       mrg      2445:
                   2446: #ifdef DIAGNOSTIC
1.99      chs      2447:
1.10      mrg      2448:        /*
                   2449:         * sanity check the newents chain
                   2450:         */
1.99      chs      2451:
1.10      mrg      2452:        {
1.99      chs      2453:                struct vm_map_entry *tmpent = newents;
1.10      mrg      2454:                int nent = 0;
1.275     yamt     2455:                vsize_t sz = 0;
1.24      eeh      2456:                vaddr_t cur = start;
1.10      mrg      2457:
                   2458:                while (tmpent) {
                   2459:                        nent++;
1.275     yamt     2460:                        sz += tmpent->end - tmpent->start;
1.10      mrg      2461:                        if (tmpent->start < cur)
                   2462:                                panic("uvm_map_replace1");
1.275     yamt     2463:                        if (tmpent->start >= tmpent->end || tmpent->end > end) {
1.286     matt     2464:                                panic("uvm_map_replace2: "
1.331.2.2  skrll    2465:                                    "tmpent->start=%#"PRIxVADDR
                   2466:                                    ", tmpent->end=%#"PRIxVADDR
                   2467:                                    ", end=%#"PRIxVADDR,
1.286     matt     2468:                                    tmpent->start, tmpent->end, end);
1.10      mrg      2469:                        }
                   2470:                        cur = tmpent->end;
                   2471:                        if (tmpent->next) {
                   2472:                                if (tmpent->next->prev != tmpent)
                   2473:                                        panic("uvm_map_replace3");
                   2474:                        } else {
                   2475:                                if (newents->prev != tmpent)
                   2476:                                        panic("uvm_map_replace4");
                   2477:                        }
                   2478:                        tmpent = tmpent->next;
                   2479:                }
                   2480:                if (nent != nnewents)
                   2481:                        panic("uvm_map_replace5");
1.275     yamt     2482:                if (sz != nsize)
                   2483:                        panic("uvm_map_replace6");
1.10      mrg      2484:        }
                   2485: #endif
                   2486:
                   2487:        /*
                   2488:         * map entry is a valid blank!   replace it.   (this does all the
                   2489:         * work of map entry link/unlink...).
                   2490:         */
                   2491:
                   2492:        if (newents) {
1.99      chs      2493:                last = newents->prev;
1.10      mrg      2494:
                   2495:                /* critical: flush stale hints out of map */
1.82      thorpej  2496:                SAVE_HINT(map, map->hint, newents);
1.10      mrg      2497:                if (map->first_free == oldent)
                   2498:                        map->first_free = last;
                   2499:
                   2500:                last->next = oldent->next;
                   2501:                last->next->prev = last;
1.144     yamt     2502:
                   2503:                /* Fix RB tree */
                   2504:                uvm_rb_remove(map, oldent);
                   2505:
1.10      mrg      2506:                newents->prev = oldent->prev;
                   2507:                newents->prev->next = newents;
                   2508:                map->nentries = map->nentries + (nnewents - 1);
                   2509:
1.144     yamt     2510:                /* Fixup the RB tree */
                   2511:                {
                   2512:                        int i;
                   2513:                        struct vm_map_entry *tmp;
                   2514:
                   2515:                        tmp = newents;
                   2516:                        for (i = 0; i < nnewents && tmp; i++) {
                   2517:                                uvm_rb_insert(map, tmp);
                   2518:                                tmp = tmp->next;
                   2519:                        }
                   2520:                }
1.10      mrg      2521:        } else {
                   2522:                /* NULL list of new entries: just remove the old one */
1.221     yamt     2523:                clear_hints(map, oldent);
1.10      mrg      2524:                uvm_map_entry_unlink(map, oldent);
                   2525:        }
1.275     yamt     2526:        map->size -= end - start - nsize;
1.10      mrg      2527:
1.222     yamt     2528:        uvm_map_check(map, "map_replace leave");
1.10      mrg      2529:
                   2530:        /*
1.209     yamt     2531:         * now we can free the old blank entry and return.
1.10      mrg      2532:         */
1.1       mrg      2533:
1.253     yamt     2534:        *oldentryp = oldent;
1.234     thorpej  2535:        return (true);
1.1       mrg      2536: }
                   2537:
                   2538: /*
                   2539:  * uvm_map_extract: extract a mapping from a map and put it somewhere
                   2540:  *     (maybe removing the old mapping)
                   2541:  *
                   2542:  * => maps should be unlocked (we will write lock them)
                   2543:  * => returns 0 on success, error code otherwise
                   2544:  * => start must be page aligned
                   2545:  * => len must be page sized
                   2546:  * => flags:
                   2547:  *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
                   2548:  *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
                   2549:  *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
                   2550:  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
1.331.2.4  skrll    2551:  *      UVM_EXTRACT_PROT_ALL: set prot to UVM_PROT_ALL as we go
1.1       mrg      2552:  *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
                   2553:  *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
                   2554:  *             be used from within the kernel in a kernel level map <<<
                   2555:  */
                   2556:
1.10      mrg      2557: int
1.138     enami    2558: uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
                   2559:     struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
1.10      mrg      2560: {
1.163     mycroft  2561:        vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge;
1.99      chs      2562:        struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
                   2563:            *deadentry, *oldentry;
1.253     yamt     2564:        struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */
1.325     martin   2565:        vsize_t elen __unused;
1.10      mrg      2566:        int nchain, error, copy_ok;
1.275     yamt     2567:        vsize_t nsize;
1.10      mrg      2568:        UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
1.85      chs      2569:
1.331.2.2  skrll    2570:        UVMHIST_LOG(maphist,"(srcmap=%p,start=%#lx, len=%#lx", srcmap, start,
1.10      mrg      2571:            len,0);
1.331.2.2  skrll    2572:        UVMHIST_LOG(maphist," ...,dstmap=%p, flags=%#x)", dstmap,flags,0,0);
1.10      mrg      2573:
                   2574:        /*
                   2575:         * step 0: sanity check: start must be on a page boundary, length
                   2576:         * must be page sized.  can't ask for CONTIG/QREF if you asked for
                   2577:         * REMOVE.
                   2578:         */
                   2579:
1.85      chs      2580:        KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
                   2581:        KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
                   2582:                (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
1.10      mrg      2583:
                   2584:        /*
                   2585:         * step 1: reserve space in the target map for the extracted area
                   2586:         */
                   2587:
1.210     yamt     2588:        if ((flags & UVM_EXTRACT_RESERVED) == 0) {
                   2589:                dstaddr = vm_map_min(dstmap);
1.331.2.2  skrll    2590:                if (!uvm_map_reserve(dstmap, len, start,
1.324     matt     2591:                    atop(start) & uvmexp.colormask, &dstaddr,
                   2592:                    UVM_FLAG_COLORMATCH))
1.210     yamt     2593:                        return (ENOMEM);
1.324     matt     2594:                KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0);
1.210     yamt     2595:                *dstaddrp = dstaddr;    /* pass address back to caller */
1.331.2.2  skrll    2596:                UVMHIST_LOG(maphist, "  dstaddr=%#lx", dstaddr,0,0,0);
1.210     yamt     2597:        } else {
                   2598:                dstaddr = *dstaddrp;
                   2599:        }
1.10      mrg      2600:
                   2601:        /*
1.98      chs      2602:         * step 2: setup for the extraction process loop by init'ing the
1.10      mrg      2603:         * map entry chain, locking src map, and looking up the first useful
                   2604:         * entry in the map.
                   2605:         */
1.1       mrg      2606:
1.10      mrg      2607:        end = start + len;
                   2608:        newend = dstaddr + len;
                   2609:        chain = endchain = NULL;
                   2610:        nchain = 0;
1.275     yamt     2611:        nsize = 0;
1.10      mrg      2612:        vm_map_lock(srcmap);
                   2613:
                   2614:        if (uvm_map_lookup_entry(srcmap, start, &entry)) {
                   2615:
                   2616:                /* "start" is within an entry */
                   2617:                if (flags & UVM_EXTRACT_QREF) {
1.85      chs      2618:
1.10      mrg      2619:                        /*
                   2620:                         * for quick references we don't clip the entry, so
                   2621:                         * the entry may map space "before" the starting
                   2622:                         * virtual address... this is the "fudge" factor
                   2623:                         * (which can be non-zero only the first time
                   2624:                         * through the "while" loop in step 3).
                   2625:                         */
1.85      chs      2626:
1.10      mrg      2627:                        fudge = start - entry->start;
                   2628:                } else {
1.85      chs      2629:
1.10      mrg      2630:                        /*
                   2631:                         * normal reference: we clip the map to fit (thus
                   2632:                         * fudge is zero)
                   2633:                         */
1.85      chs      2634:
1.311     para     2635:                        UVM_MAP_CLIP_START(srcmap, entry, start);
1.82      thorpej  2636:                        SAVE_HINT(srcmap, srcmap->hint, entry->prev);
1.10      mrg      2637:                        fudge = 0;
                   2638:                }
1.85      chs      2639:        } else {
1.1       mrg      2640:
1.10      mrg      2641:                /* "start" is not within an entry ... skip to next entry */
                   2642:                if (flags & UVM_EXTRACT_CONTIG) {
                   2643:                        error = EINVAL;
                   2644:                        goto bad;    /* definite hole here ... */
                   2645:                }
1.1       mrg      2646:
1.10      mrg      2647:                entry = entry->next;
                   2648:                fudge = 0;
                   2649:        }
1.85      chs      2650:
1.10      mrg      2651:        /* save values from srcmap for step 6 */
                   2652:        orig_entry = entry;
                   2653:        orig_fudge = fudge;
1.1       mrg      2654:
1.10      mrg      2655:        /*
                   2656:         * step 3: now start looping through the map entries, extracting
                   2657:         * as we go.
                   2658:         */
1.1       mrg      2659:
1.10      mrg      2660:        while (entry->start < end && entry != &srcmap->header) {
1.85      chs      2661:
1.10      mrg      2662:                /* if we are not doing a quick reference, clip it */
                   2663:                if ((flags & UVM_EXTRACT_QREF) == 0)
1.311     para     2664:                        UVM_MAP_CLIP_END(srcmap, entry, end);
1.10      mrg      2665:
                   2666:                /* clear needs_copy (allow chunking) */
                   2667:                if (UVM_ET_ISNEEDSCOPY(entry)) {
1.212     yamt     2668:                        amap_copy(srcmap, entry,
                   2669:                            AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end);
1.10      mrg      2670:                        if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
                   2671:                                error = ENOMEM;
                   2672:                                goto bad;
                   2673:                        }
1.85      chs      2674:
1.10      mrg      2675:                        /* amap_copy could clip (during chunk)!  update fudge */
                   2676:                        if (fudge) {
1.163     mycroft  2677:                                fudge = start - entry->start;
1.10      mrg      2678:                                orig_fudge = fudge;
                   2679:                        }
                   2680:                }
1.1       mrg      2681:
1.10      mrg      2682:                /* calculate the offset of this from "start" */
                   2683:                oldoffset = (entry->start + fudge) - start;
1.1       mrg      2684:
1.10      mrg      2685:                /* allocate a new map entry */
1.126     bouyer   2686:                newentry = uvm_mapent_alloc(dstmap, 0);
1.10      mrg      2687:                if (newentry == NULL) {
                   2688:                        error = ENOMEM;
                   2689:                        goto bad;
                   2690:                }
                   2691:
                   2692:                /* set up new map entry */
                   2693:                newentry->next = NULL;
                   2694:                newentry->prev = endchain;
                   2695:                newentry->start = dstaddr + oldoffset;
                   2696:                newentry->end =
                   2697:                    newentry->start + (entry->end - (entry->start + fudge));
1.37      chs      2698:                if (newentry->end > newend || newentry->end < newentry->start)
1.10      mrg      2699:                        newentry->end = newend;
                   2700:                newentry->object.uvm_obj = entry->object.uvm_obj;
                   2701:                if (newentry->object.uvm_obj) {
                   2702:                        if (newentry->object.uvm_obj->pgops->pgo_reference)
                   2703:                                newentry->object.uvm_obj->pgops->
                   2704:                                    pgo_reference(newentry->object.uvm_obj);
                   2705:                        newentry->offset = entry->offset + fudge;
                   2706:                } else {
                   2707:                        newentry->offset = 0;
                   2708:                }
                   2709:                newentry->etype = entry->etype;
1.331.2.4  skrll    2710:                if (flags & UVM_EXTRACT_PROT_ALL) {
                   2711:                        newentry->protection = newentry->max_protection =
                   2712:                            UVM_PROT_ALL;
                   2713:                } else {
                   2714:                        newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
                   2715:                            entry->max_protection : entry->protection;
                   2716:                        newentry->max_protection = entry->max_protection;
                   2717:                }
1.10      mrg      2718:                newentry->inheritance = entry->inheritance;
                   2719:                newentry->wired_count = 0;
                   2720:                newentry->aref.ar_amap = entry->aref.ar_amap;
                   2721:                if (newentry->aref.ar_amap) {
1.34      chuck    2722:                        newentry->aref.ar_pageoff =
                   2723:                            entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
1.85      chs      2724:                        uvm_map_reference_amap(newentry, AMAP_SHARED |
1.10      mrg      2725:                            ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
                   2726:                } else {
1.34      chuck    2727:                        newentry->aref.ar_pageoff = 0;
1.10      mrg      2728:                }
                   2729:                newentry->advice = entry->advice;
1.245     yamt     2730:                if ((flags & UVM_EXTRACT_QREF) != 0) {
                   2731:                        newentry->flags |= UVM_MAP_NOMERGE;
                   2732:                }
1.10      mrg      2733:
                   2734:                /* now link it on the chain */
                   2735:                nchain++;
1.275     yamt     2736:                nsize += newentry->end - newentry->start;
1.10      mrg      2737:                if (endchain == NULL) {
                   2738:                        chain = endchain = newentry;
                   2739:                } else {
                   2740:                        endchain->next = newentry;
                   2741:                        endchain = newentry;
                   2742:                }
                   2743:
                   2744:                /* end of 'while' loop! */
1.98      chs      2745:                if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
1.10      mrg      2746:                    (entry->next == &srcmap->header ||
                   2747:                    entry->next->start != entry->end)) {
                   2748:                        error = EINVAL;
                   2749:                        goto bad;
                   2750:                }
                   2751:                entry = entry->next;
                   2752:                fudge = 0;
                   2753:        }
                   2754:
                   2755:        /*
                   2756:         * step 4: close off chain (in format expected by uvm_map_replace)
                   2757:         */
                   2758:
                   2759:        if (chain)
                   2760:                chain->prev = endchain;
                   2761:
                   2762:        /*
                   2763:         * step 5: attempt to lock the dest map so we can pmap_copy.
1.98      chs      2764:         * note usage of copy_ok:
1.10      mrg      2765:         *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
                   2766:         *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
                   2767:         */
1.85      chs      2768:
1.234     thorpej  2769:        if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) {
1.10      mrg      2770:                copy_ok = 1;
                   2771:                if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
1.275     yamt     2772:                    nchain, nsize, &resentry)) {
1.10      mrg      2773:                        if (srcmap != dstmap)
                   2774:                                vm_map_unlock(dstmap);
                   2775:                        error = EIO;
                   2776:                        goto bad;
                   2777:                }
                   2778:        } else {
                   2779:                copy_ok = 0;
                   2780:                /* replace deferred until step 7 */
                   2781:        }
                   2782:
                   2783:        /*
                   2784:         * step 6: traverse the srcmap a second time to do the following:
                   2785:         *  - if we got a lock on the dstmap do pmap_copy
                   2786:         *  - if UVM_EXTRACT_REMOVE remove the entries
                   2787:         * we make use of orig_entry and orig_fudge (saved in step 2)
                   2788:         */
                   2789:
                   2790:        if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
                   2791:
                   2792:                /* purge possible stale hints from srcmap */
                   2793:                if (flags & UVM_EXTRACT_REMOVE) {
1.82      thorpej  2794:                        SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
1.220     yamt     2795:                        if (srcmap->first_free != &srcmap->header &&
                   2796:                            srcmap->first_free->start >= start)
1.10      mrg      2797:                                srcmap->first_free = orig_entry->prev;
                   2798:                }
                   2799:
                   2800:                entry = orig_entry;
                   2801:                fudge = orig_fudge;
                   2802:                deadentry = NULL;       /* for UVM_EXTRACT_REMOVE */
                   2803:
                   2804:                while (entry->start < end && entry != &srcmap->header) {
                   2805:                        if (copy_ok) {
1.74      thorpej  2806:                                oldoffset = (entry->start + fudge) - start;
1.90      chs      2807:                                elen = MIN(end, entry->end) -
1.74      thorpej  2808:                                    (entry->start + fudge);
                   2809:                                pmap_copy(dstmap->pmap, srcmap->pmap,
                   2810:                                    dstaddr + oldoffset, elen,
                   2811:                                    entry->start + fudge);
1.10      mrg      2812:                        }
                   2813:
1.74      thorpej  2814:                        /* we advance "entry" in the following if statement */
1.10      mrg      2815:                        if (flags & UVM_EXTRACT_REMOVE) {
1.298     rmind    2816:                                uvm_map_lock_entry(entry);
1.98      chs      2817:                                pmap_remove(srcmap->pmap, entry->start,
1.20      chuck    2818:                                                entry->end);
1.298     rmind    2819:                                uvm_map_unlock_entry(entry);
1.139     enami    2820:                                oldentry = entry;       /* save entry */
                   2821:                                entry = entry->next;    /* advance */
1.20      chuck    2822:                                uvm_map_entry_unlink(srcmap, oldentry);
                   2823:                                                        /* add to dead list */
                   2824:                                oldentry->next = deadentry;
                   2825:                                deadentry = oldentry;
1.139     enami    2826:                        } else {
                   2827:                                entry = entry->next;            /* advance */
1.10      mrg      2828:                        }
                   2829:
                   2830:                        /* end of 'while' loop */
                   2831:                        fudge = 0;
                   2832:                }
1.105     chris    2833:                pmap_update(srcmap->pmap);
1.10      mrg      2834:
                   2835:                /*
                   2836:                 * unlock dstmap.  we will dispose of deadentry in
                   2837:                 * step 7 if needed
                   2838:                 */
1.85      chs      2839:
1.10      mrg      2840:                if (copy_ok && srcmap != dstmap)
                   2841:                        vm_map_unlock(dstmap);
                   2842:
1.99      chs      2843:        } else {
                   2844:                deadentry = NULL;
1.10      mrg      2845:        }
                   2846:
                   2847:        /*
                   2848:         * step 7: we are done with the source map, unlock.   if copy_ok
                   2849:         * is 0 then we have not replaced the dummy mapping in dstmap yet
                   2850:         * and we need to do so now.
                   2851:         */
                   2852:
                   2853:        vm_map_unlock(srcmap);
                   2854:        if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
                   2855:                uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */
                   2856:
                   2857:        /* now do the replacement if we didn't do it in step 5 */
                   2858:        if (copy_ok == 0) {
                   2859:                vm_map_lock(dstmap);
                   2860:                error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
1.275     yamt     2861:                    nchain, nsize, &resentry);
1.10      mrg      2862:                vm_map_unlock(dstmap);
                   2863:
1.234     thorpej  2864:                if (error == false) {
1.10      mrg      2865:                        error = EIO;
                   2866:                        goto bad2;
                   2867:                }
                   2868:        }
1.144     yamt     2869:
1.253     yamt     2870:        if (resentry != NULL)
                   2871:                uvm_mapent_free(resentry);
                   2872:
1.139     enami    2873:        return (0);
1.10      mrg      2874:
                   2875:        /*
                   2876:         * bad: failure recovery
                   2877:         */
                   2878: bad:
                   2879:        vm_map_unlock(srcmap);
                   2880: bad2:                  /* src already unlocked */
                   2881:        if (chain)
                   2882:                uvm_unmap_detach(chain,
                   2883:                    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
1.144     yamt     2884:
1.253     yamt     2885:        if (resentry != NULL)
                   2886:                uvm_mapent_free(resentry);
                   2887:
1.210     yamt     2888:        if ((flags & UVM_EXTRACT_RESERVED) == 0) {
                   2889:                uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ??? */
                   2890:        }
1.139     enami    2891:        return (error);
1.10      mrg      2892: }
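/*
 * Illustrative sketch (not part of the original source): a temporary
 * quick-reference extraction between two kernel-level maps.  As noted
 * above, QREF extractions must be unmapped via the QREF path and must
 * not be combined with UVM_EXTRACT_REMOVE.  "src_map", "dst_map",
 * "sva" and "len" are placeholders.
 */
#if 0	/* example only, not compiled */
	vaddr_t dva;
	int error;

	error = uvm_map_extract(src_map, sva, len, dst_map, &dva,
	    UVM_EXTRACT_QREF | UVM_EXTRACT_FIXPROT);
	if (error)
		return error;
#endif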
                   2893:
                   2894: /* end of extraction functions */
1.1       mrg      2895:
                   2896: /*
                   2897:  * uvm_map_submap: punch down part of a map into a submap
                   2898:  *
                   2899:  * => only the kernel_map is allowed to be submapped
                   2900:  * => the purpose of submapping is to break up the locking granularity
                   2901:  *     of a larger map
                   2902:  * => the range specified must have been mapped previously with a uvm_map()
                   2903:  *     call [with uobj==NULL] to create a blank map entry in the main map.
                   2904:  *     [And it had better still be blank!]
                   2905:  * => maps which contain submaps should never be copied or forked.
1.98      chs      2906:  * => to remove a submap, use uvm_unmap() on the main map
1.1       mrg      2907:  *     and then uvm_map_deallocate() the submap.
                   2908:  * => main map must be unlocked.
                   2909:  * => submap must have been init'd and have a zero reference count.
                   2910:  *     [need not be locked as we don't actually reference it]
                   2911:  */
1.85      chs      2912:
1.10      mrg      2913: int
1.138     enami    2914: uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
                   2915:     struct vm_map *submap)
1.10      mrg      2916: {
1.99      chs      2917:        struct vm_map_entry *entry;
1.94      chs      2918:        int error;
1.1       mrg      2919:
1.10      mrg      2920:        vm_map_lock(map);
1.85      chs      2921:        VM_MAP_RANGE_CHECK(map, start, end);
1.1       mrg      2922:
1.10      mrg      2923:        if (uvm_map_lookup_entry(map, start, &entry)) {
1.311     para     2924:                UVM_MAP_CLIP_START(map, entry, start);
                   2925:                UVM_MAP_CLIP_END(map, entry, end);      /* to be safe */
1.94      chs      2926:        } else {
1.10      mrg      2927:                entry = NULL;
                   2928:        }
1.1       mrg      2929:
1.98      chs      2930:        if (entry != NULL &&
1.10      mrg      2931:            entry->start == start && entry->end == end &&
                   2932:            entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
                   2933:            !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
1.29      chuck    2934:                entry->etype |= UVM_ET_SUBMAP;
1.10      mrg      2935:                entry->object.sub_map = submap;
                   2936:                entry->offset = 0;
                   2937:                uvm_map_reference(submap);
1.94      chs      2938:                error = 0;
1.10      mrg      2939:        } else {
1.94      chs      2940:                error = EINVAL;
1.10      mrg      2941:        }
                   2942:        vm_map_unlock(map);
1.174     yamt     2943:
1.94      chs      2944:        return error;
1.1       mrg      2945: }
                   2946:
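/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Assuming "submap" has already been set up with a zero reference count
 * and [start, end) was previously reserved as a blank entry in kernel_map
 * (in-tree callers normally reach this through uvm_km_suballoc() rather
 * than calling it directly), installing the submap is a single call; the
 * function name is hypothetical.
 */
#if 0	/* editor's example only */
static void
example_install_submap(struct vm_map *submap, vaddr_t start, vaddr_t end)
{

	if (uvm_map_submap(kernel_map, start, end, submap) != 0)
		panic("example_install_submap: uvm_map_submap failed");
}
#endif	/* editor's example only */
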
1.175     yamt     2947: /*
1.1       mrg      2948:  * uvm_map_protect: change map protection
                   2949:  *
                   2950:  * => set_max means set max_protection.
                   2951:  * => map must be unlocked.
                   2952:  */
                   2953:
1.139     enami    2954: #define MASK(entry)    (UVM_ET_ISCOPYONWRITE(entry) ? \
1.36      mycroft  2955:                         ~VM_PROT_WRITE : VM_PROT_ALL)
1.1       mrg      2956:
1.10      mrg      2957: int
1.138     enami    2958: uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
1.233     thorpej  2959:     vm_prot_t new_prot, bool set_max)
1.10      mrg      2960: {
1.99      chs      2961:        struct vm_map_entry *current, *entry;
1.94      chs      2962:        int error = 0;
1.10      mrg      2963:        UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    2964:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx,new_prot=%#x)",
1.85      chs      2965:                    map, start, end, new_prot);
                   2966:
1.10      mrg      2967:        vm_map_lock(map);
                   2968:        VM_MAP_RANGE_CHECK(map, start, end);
                   2969:        if (uvm_map_lookup_entry(map, start, &entry)) {
1.311     para     2970:                UVM_MAP_CLIP_START(map, entry, start);
1.10      mrg      2971:        } else {
                   2972:                entry = entry->next;
                   2973:        }
                   2974:
1.1       mrg      2975:        /*
1.10      mrg      2976:         * make a first pass to check for protection violations.
1.1       mrg      2977:         */
                   2978:
1.10      mrg      2979:        current = entry;
                   2980:        while ((current != &map->header) && (current->start < end)) {
1.65      thorpej  2981:                if (UVM_ET_ISSUBMAP(current)) {
1.94      chs      2982:                        error = EINVAL;
1.65      thorpej  2983:                        goto out;
                   2984:                }
1.10      mrg      2985:                if ((new_prot & current->max_protection) != new_prot) {
1.94      chs      2986:                        error = EACCES;
1.65      thorpej  2987:                        goto out;
1.112     thorpej  2988:                }
                   2989:                /*
                   2990:                 * Don't allow VM_PROT_EXECUTE to be set on entries that
                   2991:                 * point to vnodes that are associated with a NOEXEC file
                   2992:                 * system.
                   2993:                 */
                   2994:                if (UVM_ET_ISOBJ(current) &&
                   2995:                    UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
                   2996:                        struct vnode *vp =
                   2997:                            (struct vnode *) current->object.uvm_obj;
                   2998:
                   2999:                        if ((new_prot & VM_PROT_EXECUTE) != 0 &&
                   3000:                            (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
                   3001:                                error = EACCES;
                   3002:                                goto out;
                   3003:                        }
1.10      mrg      3004:                }
1.224     elad     3005:
1.65      thorpej  3006:                current = current->next;
1.10      mrg      3007:        }
                   3008:
                   3009:        /* go back and fix up protections (no need to clip this time). */
                   3010:
                   3011:        current = entry;
                   3012:        while ((current != &map->header) && (current->start < end)) {
                   3013:                vm_prot_t old_prot;
1.85      chs      3014:
1.311     para     3015:                UVM_MAP_CLIP_END(map, current, end);
1.10      mrg      3016:                old_prot = current->protection;
                   3017:                if (set_max)
                   3018:                        current->protection =
                   3019:                            (current->max_protection = new_prot) & old_prot;
                   3020:                else
                   3021:                        current->protection = new_prot;
                   3022:
                   3023:                /*
1.98      chs      3024:                 * update physical map if necessary.  worry about copy-on-write
1.10      mrg      3025:                 * here -- CHECK THIS XXX
                   3026:                 */
                   3027:
                   3028:                if (current->protection != old_prot) {
1.29      chuck    3029:                        /* update pmap! */
1.298     rmind    3030:                        uvm_map_lock_entry(current);
1.29      chuck    3031:                        pmap_protect(map->pmap, current->start, current->end,
                   3032:                            current->protection & MASK(entry));
1.298     rmind    3033:                        uvm_map_unlock_entry(current);
1.109     thorpej  3034:
                   3035:                        /*
                   3036:                         * If this entry points at a vnode, and the
                   3037:                         * protection includes VM_PROT_EXECUTE, mark
1.111     thorpej  3038:                         * the vnode as VEXECMAP.
1.109     thorpej  3039:                         */
                   3040:                        if (UVM_ET_ISOBJ(current)) {
                   3041:                                struct uvm_object *uobj =
                   3042:                                    current->object.uvm_obj;
                   3043:
                   3044:                                if (UVM_OBJ_IS_VNODE(uobj) &&
1.241     ad       3045:                                    (current->protection & VM_PROT_EXECUTE)) {
1.110     thorpej  3046:                                        vn_markexec((struct vnode *) uobj);
1.241     ad       3047:                                }
1.109     thorpej  3048:                        }
1.65      thorpej  3049:                }
1.10      mrg      3050:
1.65      thorpej  3051:                /*
                   3052:                 * If the map is configured to lock any future mappings,
                   3053:                 * wire this entry now if the old protection was VM_PROT_NONE
                   3054:                 * and the new protection is not VM_PROT_NONE.
                   3055:                 */
                   3056:
                   3057:                if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
                   3058:                    VM_MAPENT_ISWIRED(entry) == 0 &&
                   3059:                    old_prot == VM_PROT_NONE &&
                   3060:                    new_prot != VM_PROT_NONE) {
                   3061:                        if (uvm_map_pageable(map, entry->start,
1.234     thorpej  3062:                            entry->end, false,
1.94      chs      3063:                            UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
1.99      chs      3064:
1.65      thorpej  3065:                                /*
                   3066:                                 * If locking the entry fails, remember the
                   3067:                                 * error if it's the first one.  Note we
                   3068:                                 * still continue setting the protection in
1.94      chs      3069:                                 * the map, but will return the error
                   3070:                                 * condition regardless.
1.65      thorpej  3071:                                 *
                   3072:                                 * XXX Ignore what the actual error is,
                   3073:                                 * XXX just call it a resource shortage
                   3074:                                 * XXX so that it doesn't get confused
                   3075:                                 * XXX what uvm_map_protect() itself would
                   3076:                                 * XXX normally return.
                   3077:                                 */
1.99      chs      3078:
1.94      chs      3079:                                error = ENOMEM;
1.65      thorpej  3080:                        }
1.10      mrg      3081:                }
                   3082:                current = current->next;
                   3083:        }
1.105     chris    3084:        pmap_update(map->pmap);
1.85      chs      3085:
1.65      thorpej  3086:  out:
1.10      mrg      3087:        vm_map_unlock(map);
1.174     yamt     3088:
1.94      chs      3089:        UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0);
                   3090:        return error;
1.1       mrg      3091: }
                   3092:
                   3093: #undef  MASK
                   3094:
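/*
 * Editor's note: illustrative sketch, not part of the original file.
 * A minimal caller of uvm_map_protect(), making a page-aligned range
 * read-only while leaving max_protection alone; the function name is
 * hypothetical.
 */
#if 0	/* editor's example only */
static int
example_make_readonly(struct vm_map *map, vaddr_t start, vaddr_t end)
{

	/* set_max = false: change only the current protection */
	return uvm_map_protect(map, start, end, VM_PROT_READ, false);
}
#endif	/* editor's example only */
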
1.98      chs      3095: /*
1.1       mrg      3096:  * uvm_map_inherit: set inheritance code for range of addrs in map.
                   3097:  *
                   3098:  * => map must be unlocked
                   3099:  * => note that the inherit code is used during a "fork".  see fork
                   3100:  *     code for details.
                   3101:  */
                   3102:
1.10      mrg      3103: int
1.138     enami    3104: uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
                   3105:     vm_inherit_t new_inheritance)
1.10      mrg      3106: {
1.99      chs      3107:        struct vm_map_entry *entry, *temp_entry;
1.10      mrg      3108:        UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    3109:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx,new_inh=%#x)",
1.10      mrg      3110:            map, start, end, new_inheritance);
                   3111:
                   3112:        switch (new_inheritance) {
1.80      wiz      3113:        case MAP_INHERIT_NONE:
                   3114:        case MAP_INHERIT_COPY:
                   3115:        case MAP_INHERIT_SHARE:
1.330     christos 3116:        case MAP_INHERIT_ZERO:
1.10      mrg      3117:                break;
                   3118:        default:
                   3119:                UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
1.94      chs      3120:                return EINVAL;
1.10      mrg      3121:        }
1.1       mrg      3122:
1.10      mrg      3123:        vm_map_lock(map);
                   3124:        VM_MAP_RANGE_CHECK(map, start, end);
                   3125:        if (uvm_map_lookup_entry(map, start, &temp_entry)) {
                   3126:                entry = temp_entry;
1.311     para     3127:                UVM_MAP_CLIP_START(map, entry, start);
1.10      mrg      3128:        }  else {
                   3129:                entry = temp_entry->next;
                   3130:        }
                   3131:        while ((entry != &map->header) && (entry->start < end)) {
1.311     para     3132:                UVM_MAP_CLIP_END(map, entry, end);
1.10      mrg      3133:                entry->inheritance = new_inheritance;
                   3134:                entry = entry->next;
                   3135:        }
                   3136:        vm_map_unlock(map);
                   3137:        UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
1.94      chs      3138:        return 0;
1.41      mrg      3139: }
                   3140:
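/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Marking a range so that a child created by fork() shares it with the
 * parent instead of getting a copy; the function name is hypothetical.
 */
#if 0	/* editor's example only */
static int
example_share_on_fork(struct vm_map *map, vaddr_t start, vaddr_t end)
{

	return uvm_map_inherit(map, start, end, MAP_INHERIT_SHARE);
}
#endif	/* editor's example only */
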
1.98      chs      3141: /*
1.41      mrg      3142:  * uvm_map_advice: set advice code for range of addrs in map.
                   3143:  *
                   3144:  * => map must be unlocked
                   3145:  */
                   3146:
                   3147: int
1.138     enami    3148: uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
1.41      mrg      3149: {
1.99      chs      3150:        struct vm_map_entry *entry, *temp_entry;
1.41      mrg      3151:        UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    3152:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx,new_adv=%#x)",
1.41      mrg      3153:            map, start, end, new_advice);
                   3154:
                   3155:        vm_map_lock(map);
                   3156:        VM_MAP_RANGE_CHECK(map, start, end);
                   3157:        if (uvm_map_lookup_entry(map, start, &temp_entry)) {
                   3158:                entry = temp_entry;
1.311     para     3159:                UVM_MAP_CLIP_START(map, entry, start);
1.41      mrg      3160:        } else {
                   3161:                entry = temp_entry->next;
                   3162:        }
1.61      thorpej  3163:
                   3164:        /*
                   3165:         * XXXJRT: disallow holes?
                   3166:         */
                   3167:
1.41      mrg      3168:        while ((entry != &map->header) && (entry->start < end)) {
1.311     para     3169:                UVM_MAP_CLIP_END(map, entry, end);
1.41      mrg      3170:
                   3171:                switch (new_advice) {
                   3172:                case MADV_NORMAL:
                   3173:                case MADV_RANDOM:
                   3174:                case MADV_SEQUENTIAL:
                   3175:                        /* nothing special here */
                   3176:                        break;
                   3177:
                   3178:                default:
1.50      mrg      3179:                        vm_map_unlock(map);
1.41      mrg      3180:                        UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
1.94      chs      3181:                        return EINVAL;
1.41      mrg      3182:                }
                   3183:                entry->advice = new_advice;
                   3184:                entry = entry->next;
                   3185:        }
                   3186:
                   3187:        vm_map_unlock(map);
                   3188:        UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
1.94      chs      3189:        return 0;
1.1       mrg      3190: }
                   3191:
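/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Hinting that a range will be touched sequentially, as sys_madvise()
 * does for MADV_SEQUENTIAL; the function name is hypothetical.
 */
#if 0	/* editor's example only */
static int
example_advise_sequential(struct vm_map *map, vaddr_t start, vaddr_t end)
{

	return uvm_map_advice(map, start, end, MADV_SEQUENTIAL);
}
#endif	/* editor's example only */
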
                   3192: /*
1.271     yamt     3193:  * uvm_map_willneed: apply MADV_WILLNEED
                   3194:  */
                   3195:
                   3196: int
                   3197: uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end)
                   3198: {
                   3199:        struct vm_map_entry *entry;
                   3200:        UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    3201:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx)",
1.271     yamt     3202:            map, start, end, 0);
                   3203:
                   3204:        vm_map_lock_read(map);
                   3205:        VM_MAP_RANGE_CHECK(map, start, end);
                   3206:        if (!uvm_map_lookup_entry(map, start, &entry)) {
                   3207:                entry = entry->next;
                   3208:        }
                   3209:        while (entry->start < end) {
                   3210:                struct vm_amap * const amap = entry->aref.ar_amap;
                   3211:                struct uvm_object * const uobj = entry->object.uvm_obj;
                   3212:
                   3213:                KASSERT(entry != &map->header);
                   3214:                KASSERT(start < entry->end);
                   3215:                /*
1.296     yamt     3216:                 * For now, we handle only the easy but commonly-requested case,
                    3217:                 * i.e. start prefetching of backing uobj pages.
1.271     yamt     3218:                 *
1.296     yamt     3219:                 * XXX It might be useful to pmap_enter() the already-in-core
                   3220:                 * pages by inventing a "weak" mode for uvm_fault() which would
                   3221:                 * only do the PGO_LOCKED pgo_get().
1.271     yamt     3222:                 */
                   3223:                if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) {
                   3224:                        off_t offset;
                   3225:                        off_t size;
                   3226:
                   3227:                        offset = entry->offset;
                   3228:                        if (start < entry->start) {
                   3229:                                offset += entry->start - start;
                   3230:                        }
                   3231:                        size = entry->offset + (entry->end - entry->start);
                   3232:                        if (entry->end < end) {
                   3233:                                size -= end - entry->end;
                   3234:                        }
                   3235:                        uvm_readahead(uobj, offset, size);
                   3236:                }
                   3237:                entry = entry->next;
                   3238:        }
                   3239:        vm_map_unlock_read(map);
                   3240:        UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
                   3241:        return 0;
                   3242: }
                   3243:
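/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Starting read-ahead for the file pages backing an arbitrary byte
 * range, roughly what MADV_WILLNEED asks for; the function name is
 * hypothetical.
 */
#if 0	/* editor's example only */
static int
example_prefetch_window(struct vm_map *map, vaddr_t va, vsize_t len)
{

	return uvm_map_willneed(map, trunc_page(va), round_page(va + len));
}
#endif	/* editor's example only */
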
                   3244: /*
1.1       mrg      3245:  * uvm_map_pageable: sets the pageability of a range in a map.
                   3246:  *
1.56      thorpej  3247:  * => wires map entries.  should not be used for transient page locking.
                   3248:  *     for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
1.216     drochner 3249:  * => regions specified as not pageable require lock-down (wired) memory
1.1       mrg      3250:  *     and page tables.
1.59      thorpej  3251:  * => map must never be read-locked
1.234     thorpej  3252:  * => if islocked is true, map is already write-locked
1.59      thorpej  3253:  * => we always unlock the map, since we must downgrade to a read-lock
                   3254:  *     to call uvm_fault_wire()
1.1       mrg      3255:  * => XXXCDC: check this and try and clean it up.
                   3256:  */
                   3257:
1.19      kleink   3258: int
1.138     enami    3259: uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
1.233     thorpej  3260:     bool new_pageable, int lockflags)
1.1       mrg      3261: {
1.99      chs      3262:        struct vm_map_entry *entry, *start_entry, *failed_entry;
1.10      mrg      3263:        int rv;
1.60      thorpej  3264: #ifdef DIAGNOSTIC
                   3265:        u_int timestamp_save;
                   3266: #endif
1.10      mrg      3267:        UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    3268:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx,new_pageable=%u)",
1.85      chs      3269:                    map, start, end, new_pageable);
                   3270:        KASSERT(map->flags & VM_MAP_PAGEABLE);
1.45      thorpej  3271:
1.64      thorpej  3272:        if ((lockflags & UVM_LK_ENTER) == 0)
1.59      thorpej  3273:                vm_map_lock(map);
1.10      mrg      3274:        VM_MAP_RANGE_CHECK(map, start, end);
                   3275:
1.98      chs      3276:        /*
1.10      mrg      3277:         * only one pageability change may take place at one time, since
                   3278:         * uvm_fault_wire assumes it will be called only once for each
                   3279:         * wiring/unwiring.  therefore, we have to make sure we're actually
                   3280:         * changing the pageability for the entire region.  we do so before
1.98      chs      3281:         * making any changes.
1.10      mrg      3282:         */
                   3283:
1.234     thorpej  3284:        if (uvm_map_lookup_entry(map, start, &start_entry) == false) {
1.64      thorpej  3285:                if ((lockflags & UVM_LK_EXIT) == 0)
                   3286:                        vm_map_unlock(map);
1.85      chs      3287:
1.94      chs      3288:                UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0);
                   3289:                return EFAULT;
1.10      mrg      3290:        }
                   3291:        entry = start_entry;
                   3292:
1.98      chs      3293:        /*
1.100     wiz      3294:         * handle wiring and unwiring separately.
1.10      mrg      3295:         */
1.1       mrg      3296:
1.56      thorpej  3297:        if (new_pageable) {             /* unwire */
1.311     para     3298:                UVM_MAP_CLIP_START(map, entry, start);
1.85      chs      3299:
1.10      mrg      3300:                /*
                   3301:                 * unwiring.  first ensure that the range to be unwired is
1.98      chs      3302:                 * really wired down and that there are no holes.
1.10      mrg      3303:                 */
1.85      chs      3304:
1.10      mrg      3305:                while ((entry != &map->header) && (entry->start < end)) {
                   3306:                        if (entry->wired_count == 0 ||
                   3307:                            (entry->end < end &&
1.55      thorpej  3308:                             (entry->next == &map->header ||
                   3309:                              entry->next->start > entry->end))) {
1.64      thorpej  3310:                                if ((lockflags & UVM_LK_EXIT) == 0)
                   3311:                                        vm_map_unlock(map);
1.94      chs      3312:                                UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0);
                   3313:                                return EINVAL;
1.10      mrg      3314:                        }
                   3315:                        entry = entry->next;
                   3316:                }
                   3317:
1.98      chs      3318:                /*
1.56      thorpej  3319:                 * POSIX 1003.1b - a single munlock call unlocks a region,
                   3320:                 * regardless of the number of mlock calls made on that
                   3321:                 * region.
1.10      mrg      3322:                 */
1.85      chs      3323:
1.10      mrg      3324:                entry = start_entry;
                   3325:                while ((entry != &map->header) && (entry->start < end)) {
1.311     para     3326:                        UVM_MAP_CLIP_END(map, entry, end);
1.56      thorpej  3327:                        if (VM_MAPENT_ISWIRED(entry))
1.10      mrg      3328:                                uvm_map_entry_unwire(map, entry);
                   3329:                        entry = entry->next;
                   3330:                }
1.64      thorpej  3331:                if ((lockflags & UVM_LK_EXIT) == 0)
                   3332:                        vm_map_unlock(map);
1.10      mrg      3333:                UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
1.94      chs      3334:                return 0;
1.10      mrg      3335:        }
                   3336:
                   3337:        /*
                   3338:         * wire case: in two passes [XXXCDC: ugly block of code here]
                   3339:         *
                   3340:         * 1: holding the write lock, we create any anonymous maps that need
                   3341:         *    to be created.  then we clip each map entry to the region to
1.98      chs      3342:         *    be wired and increment its wiring count.
1.10      mrg      3343:         *
                   3344:         * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
1.56      thorpej  3345:         *    in the pages for any newly wired area (wired_count == 1).
1.10      mrg      3346:         *
                   3347:         *    downgrading to a read lock for uvm_fault_wire avoids a possible
                   3348:         *    deadlock with another thread that may have faulted on one of
                   3349:         *    the pages to be wired (it would mark the page busy, blocking
                   3350:         *    us, then in turn block on the map lock that we hold).  because
                   3351:         *    of problems in the recursive lock package, we cannot upgrade
                   3352:         *    to a write lock in vm_map_lookup.  thus, any actions that
                   3353:         *    require the write lock must be done beforehand.  because we
                   3354:         *    keep the read lock on the map, the copy-on-write status of the
                   3355:         *    entries we modify here cannot change.
                   3356:         */
                   3357:
                   3358:        while ((entry != &map->header) && (entry->start < end)) {
1.55      thorpej  3359:                if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
1.85      chs      3360:
                   3361:                        /*
1.10      mrg      3362:                         * perform actions of vm_map_lookup that need the
                   3363:                         * write lock on the map: create an anonymous map
                   3364:                         * for a copy-on-write region, or an anonymous map
1.29      chuck    3365:                         * for a zero-fill region.  (XXXCDC: submap case
                   3366:                         * ok?)
1.10      mrg      3367:                         */
1.85      chs      3368:
1.29      chuck    3369:                        if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
1.98      chs      3370:                                if (UVM_ET_ISNEEDSCOPY(entry) &&
1.117     chs      3371:                                    ((entry->max_protection & VM_PROT_WRITE) ||
1.54      thorpej  3372:                                     (entry->object.uvm_obj == NULL))) {
1.212     yamt     3373:                                        amap_copy(map, entry, 0, start, end);
1.10      mrg      3374:                                        /* XXXCDC: wait OK? */
                   3375:                                }
                   3376:                        }
1.55      thorpej  3377:                }
1.311     para     3378:                UVM_MAP_CLIP_START(map, entry, start);
                   3379:                UVM_MAP_CLIP_END(map, entry, end);
1.10      mrg      3380:                entry->wired_count++;
                   3381:
                   3382:                /*
1.98      chs      3383:                 * Check for holes
1.10      mrg      3384:                 */
1.85      chs      3385:
1.54      thorpej  3386:                if (entry->protection == VM_PROT_NONE ||
                   3387:                    (entry->end < end &&
                   3388:                     (entry->next == &map->header ||
                   3389:                      entry->next->start > entry->end))) {
1.85      chs      3390:
1.10      mrg      3391:                        /*
                   3392:                         * found one.  amap creation actions do not need to
1.98      chs      3393:                         * be undone, but the wired counts need to be restored.
1.10      mrg      3394:                         */
1.85      chs      3395:
1.10      mrg      3396:                        while (entry != &map->header && entry->end > start) {
                   3397:                                entry->wired_count--;
                   3398:                                entry = entry->prev;
                   3399:                        }
1.64      thorpej  3400:                        if ((lockflags & UVM_LK_EXIT) == 0)
                   3401:                                vm_map_unlock(map);
1.10      mrg      3402:                        UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
1.94      chs      3403:                        return EINVAL;
1.10      mrg      3404:                }
                   3405:                entry = entry->next;
                   3406:        }
                   3407:
                   3408:        /*
                   3409:         * Pass 2.
                   3410:         */
1.51      thorpej  3411:
1.60      thorpej  3412: #ifdef DIAGNOSTIC
                   3413:        timestamp_save = map->timestamp;
                   3414: #endif
                   3415:        vm_map_busy(map);
1.249     yamt     3416:        vm_map_unlock(map);
1.10      mrg      3417:
                   3418:        rv = 0;
                   3419:        entry = start_entry;
                   3420:        while (entry != &map->header && entry->start < end) {
1.51      thorpej  3421:                if (entry->wired_count == 1) {
1.44      thorpej  3422:                        rv = uvm_fault_wire(map, entry->start, entry->end,
1.216     drochner 3423:                            entry->max_protection, 1);
1.10      mrg      3424:                        if (rv) {
1.94      chs      3425:
1.51      thorpej  3426:                                /*
                   3427:                                 * wiring failed.  break out of the loop.
                   3428:                                 * we'll clean up the map below, once we
                   3429:                                 * have a write lock again.
                   3430:                                 */
1.94      chs      3431:
1.51      thorpej  3432:                                break;
1.10      mrg      3433:                        }
                   3434:                }
                   3435:                entry = entry->next;
                   3436:        }
                   3437:
1.139     enami    3438:        if (rv) {       /* failed? */
1.85      chs      3439:
1.52      thorpej  3440:                /*
                   3441:                 * Get back to an exclusive (write) lock.
                   3442:                 */
1.85      chs      3443:
1.249     yamt     3444:                vm_map_lock(map);
1.60      thorpej  3445:                vm_map_unbusy(map);
                   3446:
                   3447: #ifdef DIAGNOSTIC
1.252     yamt     3448:                if (timestamp_save + 1 != map->timestamp)
1.60      thorpej  3449:                        panic("uvm_map_pageable: stale map");
                   3450: #endif
1.10      mrg      3451:
1.51      thorpej  3452:                /*
                   3453:                 * first drop the wiring count on all the entries
                   3454:                 * which haven't actually been wired yet.
                   3455:                 */
1.85      chs      3456:
1.54      thorpej  3457:                failed_entry = entry;
                   3458:                while (entry != &map->header && entry->start < end) {
1.51      thorpej  3459:                        entry->wired_count--;
1.54      thorpej  3460:                        entry = entry->next;
                   3461:                }
1.51      thorpej  3462:
                   3463:                /*
1.54      thorpej  3464:                 * now, unwire all the entries that were successfully
                   3465:                 * wired above.
1.51      thorpej  3466:                 */
1.85      chs      3467:
1.54      thorpej  3468:                entry = start_entry;
                   3469:                while (entry != failed_entry) {
                   3470:                        entry->wired_count--;
1.55      thorpej  3471:                        if (VM_MAPENT_ISWIRED(entry) == 0)
1.54      thorpej  3472:                                uvm_map_entry_unwire(map, entry);
                   3473:                        entry = entry->next;
                   3474:                }
1.64      thorpej  3475:                if ((lockflags & UVM_LK_EXIT) == 0)
                   3476:                        vm_map_unlock(map);
1.10      mrg      3477:                UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
1.139     enami    3478:                return (rv);
1.10      mrg      3479:        }
1.51      thorpej  3480:
1.64      thorpej  3481:        if ((lockflags & UVM_LK_EXIT) == 0) {
                   3482:                vm_map_unbusy(map);
                   3483:        } else {
1.85      chs      3484:
1.64      thorpej  3485:                /*
                   3486:                 * Get back to an exclusive (write) lock.
                   3487:                 */
1.85      chs      3488:
1.249     yamt     3489:                vm_map_lock(map);
1.64      thorpej  3490:                vm_map_unbusy(map);
                   3491:        }
                   3492:
1.10      mrg      3493:        UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
1.94      chs      3494:        return 0;
1.1       mrg      3495: }
                   3496:
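/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Wiring a range and unwiring it again, roughly what mlock()/munlock()
 * do on the current process map; the function name is hypothetical.
 * No UVM_LK_* flags are passed, so the map must be unlocked on entry.
 */
#if 0	/* editor's example only */
static int
example_wire_range(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	int error;

	/* new_pageable = false: fault in and wire the pages */
	error = uvm_map_pageable(map, start, end, false, 0);
	if (error)
		return error;

	/* ... the range is now wired; use it ... */

	/* new_pageable = true: unwire the range again */
	return uvm_map_pageable(map, start, end, true, 0);
}
#endif	/* editor's example only */
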
                   3497: /*
1.54      thorpej  3498:  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
                   3499:  * all mapped regions.
                   3500:  *
                   3501:  * => map must not be locked.
                   3502:  * => if no flags are specified, all regions are unwired.
                   3503:  * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
                   3504:  */
                   3505:
                   3506: int
1.138     enami    3507: uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
1.54      thorpej  3508: {
1.99      chs      3509:        struct vm_map_entry *entry, *failed_entry;
1.54      thorpej  3510:        vsize_t size;
                   3511:        int rv;
1.60      thorpej  3512: #ifdef DIAGNOSTIC
                   3513:        u_int timestamp_save;
                   3514: #endif
1.54      thorpej  3515:        UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
1.331.2.2  skrll    3516:        UVMHIST_LOG(maphist,"(map=%p,flags=%#x)", map, flags, 0, 0);
1.54      thorpej  3517:
1.85      chs      3518:        KASSERT(map->flags & VM_MAP_PAGEABLE);
1.54      thorpej  3519:
                   3520:        vm_map_lock(map);
                   3521:
                   3522:        /*
                   3523:         * handle wiring and unwiring separately.
                   3524:         */
                   3525:
                   3526:        if (flags == 0) {                       /* unwire */
1.99      chs      3527:
1.54      thorpej  3528:                /*
1.56      thorpej  3529:                 * POSIX 1003.1b -- munlockall unlocks all regions,
                   3530:                 * regardless of how many times mlockall has been called.
1.54      thorpej  3531:                 */
1.99      chs      3532:
1.54      thorpej  3533:                for (entry = map->header.next; entry != &map->header;
                   3534:                     entry = entry->next) {
1.56      thorpej  3535:                        if (VM_MAPENT_ISWIRED(entry))
                   3536:                                uvm_map_entry_unwire(map, entry);
1.54      thorpej  3537:                }
1.238     ad       3538:                map->flags &= ~VM_MAP_WIREFUTURE;
1.54      thorpej  3539:                vm_map_unlock(map);
                   3540:                UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
1.94      chs      3541:                return 0;
1.54      thorpej  3542:        }
                   3543:
                   3544:        if (flags & MCL_FUTURE) {
1.99      chs      3545:
1.54      thorpej  3546:                /*
                   3547:                 * must wire all future mappings; remember this.
                   3548:                 */
1.99      chs      3549:
1.238     ad       3550:                map->flags |= VM_MAP_WIREFUTURE;
1.54      thorpej  3551:        }
                   3552:
                   3553:        if ((flags & MCL_CURRENT) == 0) {
1.99      chs      3554:
1.54      thorpej  3555:                /*
                   3556:                 * no more work to do!
                   3557:                 */
1.99      chs      3558:
1.54      thorpej  3559:                UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
                   3560:                vm_map_unlock(map);
1.94      chs      3561:                return 0;
1.54      thorpej  3562:        }
                   3563:
                   3564:        /*
                   3565:         * wire case: in three passes [XXXCDC: ugly block of code here]
                   3566:         *
                   3567:         * 1: holding the write lock, count all pages mapped by non-wired
                   3568:         *    entries.  if this would cause us to go over our limit, we fail.
                   3569:         *
                   3570:         * 2: still holding the write lock, we create any anonymous maps that
                   3571:         *    need to be created.  then we increment its wiring count.
                   3572:         *
                   3573:         * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
1.56      thorpej  3574:         *    in the pages for any newly wired area (wired_count == 1).
1.54      thorpej  3575:         *
                   3576:         *    downgrading to a read lock for uvm_fault_wire avoids a possible
                   3577:         *    deadlock with another thread that may have faulted on one of
                   3578:         *    the pages to be wired (it would mark the page busy, blocking
                   3579:         *    us, then in turn block on the map lock that we hold).  because
                   3580:         *    of problems in the recursive lock package, we cannot upgrade
                   3581:         *    to a write lock in vm_map_lookup.  thus, any actions that
                   3582:         *    require the write lock must be done beforehand.  because we
                   3583:         *    keep the read lock on the map, the copy-on-write status of the
                   3584:         *    entries we modify here cannot change.
                   3585:         */
                   3586:
                   3587:        for (size = 0, entry = map->header.next; entry != &map->header;
                   3588:             entry = entry->next) {
                   3589:                if (entry->protection != VM_PROT_NONE &&
1.55      thorpej  3590:                    VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
1.54      thorpej  3591:                        size += entry->end - entry->start;
                   3592:                }
                   3593:        }
                   3594:
                   3595:        if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
                   3596:                vm_map_unlock(map);
1.94      chs      3597:                return ENOMEM;
1.54      thorpej  3598:        }
                   3599:
                   3600:        if (limit != 0 &&
                   3601:            (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
                   3602:                vm_map_unlock(map);
1.94      chs      3603:                return ENOMEM;
1.54      thorpej  3604:        }
                   3605:
                   3606:        /*
                   3607:         * Pass 2.
                   3608:         */
                   3609:
                   3610:        for (entry = map->header.next; entry != &map->header;
                   3611:             entry = entry->next) {
                   3612:                if (entry->protection == VM_PROT_NONE)
                   3613:                        continue;
1.55      thorpej  3614:                if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
1.99      chs      3615:
1.54      thorpej  3616:                        /*
                   3617:                         * perform actions of vm_map_lookup that need the
                   3618:                         * write lock on the map: create an anonymous map
                   3619:                         * for a copy-on-write region, or an anonymous map
                   3620:                         * for a zero-fill region.  (XXXCDC: submap case
                   3621:                         * ok?)
                   3622:                         */
1.99      chs      3623:
1.54      thorpej  3624:                        if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
1.98      chs      3625:                                if (UVM_ET_ISNEEDSCOPY(entry) &&
1.117     chs      3626:                                    ((entry->max_protection & VM_PROT_WRITE) ||
1.54      thorpej  3627:                                     (entry->object.uvm_obj == NULL))) {
1.212     yamt     3628:                                        amap_copy(map, entry, 0, entry->start,
                   3629:                                            entry->end);
1.54      thorpej  3630:                                        /* XXXCDC: wait OK? */
                   3631:                                }
                   3632:                        }
1.55      thorpej  3633:                }
1.54      thorpej  3634:                entry->wired_count++;
                   3635:        }
                   3636:
                   3637:        /*
                   3638:         * Pass 3.
                   3639:         */
                   3640:
1.60      thorpej  3641: #ifdef DIAGNOSTIC
                   3642:        timestamp_save = map->timestamp;
                   3643: #endif
                   3644:        vm_map_busy(map);
1.249     yamt     3645:        vm_map_unlock(map);
1.54      thorpej  3646:
1.94      chs      3647:        rv = 0;
1.54      thorpej  3648:        for (entry = map->header.next; entry != &map->header;
                   3649:             entry = entry->next) {
                   3650:                if (entry->wired_count == 1) {
                   3651:                        rv = uvm_fault_wire(map, entry->start, entry->end,
1.216     drochner 3652:                            entry->max_protection, 1);
1.54      thorpej  3653:                        if (rv) {
1.99      chs      3654:
1.54      thorpej  3655:                                /*
                   3656:                                 * wiring failed.  break out of the loop.
                   3657:                                 * we'll clean up the map below, once we
                   3658:                                 * have a write lock again.
                   3659:                                 */
1.99      chs      3660:
1.54      thorpej  3661:                                break;
                   3662:                        }
                   3663:                }
                   3664:        }
                   3665:
1.99      chs      3666:        if (rv) {
                   3667:
1.54      thorpej  3668:                /*
                   3669:                 * Get back an exclusive (write) lock.
                   3670:                 */
1.99      chs      3671:
1.249     yamt     3672:                vm_map_lock(map);
1.60      thorpej  3673:                vm_map_unbusy(map);
                   3674:
                   3675: #ifdef DIAGNOSTIC
1.252     yamt     3676:                if (timestamp_save + 1 != map->timestamp)
1.60      thorpej  3677:                        panic("uvm_map_pageable_all: stale map");
                   3678: #endif
1.54      thorpej  3679:
                   3680:                /*
                   3681:                 * first drop the wiring count on all the entries
                   3682:                 * which haven't actually been wired yet.
1.67      thorpej  3683:                 *
                   3684:                 * Skip VM_PROT_NONE entries like we did above.
1.54      thorpej  3685:                 */
1.99      chs      3686:
1.54      thorpej  3687:                failed_entry = entry;
                   3688:                for (/* nothing */; entry != &map->header;
1.67      thorpej  3689:                     entry = entry->next) {
                   3690:                        if (entry->protection == VM_PROT_NONE)
                   3691:                                continue;
1.54      thorpej  3692:                        entry->wired_count--;
1.67      thorpej  3693:                }
1.54      thorpej  3694:
                   3695:                /*
                   3696:                 * now, unwire all the entries that were successfully
                   3697:                 * wired above.
1.67      thorpej  3698:                 *
                   3699:                 * Skip VM_PROT_NONE entries like we did above.
1.54      thorpej  3700:                 */
1.99      chs      3701:
1.54      thorpej  3702:                for (entry = map->header.next; entry != failed_entry;
                   3703:                     entry = entry->next) {
1.67      thorpej  3704:                        if (entry->protection == VM_PROT_NONE)
                   3705:                                continue;
1.54      thorpej  3706:                        entry->wired_count--;
1.67      thorpej  3707:                        if (VM_MAPENT_ISWIRED(entry))
1.54      thorpej  3708:                                uvm_map_entry_unwire(map, entry);
                   3709:                }
                   3710:                vm_map_unlock(map);
                   3711:                UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0);
                   3712:                return (rv);
                   3713:        }
                   3714:
1.60      thorpej  3715:        vm_map_unbusy(map);
1.54      thorpej  3716:
                   3717:        UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
1.94      chs      3718:        return 0;
1.54      thorpej  3719: }
                   3720:
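/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Wiring every current mapping and requesting that future mappings be
 * wired as well, as mlockall(MCL_CURRENT|MCL_FUTURE) does; "limit" is
 * the per-process locked-memory limit and the function name is
 * hypothetical.
 */
#if 0	/* editor's example only */
static int
example_lock_all(struct vm_map *map, vsize_t limit)
{

	return uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, limit);
}
#endif	/* editor's example only */
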
                   3721: /*
1.61      thorpej  3722:  * uvm_map_clean: clean out a map range
1.1       mrg      3723:  *
                   3724:  * => valid flags:
1.61      thorpej  3725:  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
1.1       mrg      3726:  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
                   3727:  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
                   3728:  *   if (flags & PGO_FREE): any cached pages are freed after clean
                   3729:  * => returns an error if any part of the specified range isn't mapped
1.98      chs      3730:  * => never a need to flush amap layer since the anonymous memory has
1.61      thorpej  3731:  *     no permanent home, but may deactivate pages there
                   3732:  * => called from sys_msync() and sys_madvise()
1.1       mrg      3733:  * => caller must not write-lock map (read OK).
                   3734:  * => we may sleep while cleaning if SYNCIO [with map read-locked]
                   3735:  */
                   3736:
1.10      mrg      3737: int
1.138     enami    3738: uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
1.10      mrg      3739: {
1.99      chs      3740:        struct vm_map_entry *current, *entry;
1.61      thorpej  3741:        struct uvm_object *uobj;
                   3742:        struct vm_amap *amap;
1.298     rmind    3743:        struct vm_anon *anon, *anon_tofree;
1.61      thorpej  3744:        struct vm_page *pg;
                   3745:        vaddr_t offset;
1.24      eeh      3746:        vsize_t size;
1.188     dbj      3747:        voff_t uoff;
1.106     chs      3748:        int error, refs;
1.10      mrg      3749:        UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
1.85      chs      3750:
1.331.2.2  skrll    3751:        UVMHIST_LOG(maphist,"(map=%p,start=%#lx,end=%#lx,flags=%#x)",
1.85      chs      3752:                    map, start, end, flags);
                   3753:        KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
                   3754:                (PGO_FREE|PGO_DEACTIVATE));
1.61      thorpej  3755:
1.10      mrg      3756:        vm_map_lock_read(map);
                   3757:        VM_MAP_RANGE_CHECK(map, start, end);
1.234     thorpej  3758:        if (uvm_map_lookup_entry(map, start, &entry) == false) {
1.10      mrg      3759:                vm_map_unlock_read(map);
1.94      chs      3760:                return EFAULT;
1.10      mrg      3761:        }
                   3762:
                   3763:        /*
1.186     chs      3764:         * Make a first pass to check for holes and wiring problems.
1.10      mrg      3765:         */
1.85      chs      3766:
1.10      mrg      3767:        for (current = entry; current->start < end; current = current->next) {
                   3768:                if (UVM_ET_ISSUBMAP(current)) {
                   3769:                        vm_map_unlock_read(map);
1.94      chs      3770:                        return EINVAL;
1.10      mrg      3771:                }
1.186     chs      3772:                if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) {
                   3773:                        vm_map_unlock_read(map);
                   3774:                        return EBUSY;
                   3775:                }
1.90      chs      3776:                if (end <= current->end) {
                   3777:                        break;
                   3778:                }
                   3779:                if (current->end != current->next->start) {
1.10      mrg      3780:                        vm_map_unlock_read(map);
1.94      chs      3781:                        return EFAULT;
1.10      mrg      3782:                }
                   3783:        }
                   3784:
1.94      chs      3785:        error = 0;
1.90      chs      3786:        for (current = entry; start < end; current = current->next) {
1.283     uebayasi 3787:                amap = current->aref.ar_amap;   /* upper layer */
                   3788:                uobj = current->object.uvm_obj; /* lower layer */
1.85      chs      3789:                KASSERT(start >= current->start);
1.1       mrg      3790:
1.10      mrg      3791:                /*
1.61      thorpej  3792:                 * No amap cleaning necessary if:
                   3793:                 *
                   3794:                 *      (1) There's no amap.
                   3795:                 *
                   3796:                 *      (2) We're not deactivating or freeing pages.
1.10      mrg      3797:                 */
1.85      chs      3798:
1.90      chs      3799:                if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
1.61      thorpej  3800:                        goto flush_object;
                   3801:
                   3802:                offset = start - current->start;
1.90      chs      3803:                size = MIN(end, current->end) - start;
1.303     rmind    3804:                anon_tofree = NULL;
                   3805:
                   3806:                amap_lock(amap);
1.90      chs      3807:                for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
1.61      thorpej  3808:                        anon = amap_lookup(&current->aref, offset);
                   3809:                        if (anon == NULL)
                   3810:                                continue;
                   3811:
1.298     rmind    3812:                        KASSERT(anon->an_lock == amap->am_lock);
1.192     yamt     3813:                        pg = anon->an_page;
1.63      thorpej  3814:                        if (pg == NULL) {
                   3815:                                continue;
                   3816:                        }
1.331.2.1  skrll    3817:                        if (pg->flags & PG_BUSY) {
                   3818:                                continue;
                   3819:                        }
1.63      thorpej  3820:
1.61      thorpej  3821:                        switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
1.85      chs      3822:
1.61      thorpej  3823:                        /*
1.115     chs      3824:                         * In these first 3 cases, we just deactivate the page.
1.61      thorpej  3825:                         */
1.85      chs      3826:
1.61      thorpej  3827:                        case PGO_CLEANIT|PGO_FREE:
                   3828:                        case PGO_CLEANIT|PGO_DEACTIVATE:
                   3829:                        case PGO_DEACTIVATE:
1.68      thorpej  3830:  deactivate_it:
1.61      thorpej  3831:                                /*
1.115     chs      3832:                                 * skip the page if it's loaned or wired,
                   3833:                                 * since it shouldn't be on a paging queue
                   3834:                                 * at all in these cases.
1.61      thorpej  3835:                                 */
1.85      chs      3836:
1.248     ad       3837:                                mutex_enter(&uvm_pageqlock);
1.115     chs      3838:                                if (pg->loan_count != 0 ||
                   3839:                                    pg->wire_count != 0) {
1.248     ad       3840:                                        mutex_exit(&uvm_pageqlock);
1.61      thorpej  3841:                                        continue;
                   3842:                                }
1.85      chs      3843:                                KASSERT(pg->uanon == anon);
1.61      thorpej  3844:                                uvm_pagedeactivate(pg);
1.248     ad       3845:                                mutex_exit(&uvm_pageqlock);
1.61      thorpej  3846:                                continue;
                   3847:
                   3848:                        case PGO_FREE:
1.85      chs      3849:
1.68      thorpej  3850:                                /*
                   3851:                                 * If there are multiple references to
                   3852:                                 * the amap, just deactivate the page.
                   3853:                                 */
1.85      chs      3854:
1.68      thorpej  3855:                                if (amap_refs(amap) > 1)
                   3856:                                        goto deactivate_it;
                   3857:
1.115     chs      3858:                                /* skip the page if it's wired */
1.62      thorpej  3859:                                if (pg->wire_count != 0) {
                   3860:                                        continue;
                   3861:                                }
1.66      thorpej  3862:                                amap_unadd(&current->aref, offset);
1.61      thorpej  3863:                                refs = --anon->an_ref;
1.298     rmind    3864:                                if (refs == 0) {
                   3865:                                        anon->an_link = anon_tofree;
                   3866:                                        anon_tofree = anon;
                   3867:                                }
1.61      thorpej  3868:                                continue;
                   3869:                        }
                   3870:                }
1.303     rmind    3871:                uvm_anon_freelst(amap, anon_tofree);
1.1       mrg      3872:
1.61      thorpej  3873:  flush_object:
1.10      mrg      3874:                /*
1.33      chuck    3875:                 * flush pages if we've got a valid backing object.
1.116     chs      3876:                 * note that we must always clean object pages before
                   3877:                 * freeing them since otherwise we could reveal stale
                   3878:                 * data from files.
1.10      mrg      3879:                 */
1.1       mrg      3880:
1.188     dbj      3881:                uoff = current->offset + (start - current->start);
1.90      chs      3882:                size = MIN(end, current->end) - start;
1.61      thorpej  3883:                if (uobj != NULL) {
1.298     rmind    3884:                        mutex_enter(uobj->vmobjlock);
1.136     thorpej  3885:                        if (uobj->pgops->pgo_put != NULL)
1.188     dbj      3886:                                error = (uobj->pgops->pgo_put)(uobj, uoff,
                   3887:                                    uoff + size, flags | PGO_CLEANIT);
1.136     thorpej  3888:                        else
                   3889:                                error = 0;
1.10      mrg      3890:                }
                   3891:                start += size;
                   3892:        }
1.1       mrg      3893:        vm_map_unlock_read(map);
1.98      chs      3894:        return (error);
1.1       mrg      3895: }
                   3896:
                   3897:
                   3898: /*
                   3899:  * uvm_map_checkprot: check protection in map
                   3900:  *
                   3901:  * => the region must be fully allocated and allow the specified protection.
                   3902:  * => map must be read or write locked by caller.
                   3903:  */
                   3904:
1.233     thorpej  3905: bool
1.138     enami    3906: uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
                   3907:     vm_prot_t protection)
1.10      mrg      3908: {
1.99      chs      3909:        struct vm_map_entry *entry;
                   3910:        struct vm_map_entry *tmp_entry;
1.10      mrg      3911:
1.94      chs      3912:        if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
1.234     thorpej  3913:                return (false);
1.94      chs      3914:        }
                   3915:        entry = tmp_entry;
                   3916:        while (start < end) {
                   3917:                if (entry == &map->header) {
1.234     thorpej  3918:                        return (false);
1.94      chs      3919:                }
1.85      chs      3920:
1.10      mrg      3921:                /*
                   3922:                 * no holes allowed
                   3923:                 */
                   3924:
1.94      chs      3925:                if (start < entry->start) {
1.234     thorpej  3926:                        return (false);
1.94      chs      3927:                }
1.10      mrg      3928:
                   3929:                /*
                   3930:                 * check protection associated with entry
                   3931:                 */
1.1       mrg      3932:
1.94      chs      3933:                if ((entry->protection & protection) != protection) {
1.234     thorpej  3934:                        return (false);
1.94      chs      3935:                }
                   3936:                start = entry->end;
                   3937:                entry = entry->next;
                   3938:        }
1.234     thorpej  3939:        return (true);
1.1       mrg      3940: }
                   3941:
                   3942: /*
                   3943:  * uvmspace_alloc: allocate a vmspace structure.
                   3944:  *
                   3945:  * - structure includes vm_map and pmap
                   3946:  * - XXX: no locking on this structure
                   3947:  * - refcnt set to 1, rest must be init'd by caller
                   3948:  */
1.10      mrg      3949: struct vmspace *
1.327     martin   3950: uvmspace_alloc(vaddr_t vmin, vaddr_t vmax, bool topdown)
1.10      mrg      3951: {
                   3952:        struct vmspace *vm;
                   3953:        UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
                   3954:
1.248     ad       3955:        vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK);
1.327     martin   3956:        uvmspace_init(vm, NULL, vmin, vmax, topdown);
1.331.2.2  skrll    3957:        UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0);
1.15      thorpej  3958:        return (vm);
                   3959: }
                   3960:
                   3961: /*
                   3962:  * uvmspace_init: initialize a vmspace structure.
                   3963:  *
                   3964:  * - XXX: no locking on this structure
1.132     matt     3965:  * - refcnt set to 1, rest must be init'd by caller
1.15      thorpej  3966:  */
                   3967: void
1.327     martin   3968: uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin,
                   3969:     vaddr_t vmax, bool topdown)
1.15      thorpej  3970: {
                   3971:        UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
                   3972:
1.331.2.2  skrll    3973:        UVMHIST_LOG(maphist, "(vm=%p, pmap=%p, vmin=%#lx, vmax=%#lx",
                   3974:            vm, pmap, vmin, vmax);
                   3975:        UVMHIST_LOG(maphist, "   topdown=%u)", topdown, 0, 0, 0);
                   3976:
1.23      perry    3977:        memset(vm, 0, sizeof(*vm));
1.199     christos 3978:        uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE
1.327     martin   3979:            | (topdown ? VM_MAP_TOPDOWN : 0)
1.131     atatat   3980:            );
1.15      thorpej  3981:        if (pmap)
                   3982:                pmap_reference(pmap);
                   3983:        else
                   3984:                pmap = pmap_create();
                   3985:        vm->vm_map.pmap = pmap;
1.10      mrg      3986:        vm->vm_refcnt = 1;
1.15      thorpej  3987:        UVMHIST_LOG(maphist,"<- done",0,0,0,0);
1.1       mrg      3988: }
                   3989:
                   3990: /*
1.168     junyoung 3991:  * uvmspace_share: share a vmspace between two processes
1.1       mrg      3992:  *
                   3993:  * - used for vfork, threads(?)
                   3994:  */
                   3995:
1.10      mrg      3996: void
1.138     enami    3997: uvmspace_share(struct proc *p1, struct proc *p2)
1.1       mrg      3998: {
1.139     enami    3999:
1.215     yamt     4000:        uvmspace_addref(p1->p_vmspace);
1.10      mrg      4001:        p2->p_vmspace = p1->p_vmspace;
1.1       mrg      4002: }
                   4003:
1.282     rmind    4004: #if 0
                   4005:
1.1       mrg      4006: /*
                   4007:  * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
                   4008:  *
                   4009:  * - XXX: no locking on vmspace
                   4010:  */
                   4011:
1.10      mrg      4012: void
1.138     enami    4013: uvmspace_unshare(struct lwp *l)
1.10      mrg      4014: {
1.128     thorpej  4015:        struct proc *p = l->l_proc;
1.10      mrg      4016:        struct vmspace *nvm, *ovm = p->p_vmspace;
1.85      chs      4017:
1.10      mrg      4018:        if (ovm->vm_refcnt == 1)
                   4019:                /* nothing to do: vmspace isn't shared in the first place */
                   4020:                return;
1.85      chs      4021:
1.10      mrg      4022:        /* make a new vmspace, still holding old one */
                   4023:        nvm = uvmspace_fork(ovm);
                   4024:
1.254     ad       4025:        kpreempt_disable();
1.128     thorpej  4026:        pmap_deactivate(l);             /* unbind old vmspace */
1.98      chs      4027:        p->p_vmspace = nvm;
1.128     thorpej  4028:        pmap_activate(l);               /* switch to new vmspace */
1.254     ad       4029:        kpreempt_enable();
1.13      thorpej  4030:
1.10      mrg      4031:        uvmspace_free(ovm);             /* drop reference to old vmspace */
1.1       mrg      4032: }
                   4033:
1.282     rmind    4034: #endif
                   4035:
1.317     martin   4036:
                   4037: /*
                   4038:  * uvmspace_spawn: a new process has been spawned and needs a vmspace
                   4039:  */
                   4040:
                   4041: void
1.327     martin   4042: uvmspace_spawn(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
1.317     martin   4043: {
                   4044:        struct proc *p = l->l_proc;
                   4045:        struct vmspace *nvm;
                   4046:
                   4047: #ifdef __HAVE_CPU_VMSPACE_EXEC
                   4048:        cpu_vmspace_exec(l, start, end);
                   4049: #endif
                   4050:
1.327     martin   4051:        nvm = uvmspace_alloc(start, end, topdown);
1.317     martin   4052:        kpreempt_disable();
                   4053:        p->p_vmspace = nvm;
                   4054:        pmap_activate(l);
                   4055:        kpreempt_enable();
                   4056: }
                   4057:
1.1       mrg      4058: /*
                   4059:  * uvmspace_exec: the process wants to exec a new program
                   4060:  */
                   4061:
1.10      mrg      4062: void
1.327     martin   4063: uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
1.1       mrg      4064: {
1.128     thorpej  4065:        struct proc *p = l->l_proc;
1.10      mrg      4066:        struct vmspace *nvm, *ovm = p->p_vmspace;
1.302     martin   4067:        struct vm_map *map;
1.1       mrg      4068:
1.317     martin   4069:        KASSERT(ovm != NULL);
1.294     matt     4070: #ifdef __HAVE_CPU_VMSPACE_EXEC
                   4071:        cpu_vmspace_exec(l, start, end);
                   4072: #endif
1.1       mrg      4073:
1.302     martin   4074:        map = &ovm->vm_map;
                   4075:        /*
1.10      mrg      4076:         * see if more than one process is using this vmspace...
                   4077:         */
1.1       mrg      4078:
1.327     martin   4079:        if (ovm->vm_refcnt == 1
                   4080:            && topdown == ((ovm->vm_map.flags & VM_MAP_TOPDOWN) != 0)) {
1.1       mrg      4081:
1.10      mrg      4082:                /*
                   4083:                 * if p is the only process using its vmspace then we can safely
                   4084:                 * recycle that vmspace for the program that is being exec'd.
1.327     martin   4085:                 * But only if TOPDOWN matches the requested value for the new
                   4086:                 * vm space!
1.10      mrg      4087:                 */
1.1       mrg      4088:
1.10      mrg      4089:                /*
                   4090:                 * SYSV SHM semantics require us to kill all segments on an exec
                   4091:                 */
1.331.2.3  skrll    4092:                if (uvm_shmexit && ovm->vm_shm)
                   4093:                        (*uvm_shmexit)(ovm);
1.54      thorpej  4094:
                   4095:                /*
                   4096:                 * POSIX 1003.1b -- "lock future mappings" is revoked
                   4097:                 * when a process execs another program image.
                   4098:                 */
1.99      chs      4099:
1.238     ad       4100:                map->flags &= ~VM_MAP_WIREFUTURE;
1.10      mrg      4101:
                   4102:                /*
                   4103:                 * now unmap the old program
                   4104:                 */
1.99      chs      4105:
1.120     chs      4106:                pmap_remove_all(map->pmap);
1.184     chs      4107:                uvm_unmap(map, vm_map_min(map), vm_map_max(map));
1.144     yamt     4108:                KASSERT(map->header.prev == &map->header);
                   4109:                KASSERT(map->nentries == 0);
1.93      eeh      4110:
                   4111:                /*
                   4112:                 * resize the map
                   4113:                 */
1.99      chs      4114:
1.184     chs      4115:                vm_map_setmin(map, start);
                   4116:                vm_map_setmax(map, end);
1.10      mrg      4117:        } else {
                   4118:
                   4119:                /*
                   4120:                 * p's vmspace is being shared, so we can't reuse it for p since
                   4121:                 * it is still being used for others.   allocate a new vmspace
                   4122:                 * for p
                   4123:                 */
1.99      chs      4124:
1.327     martin   4125:                nvm = uvmspace_alloc(start, end, topdown);
1.1       mrg      4126:
1.10      mrg      4127:                /*
                   4128:                 * install new vmspace and drop our ref to the old one.
                   4129:                 */
                   4130:
1.254     ad       4131:                kpreempt_disable();
1.128     thorpej  4132:                pmap_deactivate(l);
1.10      mrg      4133:                p->p_vmspace = nvm;
1.128     thorpej  4134:                pmap_activate(l);
1.254     ad       4135:                kpreempt_enable();
1.13      thorpej  4136:
1.10      mrg      4137:                uvmspace_free(ovm);
                   4138:        }
1.1       mrg      4139: }
                   4140:
                   4141: /*
1.215     yamt     4142:  * uvmspace_addref: add a reference to a vmspace.
                   4143:  */
                   4144:
                   4145: void
                   4146: uvmspace_addref(struct vmspace *vm)
                   4147: {
                   4148:        struct vm_map *map = &vm->vm_map;
                   4149:
                   4150:        KASSERT((map->flags & VM_MAP_DYING) == 0);
                   4151:
1.238     ad       4152:        mutex_enter(&map->misc_lock);
1.215     yamt     4153:        KASSERT(vm->vm_refcnt > 0);
                   4154:        vm->vm_refcnt++;
1.238     ad       4155:        mutex_exit(&map->misc_lock);
1.215     yamt     4156: }
                   4157:
                   4158: /*
1.1       mrg      4159:  * uvmspace_free: free a vmspace data structure
                   4160:  */
                   4161:
1.10      mrg      4162: void
1.138     enami    4163: uvmspace_free(struct vmspace *vm)
1.1       mrg      4164: {
1.99      chs      4165:        struct vm_map_entry *dead_entries;
1.171     pk       4166:        struct vm_map *map = &vm->vm_map;
1.172     he       4167:        int n;
                   4168:
1.10      mrg      4169:        UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
1.1       mrg      4170:
1.331.2.2  skrll    4171:        UVMHIST_LOG(maphist,"(vm=%p) ref=%d", vm, vm->vm_refcnt,0,0);
1.238     ad       4172:        mutex_enter(&map->misc_lock);
1.172     he       4173:        n = --vm->vm_refcnt;
1.238     ad       4174:        mutex_exit(&map->misc_lock);
1.171     pk       4175:        if (n > 0)
1.120     chs      4176:                return;
1.99      chs      4177:
1.120     chs      4178:        /*
                   4179:         * at this point, there should be no other references to the map.
                   4180:         * delete all of the mappings, then destroy the pmap.
                   4181:         */
1.99      chs      4182:
1.120     chs      4183:        map->flags |= VM_MAP_DYING;
                   4184:        pmap_remove_all(map->pmap);
1.331.2.3  skrll    4185:
1.120     chs      4186:        /* Get rid of any SYSV shared memory segments. */
1.331.2.3  skrll    4187:        if (uvm_shmexit && vm->vm_shm != NULL)
                   4188:                (*uvm_shmexit)(vm);
1.314     rmind    4189:
1.120     chs      4190:        if (map->nentries) {
1.184     chs      4191:                uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
1.311     para     4192:                    &dead_entries, 0);
1.120     chs      4193:                if (dead_entries != NULL)
                   4194:                        uvm_unmap_detach(dead_entries, 0);
1.10      mrg      4195:        }
1.146     yamt     4196:        KASSERT(map->nentries == 0);
                   4197:        KASSERT(map->size == 0);
1.314     rmind    4198:
1.239     ad       4199:        mutex_destroy(&map->misc_lock);
                   4200:        rw_destroy(&map->lock);
1.255     ad       4201:        cv_destroy(&map->cv);
1.120     chs      4202:        pmap_destroy(map->pmap);
1.248     ad       4203:        pool_cache_put(&uvm_vmspace_cache, vm);
1.1       mrg      4204: }
                   4205:
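                         /*
                          * uvm_mapent_clone: clone an existing map entry into new_map.
                          *
                          * => allocates a new entry, copies old_entry into it and clears
                          *    the wired count.
                          * => gains a reference on the entry's amap and/or backing object.
                          * => links the new entry at the end of new_map's entry list.
                          */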
1.329     christos 4206: static struct vm_map_entry *
                   4207: uvm_mapent_clone(struct vm_map *new_map, struct vm_map_entry *old_entry,
                   4208:     int flags)
                   4209: {
                   4210:        struct vm_map_entry *new_entry;
                   4211:
                   4212:        new_entry = uvm_mapent_alloc(new_map, 0);
                   4213:        /* old_entry -> new_entry */
                   4214:        uvm_mapent_copy(old_entry, new_entry);
                   4215:
                   4216:        /* new pmap has nothing wired in it */
                   4217:        new_entry->wired_count = 0;
                   4218:
                   4219:        /*
                   4220:         * gain reference to object backing the map (can't
                   4221:         * be a submap, already checked this case).
                   4222:         */
                   4223:
                   4224:        if (new_entry->aref.ar_amap)
                   4225:                uvm_map_reference_amap(new_entry, flags);
                   4226:
                   4227:        if (new_entry->object.uvm_obj &&
                   4228:            new_entry->object.uvm_obj->pgops->pgo_reference)
                   4229:                new_entry->object.uvm_obj->pgops->pgo_reference(
                   4230:                        new_entry->object.uvm_obj);
                   4231:
                   4232:        /* insert entry at end of new_map's entry list */
                   4233:        uvm_map_entry_link(new_map, new_map->header.prev,
                   4234:            new_entry);
                   4235:
                   4236:        return new_entry;
                   4237: }
                   4238:
                   4239: /*
                   4240:  * share the mapping: this means we want the old and
                   4241:  * new entries to share amaps and backing objects.
                   4242:  */
                   4243: static void
                   4244: uvm_mapent_forkshared(struct vm_map *new_map, struct vm_map *old_map,
                   4245:     struct vm_map_entry *old_entry)
                   4246: {
                   4247:        /*
                   4248:         * if the old_entry needs a new amap (due to prev fork)
                   4249:         * then we need to allocate it now so that we have
                   4250:         * something we own to share with the new_entry.   [in
                   4251:         * other words, we need to clear needs_copy]
                   4252:         */
                   4253:
                   4254:        if (UVM_ET_ISNEEDSCOPY(old_entry)) {
                   4255:                /* get our own amap, clears needs_copy */
                   4256:                amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK,
                   4257:                    0, 0);
                   4258:                /* XXXCDC: WAITOK??? */
                   4259:        }
                   4260:
                   4261:        uvm_mapent_clone(new_map, old_entry, AMAP_SHARED);
                   4262: }
                   4263:
                   4264:
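                         /*
                          * copy the mapping: the new entry gets a private, copy-on-write
                          * view of the old entry (mmap's MAP_PRIVATE semantics).  the amap
                          * is either copied now or lazily via needs_copy.
                          */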
                   4265: static void
                   4266: uvm_mapent_forkcopy(struct vm_map *new_map, struct vm_map *old_map,
                   4267:     struct vm_map_entry *old_entry)
                   4268: {
                   4269:        struct vm_map_entry *new_entry;
                   4270:
                   4271:        /*
                   4272:         * copy-on-write the mapping (using mmap's
                   4273:         * MAP_PRIVATE semantics)
                   4274:         *
                   4275:         * allocate new_entry, adjust reference counts.
                   4276:         * (note that new references are read-only).
                   4277:         */
                   4278:
                   4279:        new_entry = uvm_mapent_clone(new_map, old_entry, 0);
                   4280:
                   4281:        new_entry->etype |=
                   4282:            (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
                   4283:
                   4284:        /*
                   4285:         * the new entry will need an amap.  it will either
                   4286:         * need to be copied from the old entry or created
                   4287:         * from scratch (if the old entry does not have an
                   4288:         * amap).  can we defer this process until later
                   4289:         * (by setting "needs_copy") or do we need to copy
                   4290:         * the amap now?
                   4291:         *
                   4292:         * we must copy the amap now if any of the following
                   4293:         * conditions hold:
                   4294:         * 1. the old entry has an amap and that amap is
                   4295:         *    being shared.  this means that the old (parent)
                   4296:         *    process is sharing the amap with another
                   4297:         *    process.  if we do not clear needs_copy here
                   4298:         *    we will end up in a situation where both the
                   4299:         *    parent and child process are referring to the
                   4300:         *    same amap with "needs_copy" set.  if the
                   4301:         *    parent write-faults, the fault routine will
                   4302:         *    clear "needs_copy" in the parent by allocating
                   4303:         *    a new amap.   this is wrong because the
                   4304:         *    parent is supposed to be sharing the old amap
                   4305:         *    and the new amap will break that.
                   4306:         *
                   4307:         * 2. if the old entry has an amap and a non-zero
                   4308:         *    wire count then we are going to have to call
                   4309:         *    amap_cow_now to avoid page faults in the
                   4310:         *    parent process.   since amap_cow_now requires
                   4311:         *    "needs_copy" to be clear we might as well
                   4312:         *    clear it here as well.
                   4313:         *
                   4314:         */
                   4315:
                   4316:        if (old_entry->aref.ar_amap != NULL) {
                   4317:                if ((amap_flags(old_entry->aref.ar_amap) & AMAP_SHARED) != 0 ||
                   4318:                    VM_MAPENT_ISWIRED(old_entry)) {
                   4319:
                   4320:                        amap_copy(new_map, new_entry,
                   4321:                            AMAP_COPY_NOCHUNK, 0, 0);
                   4322:                        /* XXXCDC: M_WAITOK ... ok? */
                   4323:                }
                   4324:        }
                   4325:
                   4326:        /*
                   4327:         * if the parent's entry is wired down, then the
                   4328:         * parent process does not want page faults on
                   4329:         * access to that memory.  this means that we
                   4330:         * cannot do copy-on-write because we can't write
                   4331:         * protect the old entry.   in this case we
                   4332:         * resolve all copy-on-write faults now, using
                   4333:         * amap_cow_now.   note that we have already
                   4334:         * allocated any needed amap (above).
                   4335:         */
                   4336:
                   4337:        if (VM_MAPENT_ISWIRED(old_entry)) {
                   4338:
                   4339:                /*
                   4340:                 * resolve all copy-on-write faults now
                   4341:                 * (note that there is nothing to do if
                   4342:                 * the old mapping does not have an amap).
                   4343:                 */
                   4344:                if (old_entry->aref.ar_amap)
                   4345:                        amap_cow_now(new_map, new_entry);
                   4346:
                   4347:        } else {
                   4348:                /*
                   4349:                 * set up mappings to trigger copy-on-write faults.
                   4350:                 * we must write-protect the parent if it has
                   4351:                 * an amap and it is not already "needs_copy"...
                   4352:                 * if it is already "needs_copy" then the parent
                   4353:                 * has already been write-protected by a previous
                   4354:                 * fork operation.
                   4355:                 */
                   4356:                if (old_entry->aref.ar_amap &&
                   4357:                    !UVM_ET_ISNEEDSCOPY(old_entry)) {
                   4358:                        if (old_entry->max_protection & VM_PROT_WRITE) {
                   4359:                                pmap_protect(old_map->pmap,
                   4360:                                    old_entry->start, old_entry->end,
                   4361:                                    old_entry->protection & ~VM_PROT_WRITE);
                   4362:                        }
                   4363:                        old_entry->etype |= UVM_ET_NEEDSCOPY;
                   4364:                }
                   4365:        }
                   4366: }
                   4367:
1.1       mrg      4368: /*
1.330     christos 4369:  * zero the mapping: the new entry will be zero initialized
                   4370:  */
                   4371: static void
                   4372: uvm_mapent_forkzero(struct vm_map *new_map, struct vm_map *old_map,
                   4373:     struct vm_map_entry *old_entry)
                   4374: {
                   4375:        struct vm_map_entry *new_entry;
                   4376:
                   4377:        new_entry = uvm_mapent_clone(new_map, old_entry, 0);
                   4378:
                   4379:        new_entry->etype |=
                   4380:            (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
                   4381:
                   4382:        if (new_entry->aref.ar_amap) {
                   4383:                uvm_map_unreference_amap(new_entry, 0);
                   4384:                new_entry->aref.ar_pageoff = 0;
                   4385:                new_entry->aref.ar_amap = NULL;
                   4386:        }
                   4387:
                   4388:        if (UVM_ET_ISOBJ(new_entry)) {
                   4389:                if (new_entry->object.uvm_obj->pgops->pgo_detach)
                   4390:                        new_entry->object.uvm_obj->pgops->pgo_detach(
                   4391:                            new_entry->object.uvm_obj);
                   4392:                new_entry->object.uvm_obj = NULL;
                   4393:                new_entry->etype &= ~UVM_ET_OBJ;
                   4394:        }
                   4395: }
                   4396:
                   4397: /*
1.1       mrg      4398:  *   F O R K   -   m a i n   e n t r y   p o i n t
                   4399:  */
                   4400: /*
                   4401:  * uvmspace_fork: fork a process' main map
                   4402:  *
                   4403:  * => create a new vmspace for child process from parent.
                   4404:  * => parent's map must not be locked.
                   4405:  */
                   4406:
1.10      mrg      4407: struct vmspace *
1.138     enami    4408: uvmspace_fork(struct vmspace *vm1)
1.10      mrg      4409: {
                   4410:        struct vmspace *vm2;
1.99      chs      4411:        struct vm_map *old_map = &vm1->vm_map;
                   4412:        struct vm_map *new_map;
                   4413:        struct vm_map_entry *old_entry;
1.10      mrg      4414:        UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
1.1       mrg      4415:
1.10      mrg      4416:        vm_map_lock(old_map);
1.1       mrg      4417:
1.327     martin   4418:        vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
                   4419:            vm1->vm_map.flags & VM_MAP_TOPDOWN);
1.23      perry    4420:        memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
1.235     christos 4421:            (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
1.10      mrg      4422:        new_map = &vm2->vm_map;           /* XXX */
                   4423:
                   4424:        old_entry = old_map->header.next;
1.162     pooka    4425:        new_map->size = old_map->size;
1.10      mrg      4426:
                   4427:        /*
                   4428:         * go entry-by-entry
                   4429:         */
1.1       mrg      4430:
1.10      mrg      4431:        while (old_entry != &old_map->header) {
1.1       mrg      4432:
1.10      mrg      4433:                /*
                   4434:                 * first, some sanity checks on the old entry
                   4435:                 */
1.99      chs      4436:
1.94      chs      4437:                KASSERT(!UVM_ET_ISSUBMAP(old_entry));
                   4438:                KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
                   4439:                        !UVM_ET_ISNEEDSCOPY(old_entry));
1.1       mrg      4440:
1.10      mrg      4441:                switch (old_entry->inheritance) {
1.80      wiz      4442:                case MAP_INHERIT_NONE:
1.10      mrg      4443:                        /*
1.162     pooka    4444:                         * drop the mapping, modify size
1.10      mrg      4445:                         */
1.162     pooka    4446:                        new_map->size -= old_entry->end - old_entry->start;
1.10      mrg      4447:                        break;
                   4448:
1.80      wiz      4449:                case MAP_INHERIT_SHARE:
1.329     christos 4450:                        uvm_mapent_forkshared(new_map, old_map, old_entry);
1.10      mrg      4451:                        break;
                   4452:
1.80      wiz      4453:                case MAP_INHERIT_COPY:
1.329     christos 4454:                        uvm_mapent_forkcopy(new_map, old_map, old_entry);
                   4455:                        break;
1.10      mrg      4456:
1.330     christos 4457:                case MAP_INHERIT_ZERO:
                   4458:                        uvm_mapent_forkzero(new_map, old_map, old_entry);
                   4459:                        break;
1.329     christos 4460:                default:
                   4461:                        KASSERT(0);
1.10      mrg      4462:                        break;
1.329     christos 4463:                }
1.10      mrg      4464:                old_entry = old_entry->next;
1.1       mrg      4465:        }
                   4466:
1.268     ad       4467:        pmap_update(old_map->pmap);
1.98      chs      4468:        vm_map_unlock(old_map);
1.1       mrg      4469:
1.331.2.3  skrll    4470:        if (uvm_shmfork && vm1->vm_shm)
                   4471:                (*uvm_shmfork)(vm1, vm2);
1.39      thorpej  4472:
                   4473: #ifdef PMAP_FORK
                   4474:        pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
1.1       mrg      4475: #endif
                   4476:
1.10      mrg      4477:        UVMHIST_LOG(maphist,"<- done",0,0,0,0);
1.139     enami    4478:        return (vm2);
1.1       mrg      4479: }
                   4480:
                   4481:
1.174     yamt     4482: /*
1.194     yamt     4483:  * uvm_mapent_trymerge: try to merge an entry with its neighbors.
                   4484:  *
                   4485:  * => called with map locked.
                   4486:  * => return non-zero if successfully merged.
                   4487:  */
                   4488:
                   4489: int
                   4490: uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
                   4491: {
                   4492:        struct uvm_object *uobj;
                   4493:        struct vm_map_entry *next;
                   4494:        struct vm_map_entry *prev;
1.195     yamt     4495:        vsize_t size;
1.194     yamt     4496:        int merged = 0;
1.233     thorpej  4497:        bool copying;
1.194     yamt     4498:        int newetype;
                   4499:
                   4500:        if (entry->aref.ar_amap != NULL) {
                   4501:                return 0;
                   4502:        }
                   4503:        if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
                   4504:                return 0;
                   4505:        }
                   4506:
                   4507:        uobj = entry->object.uvm_obj;
1.195     yamt     4508:        size = entry->end - entry->start;
1.194     yamt     4509:        copying = (flags & UVM_MERGE_COPYING) != 0;
                   4510:        newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;
                   4511:
                   4512:        next = entry->next;
                   4513:        if (next != &map->header &&
                   4514:            next->start == entry->end &&
                   4515:            ((copying && next->aref.ar_amap != NULL &&
                   4516:            amap_refs(next->aref.ar_amap) == 1) ||
                   4517:            (!copying && next->aref.ar_amap == NULL)) &&
                   4518:            UVM_ET_ISCOMPATIBLE(next, newetype,
                   4519:            uobj, entry->flags, entry->protection,
                   4520:            entry->max_protection, entry->inheritance, entry->advice,
1.195     yamt     4521:            entry->wired_count) &&
                   4522:            (uobj == NULL || entry->offset + size == next->offset)) {
1.194     yamt     4523:                int error;
                   4524:
                   4525:                if (copying) {
1.195     yamt     4526:                        error = amap_extend(next, size,
1.194     yamt     4527:                            AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
                   4528:                } else {
                   4529:                        error = 0;
                   4530:                }
                   4531:                if (error == 0) {
1.197     yamt     4532:                        if (uobj) {
                   4533:                                if (uobj->pgops->pgo_detach) {
                   4534:                                        uobj->pgops->pgo_detach(uobj);
                   4535:                                }
1.194     yamt     4536:                        }
                   4537:
                   4538:                        entry->end = next->end;
1.221     yamt     4539:                        clear_hints(map, next);
1.194     yamt     4540:                        uvm_map_entry_unlink(map, next);
                   4541:                        if (copying) {
                   4542:                                entry->aref = next->aref;
                   4543:                                entry->etype &= ~UVM_ET_NEEDSCOPY;
                   4544:                        }
1.222     yamt     4545:                        uvm_map_check(map, "trymerge forwardmerge");
1.311     para     4546:                        uvm_mapent_free(next);
1.194     yamt     4547:                        merged++;
                   4548:                }
                   4549:        }
                   4550:
                   4551:        prev = entry->prev;
                   4552:        if (prev != &map->header &&
                   4553:            prev->end == entry->start &&
                   4554:            ((copying && !merged && prev->aref.ar_amap != NULL &&
                   4555:            amap_refs(prev->aref.ar_amap) == 1) ||
                   4556:            (!copying && prev->aref.ar_amap == NULL)) &&
                   4557:            UVM_ET_ISCOMPATIBLE(prev, newetype,
                   4558:            uobj, entry->flags, entry->protection,
                   4559:            entry->max_protection, entry->inheritance, entry->advice,
1.195     yamt     4560:            entry->wired_count) &&
1.196     yamt     4561:            (uobj == NULL ||
                   4562:            prev->offset + prev->end - prev->start == entry->offset)) {
1.194     yamt     4563:                int error;
                   4564:
                   4565:                if (copying) {
1.195     yamt     4566:                        error = amap_extend(prev, size,
1.194     yamt     4567:                            AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
                   4568:                } else {
                   4569:                        error = 0;
                   4570:                }
                   4571:                if (error == 0) {
1.197     yamt     4572:                        if (uobj) {
                   4573:                                if (uobj->pgops->pgo_detach) {
                   4574:                                        uobj->pgops->pgo_detach(uobj);
                   4575:                                }
                   4576:                                entry->offset = prev->offset;
1.194     yamt     4577:                        }
                   4578:
                   4579:                        entry->start = prev->start;
1.221     yamt     4580:                        clear_hints(map, prev);
1.194     yamt     4581:                        uvm_map_entry_unlink(map, prev);
                   4582:                        if (copying) {
                   4583:                                entry->aref = prev->aref;
                   4584:                                entry->etype &= ~UVM_ET_NEEDSCOPY;
                   4585:                        }
1.222     yamt     4586:                        uvm_map_check(map, "trymerge backmerge");
1.311     para     4587:                        uvm_mapent_free(prev);
1.194     yamt     4588:                        merged++;
                   4589:                }
                   4590:        }
                   4591:
                   4592:        return merged;
                   4593: }
                   4594:
1.211     yamt     4595: /*
                   4596:  * uvm_map_setup: init map
                   4597:  *
                   4598:  * => map must not be in service yet.
                   4599:  */
                   4600:
                   4601: void
                   4602: uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
                   4603: {
                   4604:
1.263     matt     4605:        rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
1.211     yamt     4606:        map->header.next = map->header.prev = &map->header;
                   4607:        map->nentries = 0;
                   4608:        map->size = 0;
                   4609:        map->ref_count = 1;
                   4610:        vm_map_setmin(map, vmin);
                   4611:        vm_map_setmax(map, vmax);
                   4612:        map->flags = flags;
                   4613:        map->first_free = &map->header;
                   4614:        map->hint = &map->header;
                   4615:        map->timestamp = 0;
1.238     ad       4616:        map->busy = NULL;
                   4617:
1.240     ad       4618:        rw_init(&map->lock);
1.238     ad       4619:        cv_init(&map->cv, "vm_map");
1.314     rmind    4620:        mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE);
1.211     yamt     4621: }
                   4622:
                   4623: /*
                   4624:  *   U N M A P   -   m a i n   e n t r y   p o i n t
                   4625:  */
                   4626:
                   4627: /*
                   4628:  * uvm_unmap1: remove mappings from a vm_map (from "start" up to "end")
                   4629:  *
                   4630:  * => caller must check alignment and size
                   4631:  * => map must be unlocked (we will lock it)
                   4632:  * => flags is UVM_FLAG_QUANTUM or 0.
                   4633:  */
                   4634:
                   4635: void
                   4636: uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
                   4637: {
                   4638:        struct vm_map_entry *dead_entries;
                   4639:        UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);
                   4640:
1.331.2.2  skrll    4641:        UVMHIST_LOG(maphist, "  (map=%p, start=%#lx, end=%#lx)",
1.211     yamt     4642:            map, start, end, 0);
1.246     xtraeme  4643:        if (map == kernel_map) {
1.244     yamt     4644:                LOCKDEBUG_MEM_CHECK((void *)start, end - start);
1.246     xtraeme  4645:        }
1.211     yamt     4646:        /*
                   4647:         * work now done by helper functions.   wipe the pmap's and then
                   4648:         * detach from the dead entries...
                   4649:         */
                   4650:        vm_map_lock(map);
1.311     para     4651:        uvm_unmap_remove(map, start, end, &dead_entries, flags);
1.211     yamt     4652:        vm_map_unlock(map);
                   4653:
                   4654:        if (dead_entries != NULL)
                   4655:                uvm_unmap_detach(dead_entries, 0);
                   4656:
                   4657:        UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
                   4658: }
                   4659:
                   4660:
                   4661: /*
                   4662:  * uvm_map_reference: add reference to a map
                   4663:  *
1.238     ad       4664:  * => map need not be locked (we use misc_lock).
1.211     yamt     4665:  */
                   4666:
                   4667: void
                   4668: uvm_map_reference(struct vm_map *map)
                   4669: {
1.238     ad       4670:        mutex_enter(&map->misc_lock);
1.211     yamt     4671:        map->ref_count++;
1.238     ad       4672:        mutex_exit(&map->misc_lock);
1.211     yamt     4673: }
                   4674:
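                         /*
                          * vm_map_starved_p: return true if the map is short on virtual
                          * address space (VM_MAP_WANTVA is set or the map is nearly full).
                          */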
1.233     thorpej  4675: bool
1.226     yamt     4676: vm_map_starved_p(struct vm_map *map)
                   4677: {
                   4678:
                   4679:        if ((map->flags & VM_MAP_WANTVA) != 0) {
1.234     thorpej  4680:                return true;
1.226     yamt     4681:        }
                   4682:        /* XXX */
                   4683:        if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
1.234     thorpej  4684:                return true;
1.226     yamt     4685:        }
1.234     thorpej  4686:        return false;
1.226     yamt     4687: }
1.247     yamt     4688:
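                         /*
                          * uvm_map_lock_entry: lock an entry's amap (if any), then its
                          * backing object (if any).
                          */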
1.298     rmind    4689: void
                   4690: uvm_map_lock_entry(struct vm_map_entry *entry)
                   4691: {
                   4692:
1.299     rmind    4693:        if (entry->aref.ar_amap != NULL) {
                   4694:                amap_lock(entry->aref.ar_amap);
                   4695:        }
1.298     rmind    4696:        if (UVM_ET_ISOBJ(entry)) {
                   4697:                mutex_enter(entry->object.uvm_obj->vmobjlock);
                   4698:        }
                   4699: }
                   4700:
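                         /*
                          * uvm_map_unlock_entry: release the locks taken by
                          * uvm_map_lock_entry, in the reverse order.
                          */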
                   4701: void
                   4702: uvm_map_unlock_entry(struct vm_map_entry *entry)
                   4703: {
                   4704:
1.299     rmind    4705:        if (UVM_ET_ISOBJ(entry)) {
                   4706:                mutex_exit(entry->object.uvm_obj->vmobjlock);
                   4707:        }
1.298     rmind    4708:        if (entry->aref.ar_amap != NULL) {
                   4709:                amap_unlock(entry->aref.ar_amap);
                   4710:        }
                   4711: }
                   4712:
1.270     pooka    4713: #if defined(DDB) || defined(DEBUGPRINT)
1.280     thorpej  4714:
                   4715: /*
                   4716:  * uvm_map_printit: actually prints the map
                   4717:  */
                   4718:
                   4719: void
                   4720: uvm_map_printit(struct vm_map *map, bool full,
                   4721:     void (*pr)(const char *, ...))
                   4722: {
                   4723:        struct vm_map_entry *entry;
                   4724:
1.331.2.2  skrll    4725:        (*pr)("MAP %p: [%#lx->%#lx]\n", map, vm_map_min(map),
1.280     thorpej  4726:            vm_map_max(map));
1.331.2.2  skrll    4727:        (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=%#x\n",
1.280     thorpej  4728:            map->nentries, map->size, map->ref_count, map->timestamp,
                   4729:            map->flags);
                   4730:        (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
                   4731:            pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
                   4732:        if (!full)
                   4733:                return;
                   4734:        for (entry = map->header.next; entry != &map->header;
                   4735:            entry = entry->next) {
1.331.2.2  skrll    4736:                (*pr)(" - %p: %#lx->%#lx: obj=%p/%#llx, amap=%p/%d\n",
1.280     thorpej  4737:                    entry, entry->start, entry->end, entry->object.uvm_obj,
                   4738:                    (long long)entry->offset, entry->aref.ar_amap,
                   4739:                    entry->aref.ar_pageoff);
                   4740:                (*pr)(
                   4741:                    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
                   4742:                    "wc=%d, adv=%d\n",
                   4743:                    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
                   4744:                    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
                   4745:                    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
                   4746:                    entry->protection, entry->max_protection,
                   4747:                    entry->inheritance, entry->wired_count, entry->advice);
                   4748:        }
                   4749: }
                   4750:
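                         /*
                          * uvm_whatis: print which kernel map (or submap) an address
                          * falls in.
                          */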
1.247     yamt     4751: void
                   4752: uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...))
                   4753: {
                   4754:        struct vm_map *map;
                   4755:
                   4756:        for (map = kernel_map;;) {
                   4757:                struct vm_map_entry *entry;
                   4758:
                   4759:                if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) {
                   4760:                        break;
                   4761:                }
                   4762:                (*pr)("%p is %p+%zu from VMMAP %p\n",
                   4763:                    (void *)addr, (void *)entry->start,
                   4764:                    (size_t)(addr - (uintptr_t)entry->start), map);
                   4765:                if (!UVM_ET_ISSUBMAP(entry)) {
                   4766:                        break;
                   4767:                }
                   4768:                map = entry->object.sub_map;
                   4769:        }
                   4770: }
1.280     thorpej  4771:
                   4772: #endif /* DDB || DEBUGPRINT */
1.288     drochner 4773:
                   4774: #ifndef __USER_VA0_IS_SAFE
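                         /*
                          * sysctl_user_va0_disable: sysctl handler for the
                          * user_va0_disable knob.  re-enabling mappings at virtual
                          * address 0 requires the KAUTH_SYSTEM_MAP_VA_ZERO privilege.
                          */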
                   4775: static int
1.290     drochner 4776: sysctl_user_va0_disable(SYSCTLFN_ARGS)
1.288     drochner 4777: {
                   4778:        struct sysctlnode node;
                   4779:        int t, error;
                   4780:
                   4781:        node = *rnode;
                   4782:        node.sysctl_data = &t;
1.290     drochner 4783:        t = user_va0_disable;
1.288     drochner 4784:        error = sysctl_lookup(SYSCTLFN_CALL(&node));
                   4785:        if (error || newp == NULL)
                   4786:                return (error);
                   4787:
1.290     drochner 4788:        if (!t && user_va0_disable &&
1.316     elad     4789:            kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0,
                   4790:            NULL, NULL, NULL))
1.288     drochner 4791:                return EPERM;
                   4792:
1.290     drochner 4793:        user_va0_disable = !!t;
1.288     drochner 4794:        return 0;
                   4795: }
1.331.2.3  skrll    4796: #endif
                   4797:
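                         /*
                          * fill_vmentry: translate a vm_map_entry into a kinfo_vmentry
                          * record for sysctl consumers.
                          */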
                   4798: static int
                   4799: fill_vmentry(struct lwp *l, struct proc *p, struct kinfo_vmentry *kve,
                   4800:     struct vm_map *m, struct vm_map_entry *e)
                   4801: {
                   4802: #ifndef _RUMPKERNEL
                   4803:        int error;
                   4804:
                   4805:        memset(kve, 0, sizeof(*kve));
                   4806:        KASSERT(e != NULL);
                   4807:        if (UVM_ET_ISOBJ(e)) {
                   4808:                struct uvm_object *uobj = e->object.uvm_obj;
                   4809:                KASSERT(uobj != NULL);
                   4810:                kve->kve_ref_count = uobj->uo_refs;
                   4811:                kve->kve_count = uobj->uo_npages;
                   4812:                if (UVM_OBJ_IS_VNODE(uobj)) {
                   4813:                        struct vattr va;
                   4814:                        struct vnode *vp = (struct vnode *)uobj;
                   4815:                        vn_lock(vp, LK_SHARED | LK_RETRY);
                   4816:                        error = VOP_GETATTR(vp, &va, l->l_cred);
                   4817:                        VOP_UNLOCK(vp);
                   4818:                        kve->kve_type = KVME_TYPE_VNODE;
                   4819:                        if (error == 0) {
                   4820:                                kve->kve_vn_size = vp->v_size;
                   4821:                                kve->kve_vn_type = (int)vp->v_type;
                   4822:                                kve->kve_vn_mode = va.va_mode;
                   4823:                                kve->kve_vn_rdev = va.va_rdev;
                   4824:                                kve->kve_vn_fileid = va.va_fileid;
                   4825:                                kve->kve_vn_fsid = va.va_fsid;
                   4826:                                error = vnode_to_path(kve->kve_path,
                   4827:                                    sizeof(kve->kve_path) / 2, vp, l, p);
                   4828: #ifdef DIAGNOSTIC
                   4829:                                if (error)
                   4830:                                        printf("%s: vp %p error %d\n", __func__,
                   4831:                                                vp, error);
                   4832: #endif
                   4833:                        }
                   4834:                } else if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
                   4835:                        kve->kve_type = KVME_TYPE_KERN;
                   4836:                } else if (UVM_OBJ_IS_DEVICE(uobj)) {
                   4837:                        kve->kve_type = KVME_TYPE_DEVICE;
                   4838:                } else if (UVM_OBJ_IS_AOBJ(uobj)) {
                   4839:                        kve->kve_type = KVME_TYPE_ANON;
                   4840:                } else {
                   4841:                        kve->kve_type = KVME_TYPE_OBJECT;
                   4842:                }
                   4843:        } else if (UVM_ET_ISSUBMAP(e)) {
                   4844:                struct vm_map *map = e->object.sub_map;
                   4845:                KASSERT(map != NULL);
                   4846:                kve->kve_ref_count = map->ref_count;
                   4847:                kve->kve_count = map->nentries;
                   4848:                kve->kve_type = KVME_TYPE_SUBMAP;
                   4849:        } else
                   4850:                kve->kve_type = KVME_TYPE_UNKNOWN;
                   4851:
                   4852:        kve->kve_start = e->start;
                   4853:        kve->kve_end = e->end;
                   4854:        kve->kve_offset = e->offset;
                   4855:        kve->kve_wired_count = e->wired_count;
                   4856:        kve->kve_inheritance = e->inheritance;
                   4857:        kve->kve_attributes = e->map_attrib;
                   4858:        kve->kve_advice = e->advice;
                   4859: #define PROT(p) (((p) & VM_PROT_READ) ? KVME_PROT_READ : 0) | \
                   4860:        (((p) & VM_PROT_WRITE) ? KVME_PROT_WRITE : 0) | \
                   4861:        (((p) & VM_PROT_EXECUTE) ? KVME_PROT_EXEC : 0)
                   4862:        kve->kve_protection = PROT(e->protection);
                   4863:        kve->kve_max_protection = PROT(e->max_protection);
                   4864:        kve->kve_flags |= (e->etype & UVM_ET_COPYONWRITE)
                   4865:            ? KVME_FLAG_COW : 0;
                   4866:        kve->kve_flags |= (e->etype & UVM_ET_NEEDSCOPY)
                   4867:            ? KVME_FLAG_NEEDS_COPY : 0;
                   4868:        kve->kve_flags |= (m->flags & VM_MAP_TOPDOWN)
                   4869:            ? KVME_FLAG_GROWS_DOWN : KVME_FLAG_GROWS_UP;
                   4870:        kve->kve_flags |= (m->flags & VM_MAP_PAGEABLE)
                   4871:            ? KVME_FLAG_PAGEABLE : 0;
                   4872: #endif
                   4873:        return 0;
                   4874: }
                   4875:
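/*
 * fill_vmentries: copy out one struct kinfo_vmentry for each map entry of
 * the process named by "pid".
 *
 * "elem_size" is the record size the caller expects, which may differ from
 * the kernel's sizeof(struct kinfo_vmentry).  When "oldp" is NULL only the
 * space required for the complete set of entries is returned in *oldlenp,
 * so callers can size their buffer before fetching the data.
 */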
                   4876: static int
                   4877: fill_vmentries(struct lwp *l, pid_t pid, u_int elem_size, void *oldp,
                   4878:     size_t *oldlenp)
                   4879: {
                   4880:        int error;
                   4881:        struct proc *p;
1.331.2.5  skrll    4882:        struct kinfo_vmentry *vme;
1.331.2.3  skrll    4883:        struct vmspace *vm;
                   4884:        struct vm_map *map;
                   4885:        struct vm_map_entry *entry;
                   4886:        char *dp;
        size_t count, maxcount, vmesize;
1.331.2.3  skrll    4888:
        vme = NULL;
        vmesize = *oldlenp;
        count = 0;
        if (elem_size == 0)
                return EINVAL;
        if (oldp && *oldlenp > 1024 * 1024)
                return E2BIG;
        /* how many struct kinfo_vmentry records the staging buffer can hold */
        maxcount = vmesize / sizeof(*vme);
1.331.2.3  skrll    4894:
                   4895:        if ((error = proc_find_locked(l, &p, pid)) != 0)
                   4896:                return error;
                   4897:
                   4898:        if ((error = proc_vmspace_getref(p, &vm)) != 0)
                   4899:                goto out;
                   4900:
                   4901:        map = &vm->vm_map;
                   4902:        vm_map_lock_read(map);
                   4903:
                   4904:        dp = oldp;
        if (oldp && vmesize != 0)
                vme = kmem_alloc(vmesize, KM_SLEEP);
        for (entry = map->header.next; entry != &map->header;
            entry = entry->next) {
                /* fill no more records than the staging buffer can hold */
                if (vme != NULL && count < maxcount) {
                        error = fill_vmentry(l, p, &vme[count], map, entry);
                        if (error) {
                                /* don't leak the map lock or the vmspace */
                                vm_map_unlock_read(map);
                                uvmspace_free(vm);
                                goto out;
                        }
                        dp += elem_size;
                }
                count++;
        }
                   4917:        vm_map_unlock_read(map);
                   4918:        uvmspace_free(vm);
1.331.2.5  skrll    4919:
1.331.2.3  skrll    4920: out:
                   4921:        if (pid != -1)
                   4922:                mutex_exit(p->p_lock);
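        /*
         * Copy the collected records out to the caller and report the
         * size needed for the complete set; ENOSPC tells the caller
         * that the supplied buffer was too small.
         */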
                   4923:        if (error == 0) {
1.331.2.5  skrll    4924:                const u_int esize = min(sizeof(*vme), elem_size);
                   4925:                dp = oldp;
                   4926:                for (size_t i = 0; i < count; i++) {
                        if (vme != NULL && i < maxcount &&
                            (dp - (char *)oldp) + elem_size <= *oldlenp) {
                   4929:                                error = sysctl_copyout(l, &vme[i], dp, esize);
                   4930:                                if (error)
                   4931:                                        break;
                   4932:                                dp += elem_size;
                   4933:                        } else
                   4934:                                break;
                   4935:                }
1.331.2.3  skrll    4936:                count *= elem_size;
                   4937:                if (oldp != NULL && *oldlenp < count)
                   4938:                        error = ENOSPC;
                   4939:                *oldlenp = count;
                   4940:        }
1.331.2.5  skrll    4941:        if (vme)
                   4942:                kmem_free(vme, vmesize);
1.331.2.3  skrll    4943:        return error;
                   4944: }
                   4945:
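/*
 * sysctl helper for the "vm.proc" node.  Only VM_PROC_MAP is handled:
 * it takes a pid and a record size and returns the target process' map
 * entries via fill_vmentries().
 */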
                   4946: static int
                   4947: sysctl_vmproc(SYSCTLFN_ARGS)
                   4948: {
                   4949:        int error;
                   4950:
                   4951:        if (namelen == 1 && name[0] == CTL_QUERY)
                   4952:                return (sysctl_query(SYSCTLFN_CALL(rnode)));
                   4953:
                   4954:        if (namelen == 0)
                   4955:                return EINVAL;
                   4956:
                   4957:        switch (name[0]) {
                   4958:        case VM_PROC_MAP:
                   4959:                if (namelen != 3)
                   4960:                        return EINVAL;
                   4961:                sysctl_unlock();
                   4962:                error = fill_vmentries(l, name[1], name[2],
                   4963:                    oldp, oldlenp);
                   4964:                sysctl_relock();
                   4965:                return error;
                   4966:        default:
                   4967:                return EINVAL;
                   4968:        }
                   4969: }
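
/*
 * Example usage (a sketch, not part of this file): a userland caller
 * would normally probe the required size with a NULL buffer and then
 * fetch the entries.  This assumes <sys/sysctl.h> for struct
 * kinfo_vmentry and <uvm/uvm_param.h> for the VM_PROC* names, plus the
 * usual libc headers:
 *
 *      int mib[5] = { CTL_VM, VM_PROC, VM_PROC_MAP, getpid(),
 *          sizeof(struct kinfo_vmentry) };
 *      size_t len = 0;
 *
 *      if (sysctl(mib, 5, NULL, &len, NULL, 0) == -1)
 *              err(EXIT_FAILURE, "sysctl");
 *      struct kinfo_vmentry *kve = malloc(len);
 *      if (kve == NULL || sysctl(mib, 5, kve, &len, NULL, 0) == -1)
 *              err(EXIT_FAILURE, "sysctl");
 *      for (size_t i = 0; i < len / sizeof(*kve); i++)
 *              printf("%#" PRIx64 "-%#" PRIx64 "\n",
 *                  (uint64_t)kve[i].kve_start, (uint64_t)kve[i].kve_end);
 *
 * A robust caller retries with a larger buffer on ENOSPC, since the map
 * can grow between the two calls.
 */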
1.288     drochner 4970:
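/*
 * Register the uvm_map sysctl nodes: "vm.proc" for per-process map
 * information and, on platforms where mapping VA 0 is not safe,
 * "vm.user_va0_disable".
 */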
                   4971: SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup")
                   4972: {
                   4973:
1.331.2.3  skrll    4974:        sysctl_createv(clog, 0, NULL, NULL,
                   4975:                       CTLFLAG_PERMANENT,
                   4976:                       CTLTYPE_STRUCT, "proc",
                   4977:                       SYSCTL_DESCR("Process vm information"),
                   4978:                       sysctl_vmproc, 0, NULL, 0,
                   4979:                       CTL_VM, VM_PROC, CTL_EOL);
                   4980: #ifndef __USER_VA0_IS_SAFE
        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
                       CTLTYPE_INT, "user_va0_disable",
                       SYSCTL_DESCR("Disable VA 0"),
                       sysctl_user_va0_disable, 0, &user_va0_disable, 0,
                       CTL_VM, CTL_CREATE, CTL_EOL);
                   4987: #endif
1.331.2.3  skrll    4988: }
