Annotated listing of src/sys/uvm/uvm_aobj.c (NetBSD CVS, cvs.NetBSD.org), revision 1.137.

Annotation of src/sys/uvm/uvm_aobj.c, Revision 1.137

1.137   ! ad          1: /*     $NetBSD: uvm_aobj.c,v 1.136 2020/02/24 12:38:57 rin Exp $       */
1.6       mrg         2:
1.7       chs         3: /*
                      4:  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
                      5:  *                    Washington University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     18:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     19:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     20:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     21:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     22:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     23:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     24:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     25:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     26:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     27:  *
1.4       mrg        28:  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
                     29:  */
1.113     rmind      30:
1.7       chs        31: /*
                     32:  * uvm_aobj.c: anonymous memory uvm_object pager
                     33:  *
                     34:  * author: Chuck Silvers <chuq@chuq.com>
                     35:  * started: Jan-1998
                     36:  *
                     37:  * - design mostly from Chuck Cranor
                     38:  */
1.49      lukem      39:
                     40: #include <sys/cdefs.h>
1.137   ! ad         41: __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.136 2020/02/24 12:38:57 rin Exp $");
1.7       chs        42:
1.123     pooka      43: #ifdef _KERNEL_OPT
1.7       chs        44: #include "opt_uvmhist.h"
1.123     pooka      45: #endif
1.1       mrg        46:
                     47: #include <sys/param.h>
                     48: #include <sys/systm.h>
1.37      chs        49: #include <sys/kernel.h>
1.104     rmind      50: #include <sys/kmem.h>
1.12      thorpej    51: #include <sys/pool.h>
1.119     matt       52: #include <sys/atomic.h>
1.1       mrg        53:
                     54: #include <uvm/uvm.h>
1.132     ad         55: #include <uvm/uvm_page_array.h>
1.1       mrg        56:
                     57: /*
1.117     rmind      58:  * An anonymous UVM object (aobj) manages anonymous-memory.  In addition to
                     59:  * keeping the list of resident pages, it may also keep a list of allocated
                     60:  * swap blocks.  Depending on the size of the object, this list is either
                     61:  * stored in an array (small objects) or in a hash table (large objects).
                     62:  *
                     63:  * Lock order
                     64:  *
1.118     rmind      65:  *     uao_list_lock ->
                     66:  *             uvm_object::vmobjlock
1.1       mrg        67:  */
                     68:
                     69: /*
1.117     rmind      70:  * Note: for hash tables, we break the address space of the aobj into blocks
                     71:  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
1.1       mrg        72:  */
                     73:
#define	UAO_SWHASH_CLUSTER_SHIFT	4
#define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index. */
#define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
#define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/*
 * Given an ELT, return its pageidx base.
 *
 * Fix: the parameter was previously spelled "ELT" while the expansion
 * used "(elt)", so the macro only compiled when the argument happened to
 * be a variable literally named "elt".  The parameter now matches the
 * expansion.
 */
#define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function. */
#define	UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */
#define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define	UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define	UAO_SWHASH_MAXBUCKETS		256
#define	UAO_SWHASH_BUCKETS(aobj) \
    (MIN((aobj)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
1.1       mrg       107:
                    108: /*
                    109:  * uao_swhash_elt: when a hash table is being used, this structure defines
                    110:  * the format of an entry in the bucket list.
                    111:  */
                    112:
struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag': pageidx >> UAO_SWHASH_CLUSTER_SHIFT */
	int count;				/* our number of active (non-zero) slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots; 0 means no swap slot */
};
                    119:
                    120: /*
                    121:  * uao_swhash: the swap hash table structure
                    122:  */
                    123:
                    124: LIST_HEAD(uao_swhash, uao_swhash_elt);
                    125:
1.12      thorpej   126: /*
1.113     rmind     127:  * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
                    128:  * Note: pages for this pool must not come from a pageable kernel map.
1.12      thorpej   129:  */
1.117     rmind     130: static struct pool     uao_swhash_elt_pool     __cacheline_aligned;
1.1       mrg       131:
                    132: /*
                    133:  * uvm_aobj: the actual anon-backed uvm_object
                    134:  *
                    135:  * => the uvm_object is at the top of the structure, this allows
1.46      chs       136:  *   (struct uvm_aobj *) == (struct uvm_object *)
1.1       mrg       137:  * => only one of u_swslots and u_swhash is used in any given aobj
                    138:  */
                    139:
struct uvm_aobj {
	struct uvm_object u_obj; /* has: lock, pgops, #pages, #refs */
	pgoff_t u_pages;	 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	int *u_swslots;		 /* array of offset->swapslot mappings */
				 /*
				  * hashtable of offset->swapslot mappings
				  * (u_swhash is an array of bucket heads)
				  */
	struct uao_swhash *u_swhash;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
	int u_freelist;		  /* freelist to allocate pages from;
				   * VM_NFREELIST means any freelist
				   * (see uao_set_pgfl/uao_pagealloc) */
};
                    154:
1.62      junyoung  155: static void    uao_free(struct uvm_aobj *);
                    156: static int     uao_get(struct uvm_object *, voff_t, struct vm_page **,
                    157:                    int *, int, vm_prot_t, int, int);
1.86      matt      158: static int     uao_put(struct uvm_object *, voff_t, voff_t, int);
1.72      yamt      159:
                    160: #if defined(VMSWAP)
                    161: static struct uao_swhash_elt *uao_find_swhash_elt
1.85      thorpej   162:     (struct uvm_aobj *, int, bool);
1.72      yamt      163:
1.85      thorpej   164: static bool uao_pagein(struct uvm_aobj *, int, int);
                    165: static bool uao_pagein_page(struct uvm_aobj *, int);
1.72      yamt      166: #endif /* defined(VMSWAP) */
1.1       mrg       167:
1.121     riastrad  168: static struct vm_page  *uao_pagealloc(struct uvm_object *, voff_t, int);
                    169:
1.1       mrg       170: /*
                    171:  * aobj_pager
1.41      chs       172:  *
1.1       mrg       173:  * note that some functions (e.g. put) are handled elsewhere
                    174:  */
                    175:
/* Pager operations vector installed on every anonymous UVM object. */
const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_get = uao_get,
	.pgo_put = uao_put,
};
                    182:
                    183: /*
                    184:  * uao_list: global list of active aobjs, locked by uao_list_lock
                    185:  */
                    186:
1.117     rmind     187: static LIST_HEAD(aobjlist, uvm_aobj) uao_list  __cacheline_aligned;
                    188: static kmutex_t                uao_list_lock           __cacheline_aligned;
1.1       mrg       189:
                    190: /*
                    191:  * hash table/array related functions
                    192:  */
                    193:
1.72      yamt      194: #if defined(VMSWAP)
                    195:
1.1       mrg       196: /*
                    197:  * uao_find_swhash_elt: find (or create) a hash table entry for a page
                    198:  * offset.
                    199:  *
                    200:  * => the object should be locked by the caller
                    201:  */
                    202:
1.5       mrg       203: static struct uao_swhash_elt *
1.85      thorpej   204: uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
1.5       mrg       205: {
                    206:        struct uao_swhash *swhash;
                    207:        struct uao_swhash_elt *elt;
1.28      kleink    208:        voff_t page_tag;
1.1       mrg       209:
1.45      chs       210:        swhash = UAO_SWHASH_HASH(aobj, pageidx);
                    211:        page_tag = UAO_SWHASH_ELT_TAG(pageidx);
1.1       mrg       212:
1.5       mrg       213:        /*
                    214:         * now search the bucket for the requested tag
                    215:         */
1.45      chs       216:
1.37      chs       217:        LIST_FOREACH(elt, swhash, list) {
1.45      chs       218:                if (elt->tag == page_tag) {
                    219:                        return elt;
                    220:                }
1.5       mrg       221:        }
1.45      chs       222:        if (!create) {
1.5       mrg       223:                return NULL;
1.45      chs       224:        }
1.5       mrg       225:
                    226:        /*
1.12      thorpej   227:         * allocate a new entry for the bucket and init/insert it in
1.5       mrg       228:         */
1.45      chs       229:
                    230:        elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
                    231:        if (elt == NULL) {
                    232:                return NULL;
                    233:        }
1.5       mrg       234:        LIST_INSERT_HEAD(swhash, elt, list);
                    235:        elt->tag = page_tag;
                    236:        elt->count = 0;
1.9       perry     237:        memset(elt->slots, 0, sizeof(elt->slots));
1.45      chs       238:        return elt;
1.1       mrg       239: }
                    240:
                    241: /*
                    242:  * uao_find_swslot: find the swap slot number for an aobj/pageidx
                    243:  *
1.41      chs       244:  * => object must be locked by caller
1.1       mrg       245:  */
1.46      chs       246:
                    247: int
1.67      thorpej   248: uao_find_swslot(struct uvm_object *uobj, int pageidx)
1.1       mrg       249: {
1.46      chs       250:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
                    251:        struct uao_swhash_elt *elt;
1.1       mrg       252:
1.5       mrg       253:        /*
                    254:         * if noswap flag is set, then we never return a slot
                    255:         */
1.1       mrg       256:
1.5       mrg       257:        if (aobj->u_flags & UAO_FLAG_NOSWAP)
1.117     rmind     258:                return 0;
1.1       mrg       259:
1.5       mrg       260:        /*
                    261:         * if hashing, look in hash table.
                    262:         */
1.1       mrg       263:
1.5       mrg       264:        if (UAO_USES_SWHASH(aobj)) {
1.87      thorpej   265:                elt = uao_find_swhash_elt(aobj, pageidx, false);
1.117     rmind     266:                return elt ? UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) : 0;
1.5       mrg       267:        }
1.1       mrg       268:
1.41      chs       269:        /*
1.5       mrg       270:         * otherwise, look in the array
                    271:         */
1.46      chs       272:
1.117     rmind     273:        return aobj->u_swslots[pageidx];
1.1       mrg       274: }
                    275:
                    276: /*
                    277:  * uao_set_swslot: set the swap slot for a page in an aobj.
                    278:  *
                    279:  * => setting a slot to zero frees the slot
                    280:  * => object must be locked by caller
1.45      chs       281:  * => we return the old slot number, or -1 if we failed to allocate
                    282:  *    memory to record the new slot number
1.1       mrg       283:  */
1.46      chs       284:
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int oldslot;
	UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
	UVMHIST_LOG(pdhist, "aobj %#jx pageidx %jd slot %jd",
	    (uintptr_t)aobj, pageidx, slot, 0);

	/* Caller must hold the object write-locked, unless it is dying. */
	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);

	/*
	 * if noswap flag is set, then we can't set a non-zero slot.
	 */

	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		KASSERTMSG(slot == 0, "uao_set_swslot: no swap object");
		return 0;
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */

	if (UAO_USES_SWHASH(aobj)) {

		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */

		elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
		if (elt == NULL) {
			/* Entry allocation failed: -1 only when storing. */
			return slot ? -1 : 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */

		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}
                    350:
1.72      yamt      351: #endif /* defined(VMSWAP) */
                    352:
1.1       mrg       353: /*
                    354:  * end of hash/array functions
                    355:  */
                    356:
                    357: /*
                    358:  * uao_free: free all resources held by an aobj, and then free the aobj
                    359:  *
                    360:  * => the aobj should be dead
                    361:  */
1.46      chs       362:
static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(rw_write_held(uobj->vmobjlock));
	/* Release every swap slot in the object, then drop the lock. */
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

#if defined(VMSWAP)
	if (UAO_USES_SWHASH(aobj)) {

		/*
		 * free the hash table itself.
		 */

		hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
	} else {

		/*
		 * free the array itself.
		 */

		kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
	}
#endif /* defined(VMSWAP) */

	/*
	 * finally free the aobj itself
	 */

	uvm_obj_destroy(uobj, true);
	kmem_free(aobj, sizeof(struct uvm_aobj));
}
                    397:
                    398: /*
                    399:  * pager functions
                    400:  */
                    401:
                    402: /*
                    403:  * uao_create: create an aobj of the given size and return its uvm_object.
                    404:  *
                    405:  * => for normal use, flags are always zero
                    406:  * => for the kernel object, the flags are:
                    407:  *     UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
                    408:  *     UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
                    409:  */
1.46      chs       410:
struct uvm_object *
uao_create(voff_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static krwlock_t kernel_object_lock __cacheline_aligned;
	static int kobj_alloced __diagused = 0;
	pgoff_t pages = round_page((uint64_t)size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */

	if (flags & UAO_FLAG_KERNOBJ) {
		/* Kernel object: static storage, may happen only once. */
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		/* Second-stage call: enable swapping on the kernel object. */
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
		/* refs is unused on this path (early return below). */
		refs = 0xdeadbeaf; /* XXX: gcc */
	} else {
		aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * no freelist by default
	 */

	aobj->u_freelist = VM_NFREELIST;

	/*
	 * allocate hash/array if necessary
	 *
	 * note: in the KERNSWAP case no need to worry about locking since
	 * we are still booting we should be the only thread around.
	 */

	if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
#if defined(VMSWAP)
		const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
			    HASH_LIST, kernswap ? false : true,
			    &aobj->u_swhashmask);
			if (aobj->u_swhash == NULL)
				panic("uao_create: hashinit swhash failed");
		} else {
			aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
			    kernswap ? KM_NOSLEEP : KM_SLEEP);
			if (aobj->u_swslots == NULL)
				panic("uao_create: swslots allocation failed");
		}
#endif /* defined(VMSWAP) */

		/* KERNSWAP: object already initialised; just clear noswap. */
		if (flags) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
		}
	}

	/*
	 * Initialise UVM object.
	 */

	const bool kernobj = (flags & UAO_FLAG_KERNOBJ) != 0;
	uvm_obj_init(&aobj->u_obj, &aobj_pager, !kernobj, refs);
	if (__predict_false(kernobj)) {
		/* Initialisation only once, for UAO_FLAG_KERNOBJ. */
		rw_init(&kernel_object_lock);
		uvm_obj_setlock(&aobj->u_obj, &kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */

	mutex_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mutex_exit(&uao_list_lock);
	return(&aobj->u_obj);
}
                    503:
                    504: /*
1.121     riastrad  505:  * uao_set_pgfl: allocate pages only from the specified freelist.
                    506:  *
                    507:  * => must be called before any pages are allocated for the object.
1.122     riastrad  508:  * => reset by setting it to VM_NFREELIST, meaning any freelist.
1.121     riastrad  509:  */
                    510:
                    511: void
                    512: uao_set_pgfl(struct uvm_object *uobj, int freelist)
                    513: {
                    514:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
                    515:
                    516:        KASSERTMSG((0 <= freelist), "invalid freelist %d", freelist);
1.122     riastrad  517:        KASSERTMSG((freelist <= VM_NFREELIST), "invalid freelist %d",
                    518:            freelist);
1.121     riastrad  519:
                    520:        aobj->u_freelist = freelist;
                    521: }
                    522:
                    523: /*
                    524:  * uao_pagealloc: allocate a page for aobj.
                    525:  */
                    526:
                    527: static inline struct vm_page *
                    528: uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags)
                    529: {
                    530:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
                    531:
                    532:        if (__predict_true(aobj->u_freelist == VM_NFREELIST))
                    533:                return uvm_pagealloc(uobj, offset, NULL, flags);
                    534:        else
                    535:                return uvm_pagealloc_strat(uobj, offset, NULL, flags,
                    536:                    UVM_PGA_STRAT_ONLY, aobj->u_freelist);
                    537: }
                    538:
                    539: /*
1.1       mrg       540:  * uao_init: set up aobj pager subsystem
                    541:  *
                    542:  * => called at boot time from uvm_pager_init()
                    543:  */
1.46      chs       544:
1.27      chs       545: void
1.46      chs       546: uao_init(void)
1.5       mrg       547: {
1.12      thorpej   548:        static int uao_initialized;
                    549:
                    550:        if (uao_initialized)
                    551:                return;
1.87      thorpej   552:        uao_initialized = true;
1.5       mrg       553:        LIST_INIT(&uao_list);
1.96      ad        554:        mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
1.107     pooka     555:        pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
                    556:            0, 0, 0, "uaoeltpl", NULL, IPL_VM);
1.1       mrg       557: }
                    558:
                    559: /*
1.118     rmind     560:  * uao_reference: hold a reference to an anonymous UVM object.
1.1       mrg       561:  */
1.5       mrg       562: void
1.67      thorpej   563: uao_reference(struct uvm_object *uobj)
1.1       mrg       564: {
1.118     rmind     565:        /* Kernel object is persistent. */
                    566:        if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
1.101     ad        567:                return;
1.118     rmind     568:        }
                    569:        atomic_inc_uint(&uobj->uo_refs);
1.1       mrg       570: }
                    571:
                    572: /*
1.118     rmind     573:  * uao_detach: drop a reference to an anonymous UVM object.
1.1       mrg       574:  */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;

	UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);

	/*
	 * Detaching from kernel object is a NOP.
	 */

	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */

	KASSERT(uobj->uo_refs > 0);
	UVMHIST_LOG(maphist,"  (uobj=%#jx)  ref=%jd",
	    (uintptr_t)uobj, uobj->uo_refs, 0, 0);
	if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) {
		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
		return;
	}

	/*
	 * Remove the aobj from the global list.
	 */

	mutex_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mutex_exit(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	uvm_page_array_init(&a);
	rw_enter(uobj->vmobjlock, RW_WRITER);
	while ((pg = uvm_page_array_fill_and_peek(&a, uobj, 0, 0, 0))
	    != NULL) {
		uvm_page_array_advance(&a);
		/* Revoke all pmap-level access to the page before freeing. */
		pmap_page_protect(pg, VM_PROT_NONE);
		if (pg->flags & PG_BUSY) {
			/*
			 * uvm_pagewait() releases vmobjlock while sleeping.
			 * The cached page array is stale once the lock has
			 * been dropped, so reset it and rescan from the
			 * beginning after relocking.
			 */
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_pagefree(pg);
	}
	uvm_page_array_fini(&a);

	/*
	 * Finally, free the anonymous UVM object itself.
	 */

	uao_free(aobj);
}
1.1       mrg       640:
                    641: /*
1.46      chs       642:  * uao_put: flush pages out of a uvm object
1.22      thorpej   643:  *
                    644:  * => object should be locked by caller.  we may _unlock_ the object
                    645:  *     if (and only if) we need to clean a page (PGO_CLEANIT).
                    646:  *     XXXJRT Currently, however, we don't.  In the case of cleaning
                    647:  *     XXXJRT a page, we simply just deactivate it.  Should probably
                    648:  *     XXXJRT handle this better, in the future (although "flushing"
                    649:  *     XXXJRT anonymous memory isn't terribly important).
                    650:  * => if PGO_CLEANIT is not set, then we will neither unlock the object
                    651:  *     nor block.
                    652:  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
                    653:  *     for flushing.
1.86      matt      654:  * => we return 0 unless we encountered some sort of I/O error
1.22      thorpej   655:  *     XXXJRT currently never happens, as we never directly initiate
                    656:  *     XXXJRT I/O
1.1       mrg       657:  */
1.22      thorpej   658:
static int
uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;
	voff_t curoff;
	UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);

	KASSERT(rw_write_held(uobj->vmobjlock));

	/* Clamp the requested range to the pages the object actually has. */
	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		if (stop == 0) {
			stop = aobj->u_pages << PAGE_SHIFT;
		} else {
			stop = round_page(stop);
		}
		/*
		 * NOTE(review): the (uint64_t) cast is applied after the
		 * shift, so if u_pages << PAGE_SHIFT overflowed its native
		 * type the comparison would see a truncated value — confirm
		 * u_pages can never be large enough for that here.
		 */
		if (stop > (uint64_t)(aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_put: strange, got an out of range "
			    "flush %#jx > %#jx (fixed)\n",
			    (uintmax_t)stop,
			    (uintmax_t)(aobj->u_pages << PAGE_SHIFT));
			stop = aobj->u_pages << PAGE_SHIFT;
		}
	}
	UVMHIST_LOG(maphist,
	    " flush start=%#jx, stop=%#jx, flags=%#jx",
	    start, stop, flags, 0);

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */

	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/* locked: uobj */
	uvm_page_array_init(&a);
	curoff = start;
	while ((pg = uvm_page_array_fill_and_peek(&a, uobj, curoff, 0, 0)) !=
	    NULL) {
		if (pg->offset >= stop) {
			break;
		}

		/*
		 * wait and try again if the page is busy.
		 */

		if (pg->flags & PG_BUSY) {
			/*
			 * uvm_pagewait() drops vmobjlock; the cached page
			 * array is stale after that, so reset it and rescan
			 * from curoff once the lock is reacquired.
			 */
			uvm_pagewait(pg, uobj->vmobjlock, "uao_put");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uvm_page_array_advance(&a);
		curoff = pg->offset + PAGE_SIZE;

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {

		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */

		case PGO_CLEANIT|PGO_FREE:
		case PGO_CLEANIT|PGO_DEACTIVATE:
		case PGO_DEACTIVATE:
 deactivate_it:
			uvm_pagelock(pg);
			uvm_pagedeactivate(pg);
			uvm_pageunlock(pg);
			break;

		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */

			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/*
			 * free the swap slot and the page.
			 */

			pmap_page_protect(pg, VM_PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */

			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_pagefree(pg);
			break;

		default:
			panic("%s: impossible", __func__);
		}
	}
	rw_exit(uobj->vmobjlock);
	uvm_page_array_fini(&a);
	return 0;
}
                    775:
                    776: /*
                    777:  * uao_get: fetch me a page
                    778:  *
                    779:  * we have three cases:
                    780:  * 1: page is resident     -> just return the page.
                    781:  * 2: page is zero-fill    -> allocate a new page and zero it.
                    782:  * 3: page is swapped out  -> fetch the page from swap.
                    783:  *
                    784:  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
                    785:  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
1.40      chs       786:  * then we will need to return EBUSY.
1.1       mrg       787:  *
                    788:  * => prefer map unlocked (not required)
                    789:  * => object must be locked!  we will _unlock_ it before starting any I/O.
                    790:  * => flags: PGO_ALLPAGES: get all of the pages
                    791:  *           PGO_LOCKED: fault data structures are locked
                    792:  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
                    793:  * => NOTE: caller must check for released pages!!
                    794:  */
1.46      chs       795:
static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	voff_t current_offset;
	struct vm_page *ptmp = NULL;	/* Quell compiler warning */
	int lcv, gotpages, maxpages, swslot, pageidx;
	bool done;
	UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist, "aobj=%#jx offset=%jd, flags=%jd",
		    (uintptr_t)uobj, offset, flags,0);

	/*
	 * get number of pages
	 */

	maxpages = *npagesp;

	/*
	 * step 1: handle the case where fault data structures are locked.
	 */

	if (flags & PGO_LOCKED) {

		/*
		 * step 1a: get pages that are already resident.   only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */

		done = true;	/* be optimistic */
		gotpages = 0;	/* # of pages we got so far */
		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
		    lcv++, current_offset += PAGE_SIZE) {
			/* do we care about this page?  if not, skip it */
			if (pps[lcv] == PGO_DONTCARE)
				continue;
			ptmp = uvm_pagelookup(uobj, current_offset);

			/*
			 * if page is new, attempt to allocate the page,
			 * zero-fill'd.
			 */

			if (ptmp == NULL && uao_find_swslot(uobj,
			    current_offset >> PAGE_SHIFT) == 0) {
				ptmp = uao_pagealloc(uobj, current_offset,
				    UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
				if (ptmp) {
					/* new page */
					ptmp->flags &= ~(PG_FAKE);
					uvm_pagemarkdirty(ptmp,
					    UVM_PAGE_STATUS_UNKNOWN);
					goto gotpage;
				}
			}

			/*
			 * to be useful must get a non-busy page
			 */

			if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
				if (lcv == centeridx ||
				    (flags & PGO_ALLPAGES) != 0)
					/* need to do a wait or I/O! */
					done = false;
				continue;
			}

			/*
			 * useful page: busy/lock it and plug it in our
			 * result array
			 */
			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);

			/* caller must un-busy this page */
			ptmp->flags |= PG_BUSY;
			UVM_PAGE_OWN(ptmp, "uao_get1");
gotpage:
			pps[lcv] = ptmp;
			gotpages++;
		}

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */

		UVMHIST_LOG(pdhist, "<- done (done=%jd)", done, 0,0,0);
		*npagesp = gotpages;
		if (done)
			return 0;
		else
			return EBUSY;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * object is locked.   data structures are unlocked.
	 */

	if ((flags & PGO_SYNCIO) == 0) {
		goto done;
	}

	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
	    lcv++, current_offset += PAGE_SIZE) {

		/*
		 * - skip over pages we've already gotten or don't want
		 * - skip over pages we don't _have_ to get
		 */

		if (pps[lcv] != NULL ||
		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
			continue;

		pageidx = current_offset >> PAGE_SHIFT;

		/*
		 * we have yet to locate the current page (pps[lcv]).   we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].   this
		 * 'break's the following while loop and indicates we are
		 * ready to move on to the next page in the "lcv" loop above.
		 *
		 * if we exit the while loop with pps[lcv] still set to NULL,
		 * then it means that we allocated a new busy/fake/clean page
		 * ptmp in the object and we need to do I/O to fill in the data.
		 */

		/* top of "pps" while loop */
		while (pps[lcv] == NULL) {
			/* look for a resident page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/* not resident?   allocate one now (if we can) */
			if (ptmp == NULL) {

				ptmp = uao_pagealloc(uobj, current_offset, 0);

				/* out of RAM? */
				if (ptmp == NULL) {
					/*
					 * drop the object lock while waiting
					 * for free memory, then retry the
					 * lookup from the top of the loop.
					 */
					rw_exit(uobj->vmobjlock);
					UVMHIST_LOG(pdhist,
					    "sleeping, ptmp == NULL\n",0,0,0,0);
					uvm_wait("uao_getpage");
					rw_enter(uobj->vmobjlock, RW_WRITER);
					continue;
				}

				/*
				 * got new page ready for I/O.  break pps while
				 * loop.  pps[lcv] is still NULL.
				 */

				break;
			}

			/* page is there, see if we need to wait on it */
			if ((ptmp->flags & PG_BUSY) != 0) {
				UVMHIST_LOG(pdhist,
				    "sleeping, ptmp->flags %#jx\n",
				    ptmp->flags,0,0,0);
				/*
				 * uvm_pagewait() drops vmobjlock; after
				 * relocking the page must be looked up
				 * again, hence the continue.
				 */
				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
				rw_enter(uobj->vmobjlock, RW_WRITER);
				continue;
			}

			/*
			 * if we get here then the page has become resident and
			 * unbusy between steps 1 and 2.  we busy it now (so we
			 * own it) and set pps[lcv] (so that we exit the while
			 * loop).
			 */

			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);
			/* we own it, caller must un-busy */
			ptmp->flags |= PG_BUSY;
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv] = ptmp;
		}

		/*
		 * if we own the valid page at the correct offset, pps[lcv] will
		 * point to it.   nothing more to do except go to the next page.
		 */

		if (pps[lcv])
			continue;			/* next lcv */

		/*
		 * we have a "fake/busy/clean" page that we just allocated.
		 * do the needed "i/o", either reading from swap or zeroing.
		 */

		swslot = uao_find_swslot(uobj, pageidx);

		/*
		 * just zero the page if there's nothing in swap.
		 */

		if (swslot == 0) {

			/*
			 * page hasn't existed before, just zero it.
			 */

			uvm_pagezero(ptmp);
		} else {
#if defined(VMSWAP)
			int error;

			UVMHIST_LOG(pdhist, "pagein from swslot %jd",
			     swslot, 0,0,0);

			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			rw_exit(uobj->vmobjlock);
			error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITER);

			/*
			 * I/O done.  check for errors.
			 */

			if (error != 0) {
				UVMHIST_LOG(pdhist, "<- done (error=%jd)",
				    error,0,0,0);

				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */

				swslot = uao_set_swslot(uobj, pageidx,
				    SWSLOT_BAD);
				if (swslot > 0) {
					uvm_swap_markbad(swslot, 1);
				}

				uvm_pagefree(ptmp);
				rw_exit(uobj->vmobjlock);
				return error;
			}
#else /* defined(VMSWAP) */
			panic("%s: pagein", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * note that we will allow the page being writably-mapped
		 * (!PG_RDONLY) regardless of access_type.
		 */
		uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_UNKNOWN);

		/*
		 * we got the page!   clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.   note
		 * that page is still busy.
		 *
		 * it is the callers job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */
		KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN);
		KASSERT((ptmp->flags & PG_FAKE) != 0);
		ptmp->flags &= ~PG_FAKE;
		pps[lcv] = ptmp;
	}

	/*
	 * finally, unlock object and return.
	 */

done:
	rw_exit(uobj->vmobjlock);
	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
	return 0;
}
                   1089:
1.72      yamt     1090: #if defined(VMSWAP)
                   1091:
1.1       mrg      1092: /*
1.18      chs      1093:  * uao_dropswap:  release any swap resources from this aobj page.
1.41      chs      1094:  *
1.18      chs      1095:  * => aobj must be locked or have a reference count of 0.
                   1096:  */
                   1097:
                   1098: void
1.67      thorpej  1099: uao_dropswap(struct uvm_object *uobj, int pageidx)
1.18      chs      1100: {
                   1101:        int slot;
                   1102:
                   1103:        slot = uao_set_swslot(uobj, pageidx, 0);
                   1104:        if (slot) {
                   1105:                uvm_swap_free(slot, 1);
                   1106:        }
1.27      chs      1107: }
                   1108:
                   1109: /*
                   1110:  * page in every page in every aobj that is paged-out to a range of swslots.
1.41      chs      1111:  *
1.27      chs      1112:  * => nothing should be locked.
1.87      thorpej  1113:  * => returns true if pagein was aborted due to lack of memory.
1.27      chs      1114:  */
1.46      chs      1115:
1.85      thorpej  1116: bool
1.67      thorpej  1117: uao_swap_off(int startslot, int endslot)
1.27      chs      1118: {
1.118     rmind    1119:        struct uvm_aobj *aobj;
1.27      chs      1120:
                   1121:        /*
1.118     rmind    1122:         * Walk the list of all anonymous UVM objects.  Grab the first.
1.27      chs      1123:         */
1.118     rmind    1124:        mutex_enter(&uao_list_lock);
                   1125:        if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
                   1126:                mutex_exit(&uao_list_lock);
                   1127:                return false;
                   1128:        }
                   1129:        uao_reference(&aobj->u_obj);
1.27      chs      1130:
1.118     rmind    1131:        do {
                   1132:                struct uvm_aobj *nextaobj;
                   1133:                bool rv;
1.27      chs      1134:
                   1135:                /*
1.118     rmind    1136:                 * Prefetch the next object and immediately hold a reference
                   1137:                 * on it, so neither the current nor the next entry could
                   1138:                 * disappear while we are iterating.
1.27      chs      1139:                 */
1.118     rmind    1140:                if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
                   1141:                        uao_reference(&nextaobj->u_obj);
1.27      chs      1142:                }
1.90      ad       1143:                mutex_exit(&uao_list_lock);
1.27      chs      1144:
                   1145:                /*
1.118     rmind    1146:                 * Page in all pages in the swap slot range.
1.27      chs      1147:                 */
1.135     ad       1148:                rw_enter(aobj->u_obj.vmobjlock, RW_WRITER);
1.118     rmind    1149:                rv = uao_pagein(aobj, startslot, endslot);
1.135     ad       1150:                rw_exit(aobj->u_obj.vmobjlock);
1.46      chs      1151:
1.118     rmind    1152:                /* Drop the reference of the current object. */
                   1153:                uao_detach(&aobj->u_obj);
1.27      chs      1154:                if (rv) {
1.118     rmind    1155:                        if (nextaobj) {
                   1156:                                uao_detach(&nextaobj->u_obj);
                   1157:                        }
1.27      chs      1158:                        return rv;
                   1159:                }
                   1160:
1.118     rmind    1161:                aobj = nextaobj;
1.90      ad       1162:                mutex_enter(&uao_list_lock);
1.118     rmind    1163:        } while (aobj);
1.27      chs      1164:
1.90      ad       1165:        mutex_exit(&uao_list_lock);
1.87      thorpej  1166:        return false;
1.27      chs      1167: }
                   1168:
                   1169: /*
                   1170:  * page in any pages from aobj in the given range.
                   1171:  *
                   1172:  * => aobj must be locked and is returned locked.
1.87      thorpej  1173:  * => returns true if pagein was aborted due to lack of memory.
1.27      chs      1174:  */
1.85      thorpej  1175: static bool
1.67      thorpej  1176: uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1.27      chs      1177: {
1.85      thorpej  1178:        bool rv;
1.27      chs      1179:
                   1180:        if (UAO_USES_SWHASH(aobj)) {
                   1181:                struct uao_swhash_elt *elt;
1.65      christos 1182:                int buck;
1.27      chs      1183:
                   1184: restart:
1.65      christos 1185:                for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
                   1186:                        for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
1.27      chs      1187:                             elt != NULL;
                   1188:                             elt = LIST_NEXT(elt, list)) {
                   1189:                                int i;
                   1190:
                   1191:                                for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
                   1192:                                        int slot = elt->slots[i];
                   1193:
                   1194:                                        /*
                   1195:                                         * if the slot isn't in range, skip it.
                   1196:                                         */
1.46      chs      1197:
1.41      chs      1198:                                        if (slot < startslot ||
1.27      chs      1199:                                            slot >= endslot) {
                   1200:                                                continue;
                   1201:                                        }
                   1202:
                   1203:                                        /*
                   1204:                                         * process the page,
                   1205:                                         * the start over on this object
                   1206:                                         * since the swhash elt
                   1207:                                         * may have been freed.
                   1208:                                         */
1.46      chs      1209:
1.27      chs      1210:                                        rv = uao_pagein_page(aobj,
                   1211:                                          UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
                   1212:                                        if (rv) {
                   1213:                                                return rv;
                   1214:                                        }
                   1215:                                        goto restart;
                   1216:                                }
                   1217:                        }
                   1218:                }
                   1219:        } else {
                   1220:                int i;
                   1221:
                   1222:                for (i = 0; i < aobj->u_pages; i++) {
                   1223:                        int slot = aobj->u_swslots[i];
                   1224:
                   1225:                        /*
                   1226:                         * if the slot isn't in range, skip it
                   1227:                         */
1.46      chs      1228:
1.27      chs      1229:                        if (slot < startslot || slot >= endslot) {
                   1230:                                continue;
                   1231:                        }
                   1232:
                   1233:                        /*
                   1234:                         * process the page.
                   1235:                         */
1.46      chs      1236:
1.27      chs      1237:                        rv = uao_pagein_page(aobj, i);
                   1238:                        if (rv) {
                   1239:                                return rv;
                   1240:                        }
                   1241:                }
                   1242:        }
                   1243:
1.87      thorpej  1244:        return false;
1.27      chs      1245: }
                   1246:
                   1247: /*
1.117     rmind    1248:  * uao_pagein_page: page in a single page from an anonymous UVM object.
1.27      chs      1249:  *
1.117     rmind    1250:  * => Returns true if pagein was aborted due to lack of memory.
                   1251:  * => Object must be locked and is returned locked.
1.27      chs      1252:  */
1.46      chs      1253:
1.85      thorpej  1254: static bool
1.67      thorpej  1255: uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1.27      chs      1256: {
1.117     rmind    1257:        struct uvm_object *uobj = &aobj->u_obj;
1.27      chs      1258:        struct vm_page *pg;
1.57      pk       1259:        int rv, npages;
1.27      chs      1260:
                   1261:        pg = NULL;
                   1262:        npages = 1;
1.117     rmind    1263:
1.135     ad       1264:        KASSERT(rw_write_held(uobj->vmobjlock));
1.128     msaitoh  1265:        rv = uao_get(uobj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages,
1.117     rmind    1266:            0, VM_PROT_READ | VM_PROT_WRITE, 0, PGO_SYNCIO);
1.27      chs      1267:
                   1268:        /*
                   1269:         * relock and finish up.
                   1270:         */
1.46      chs      1271:
1.135     ad       1272:        rw_enter(uobj->vmobjlock, RW_WRITER);
1.27      chs      1273:        switch (rv) {
1.40      chs      1274:        case 0:
1.27      chs      1275:                break;
                   1276:
1.40      chs      1277:        case EIO:
                   1278:        case ERESTART:
1.46      chs      1279:
1.27      chs      1280:                /*
                   1281:                 * nothing more to do on errors.
1.40      chs      1282:                 * ERESTART can only mean that the anon was freed,
1.27      chs      1283:                 * so again there's nothing to do.
                   1284:                 */
1.46      chs      1285:
1.87      thorpej  1286:                return false;
1.59      pk       1287:
                   1288:        default:
1.87      thorpej  1289:                return true;
1.27      chs      1290:        }
                   1291:
                   1292:        /*
                   1293:         * ok, we've got the page now.
                   1294:         * mark it as dirty, clear its swslot and un-busy it.
                   1295:         */
1.57      pk       1296:        uao_dropswap(&aobj->u_obj, pageidx);
1.27      chs      1297:
                   1298:        /*
1.80      yamt     1299:         * make sure it's on a page queue.
1.27      chs      1300:         */
1.133     ad       1301:        uvm_pagelock(pg);
1.131     ad       1302:        uvm_pageenqueue(pg);
1.137   ! ad       1303:        uvm_pageunbusy(pg);
1.133     ad       1304:        uvm_pageunlock(pg);
1.56      yamt     1305:
1.137   ! ad       1306:        pg->flags &= ~(PG_FAKE);
1.134     ad       1307:        uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1.56      yamt     1308:
1.87      thorpej  1309:        return false;
1.1       mrg      1310: }
1.72      yamt     1311:
1.75      yamt     1312: /*
                   1313:  * uao_dropswap_range: drop swapslots in the range.
                   1314:  *
                   1315:  * => aobj must be locked and is returned locked.
                   1316:  * => start is inclusive.  end is exclusive.
                   1317:  */
                   1318:
void
uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int swpgonlydelta = 0;

	KASSERT(rw_write_held(uobj->vmobjlock));

	/* end == 0 means "to the end of the object". */
	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		/*
		 * Each hash element covers one cluster of page slots and is
		 * identified by a tag; only elements whose tag falls inside
		 * [taglo, taghi] can intersect the requested page range.
		 */
		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			     elt != NULL;
			     elt = next) {
				int startidx, endidx;
				int j;

				/* grab 'next' first: elt may be freed below */
				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				/*
				 * Clip the in-cluster index window for the
				 * boundary elements; interior elements are
				 * cleared in full.
				 */
				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					/* caller must have freed the pages */
					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);
					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				/* reclaim the element once it is empty */
				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		/* flat slot array: clamp and free directly by page index */
		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */

	if (swpgonlydelta > 0) {
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
	}
}
                   1415:
1.72      yamt     1416: #endif /* defined(VMSWAP) */

CVSweb <webmaster@jp.NetBSD.org>