[BACK]Return to uvm_aobj.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / uvm

Annotation of src/sys/uvm/uvm_aobj.c, Revision 1.68.2.3

1.68.2.3! yamt        1: /*     $NetBSD: uvm_aobj.c,v 1.68.2.2 2006/12/30 20:51:05 yamt Exp $   */
1.6       mrg         2:
1.7       chs         3: /*
                      4:  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
                      5:  *                    Washington University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  * 3. All advertising materials mentioning features or use of this software
                     17:  *    must display the following acknowledgement:
                     18:  *      This product includes software developed by Charles D. Cranor and
                     19:  *      Washington University.
                     20:  * 4. The name of the author may not be used to endorse or promote products
                     21:  *    derived from this software without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     24:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     25:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     26:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     27:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     28:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     29:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     30:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     31:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     32:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     33:  *
1.4       mrg        34:  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
                     35:  */
1.7       chs        36: /*
                     37:  * uvm_aobj.c: anonymous memory uvm_object pager
                     38:  *
                     39:  * author: Chuck Silvers <chuq@chuq.com>
                     40:  * started: Jan-1998
                     41:  *
                     42:  * - design mostly from Chuck Cranor
                     43:  */
1.49      lukem      44:
                     45: #include <sys/cdefs.h>
1.68.2.3! yamt       46: __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.68.2.2 2006/12/30 20:51:05 yamt Exp $");
1.7       chs        47:
                     48: #include "opt_uvmhist.h"
1.1       mrg        49:
                     50: #include <sys/param.h>
                     51: #include <sys/systm.h>
                     52: #include <sys/proc.h>
                     53: #include <sys/malloc.h>
1.37      chs        54: #include <sys/kernel.h>
1.12      thorpej    55: #include <sys/pool.h>
1.1       mrg        56:
                     57: #include <uvm/uvm.h>
                     58:
                     59: /*
                     60:  * an aobj manages anonymous-memory backed uvm_objects.   in addition
                     61:  * to keeping the list of resident pages, it also keeps a list of
                     62:  * allocated swap blocks.  depending on the size of the aobj this list
                     63:  * of allocated swap blocks is either stored in an array (small objects)
                     64:  * or in a hash table (large objects).
                     65:  */
                     66:
                     67: /*
                     68:  * local structures
                     69:  */
                     70:
                     71: /*
                     72:  * for hash tables, we break the address space of the aobj into blocks
                     73:  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
                     74:  * be a power of two.
                     75:  */
                     76:
                     77: #define UAO_SWHASH_CLUSTER_SHIFT 4
                     78: #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
                     79:
                     80: /* get the "tag" for this page index */
                     81: #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
                     82:        ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
                     83:
1.68.2.1  yamt       84: #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
                     85:        ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
                     86:
1.1       mrg        87: /* given an ELT and a page index, find the swap slot */
                     88: #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
1.68.2.1  yamt       89:        ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])
1.1       mrg        90:
                     91: /* given an ELT, return its pageidx base */
                     92: #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
                     93:        ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
                     94:
                     95: /*
                     96:  * the swhash hash function
                     97:  */
1.46      chs        98:
1.1       mrg        99: #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
                    100:        (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
                    101:                            & (AOBJ)->u_swhashmask)])
                    102:
                    103: /*
                    104:  * the swhash threshold determines if we will use an array or a
                    105:  * hash table to store the list of allocated swap blocks.
                    106:  */
                    107:
                    108: #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
                    109: #define UAO_USES_SWHASH(AOBJ) \
                    110:        ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)        /* use hash? */
                    111:
                    112: /*
1.3       chs       113:  * the number of buckets in a swhash, with an upper bound
1.1       mrg       114:  */
1.46      chs       115:
1.1       mrg       116: #define UAO_SWHASH_MAXBUCKETS 256
                    117: #define UAO_SWHASH_BUCKETS(AOBJ) \
1.46      chs       118:        (MIN((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
1.1       mrg       119:             UAO_SWHASH_MAXBUCKETS))
                    120:
                    121:
                    122: /*
                    123:  * uao_swhash_elt: when a hash table is being used, this structure defines
                    124:  * the format of an entry in the bucket list.
                    125:  */
                    126:
                    127: struct uao_swhash_elt {
1.5       mrg       128:        LIST_ENTRY(uao_swhash_elt) list;        /* the hash list */
1.28      kleink    129:        voff_t tag;                             /* our 'tag' */
1.5       mrg       130:        int count;                              /* our number of active slots */
                    131:        int slots[UAO_SWHASH_CLUSTER_SIZE];     /* the slots */
1.1       mrg       132: };
                    133:
                    134: /*
                    135:  * uao_swhash: the swap hash table structure
                    136:  */
                    137:
                    138: LIST_HEAD(uao_swhash, uao_swhash_elt);
                    139:
1.12      thorpej   140: /*
                    141:  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
1.64      simonb    142:  * NOTE: Pages for this pool must not come from a pageable kernel map!
1.12      thorpej   143:  */
1.64      simonb    144: POOL_INIT(uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, 0, 0,
                    145:     "uaoeltpl", NULL);
1.1       mrg       146:
                    147: /*
                    148:  * uvm_aobj: the actual anon-backed uvm_object
                    149:  *
                    150:  * => the uvm_object is at the top of the structure, this allows
1.46      chs       151:  *   (struct uvm_aobj *) == (struct uvm_object *)
1.1       mrg       152:  * => only one of u_swslots and u_swhash is used in any given aobj
                    153:  */
                    154:
                    155: struct uvm_aobj {
1.5       mrg       156:        struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
1.68.2.2  yamt      157:        pgoff_t u_pages;         /* number of pages in entire object */
1.5       mrg       158:        int u_flags;             /* the flags (see uvm_aobj.h) */
                    159:        int *u_swslots;          /* array of offset->swapslot mappings */
                    160:                                 /*
                    161:                                  * hashtable of offset->swapslot mappings
                    162:                                  * (u_swhash is an array of bucket heads)
                    163:                                  */
                    164:        struct uao_swhash *u_swhash;
                    165:        u_long u_swhashmask;            /* mask for hashtable */
                    166:        LIST_ENTRY(uvm_aobj) u_list;    /* global list of aobjs */
1.1       mrg       167: };
                    168:
                    169: /*
1.12      thorpej   170:  * uvm_aobj_pool: pool of uvm_aobj structures
                    171:  */
1.64      simonb    172: POOL_INIT(uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0, "aobjpl",
                    173:     &pool_allocator_nointr);
1.54      thorpej   174:
                    175: MALLOC_DEFINE(M_UVMAOBJ, "UVM aobj", "UVM aobj and related structures");
1.12      thorpej   176:
                    177: /*
1.1       mrg       178:  * local functions
                    179:  */
                    180:
1.62      junyoung  181: static void    uao_free(struct uvm_aobj *);
                    182: static int     uao_get(struct uvm_object *, voff_t, struct vm_page **,
                    183:                    int *, int, vm_prot_t, int, int);
1.68.2.3! yamt      184: static int     uao_put(struct uvm_object *, voff_t, voff_t, int);
1.68.2.1  yamt      185:
                    186: #if defined(VMSWAP)
                    187: static struct uao_swhash_elt *uao_find_swhash_elt
1.68.2.3! yamt      188:     (struct uvm_aobj *, int, bool);
1.68.2.1  yamt      189:
1.68.2.3! yamt      190: static bool uao_pagein(struct uvm_aobj *, int, int);
        !           191: static bool uao_pagein_page(struct uvm_aobj *, int);
1.68.2.1  yamt      192: static void uao_dropswap_range1(struct uvm_aobj *, voff_t, voff_t);
                    193: #endif /* defined(VMSWAP) */
1.1       mrg       194:
                    195: /*
                    196:  * aobj_pager
1.41      chs       197:  *
1.1       mrg       198:  * note that some functions (e.g. put) are handled elsewhere
                    199:  */
                    200:
                    201: struct uvm_pagerops aobj_pager = {
1.27      chs       202:        NULL,                   /* init */
1.5       mrg       203:        uao_reference,          /* reference */
                    204:        uao_detach,             /* detach */
                    205:        NULL,                   /* fault */
                    206:        uao_get,                /* get */
1.46      chs       207:        uao_put,                /* flush */
1.1       mrg       208: };
                    209:
                    210: /*
                    211:  * uao_list: global list of active aobjs, locked by uao_list_lock
                    212:  */
                    213:
                    214: static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
1.42      chs       215: static struct simplelock uao_list_lock;
1.1       mrg       216:
                    217: /*
                    218:  * functions
                    219:  */
                    220:
                    221: /*
                    222:  * hash table/array related functions
                    223:  */
                    224:
1.68.2.1  yamt      225: #if defined(VMSWAP)
                    226:
1.1       mrg       227: /*
                    228:  * uao_find_swhash_elt: find (or create) a hash table entry for a page
                    229:  * offset.
                    230:  *
                    231:  * => the object should be locked by the caller
                    232:  */
                    233:
1.5       mrg       234: static struct uao_swhash_elt *
1.68.2.3! yamt      235: uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
1.5       mrg       236: {
                    237:        struct uao_swhash *swhash;
                    238:        struct uao_swhash_elt *elt;
1.28      kleink    239:        voff_t page_tag;
1.1       mrg       240:
1.45      chs       241:        swhash = UAO_SWHASH_HASH(aobj, pageidx);
                    242:        page_tag = UAO_SWHASH_ELT_TAG(pageidx);
1.1       mrg       243:
1.5       mrg       244:        /*
                    245:         * now search the bucket for the requested tag
                    246:         */
1.45      chs       247:
1.37      chs       248:        LIST_FOREACH(elt, swhash, list) {
1.45      chs       249:                if (elt->tag == page_tag) {
                    250:                        return elt;
                    251:                }
1.5       mrg       252:        }
1.45      chs       253:        if (!create) {
1.5       mrg       254:                return NULL;
1.45      chs       255:        }
1.5       mrg       256:
                    257:        /*
1.12      thorpej   258:         * allocate a new entry for the bucket and init/insert it in
1.5       mrg       259:         */
1.45      chs       260:
                    261:        elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
                    262:        if (elt == NULL) {
                    263:                return NULL;
                    264:        }
1.5       mrg       265:        LIST_INSERT_HEAD(swhash, elt, list);
                    266:        elt->tag = page_tag;
                    267:        elt->count = 0;
1.9       perry     268:        memset(elt->slots, 0, sizeof(elt->slots));
1.45      chs       269:        return elt;
1.1       mrg       270: }
                    271:
                    272: /*
                    273:  * uao_find_swslot: find the swap slot number for an aobj/pageidx
                    274:  *
1.41      chs       275:  * => object must be locked by caller
1.1       mrg       276:  */
1.46      chs       277:
                    278: int
1.67      thorpej   279: uao_find_swslot(struct uvm_object *uobj, int pageidx)
1.1       mrg       280: {
1.46      chs       281:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
                    282:        struct uao_swhash_elt *elt;
1.1       mrg       283:
1.5       mrg       284:        /*
                    285:         * if noswap flag is set, then we never return a slot
                    286:         */
1.1       mrg       287:
1.5       mrg       288:        if (aobj->u_flags & UAO_FLAG_NOSWAP)
                    289:                return(0);
1.1       mrg       290:
1.5       mrg       291:        /*
                    292:         * if hashing, look in hash table.
                    293:         */
1.1       mrg       294:
1.5       mrg       295:        if (UAO_USES_SWHASH(aobj)) {
1.68.2.3! yamt      296:                elt = uao_find_swhash_elt(aobj, pageidx, false);
1.5       mrg       297:                if (elt)
                    298:                        return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
                    299:                else
1.31      thorpej   300:                        return(0);
1.5       mrg       301:        }
1.1       mrg       302:
1.41      chs       303:        /*
1.5       mrg       304:         * otherwise, look in the array
                    305:         */
1.46      chs       306:
1.5       mrg       307:        return(aobj->u_swslots[pageidx]);
1.1       mrg       308: }
                    309:
                    310: /*
                    311:  * uao_set_swslot: set the swap slot for a page in an aobj.
                    312:  *
                    313:  * => setting a slot to zero frees the slot
                    314:  * => object must be locked by caller
1.45      chs       315:  * => we return the old slot number, or -1 if we failed to allocate
                    316:  *    memory to record the new slot number
1.1       mrg       317:  */
1.46      chs       318:
1.5       mrg       319: int
1.67      thorpej   320: uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
1.5       mrg       321: {
                    322:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1.45      chs       323:        struct uao_swhash_elt *elt;
1.5       mrg       324:        int oldslot;
                    325:        UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
                    326:        UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
                    327:            aobj, pageidx, slot, 0);
1.1       mrg       328:
1.5       mrg       329:        /*
1.46      chs       330:         * if noswap flag is set, then we can't set a non-zero slot.
1.5       mrg       331:         */
1.1       mrg       332:
1.5       mrg       333:        if (aobj->u_flags & UAO_FLAG_NOSWAP) {
                    334:                if (slot == 0)
1.46      chs       335:                        return(0);
1.1       mrg       336:
1.5       mrg       337:                printf("uao_set_swslot: uobj = %p\n", uobj);
1.46      chs       338:                panic("uao_set_swslot: NOSWAP object");
1.5       mrg       339:        }
1.1       mrg       340:
1.5       mrg       341:        /*
                    342:         * are we using a hash table?  if so, add it in the hash.
                    343:         */
1.1       mrg       344:
1.5       mrg       345:        if (UAO_USES_SWHASH(aobj)) {
1.39      chs       346:
1.12      thorpej   347:                /*
                    348:                 * Avoid allocating an entry just to free it again if
                    349:                 * the page had not swap slot in the first place, and
                    350:                 * we are freeing.
                    351:                 */
1.39      chs       352:
1.46      chs       353:                elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
1.12      thorpej   354:                if (elt == NULL) {
1.45      chs       355:                        return slot ? -1 : 0;
1.12      thorpej   356:                }
1.5       mrg       357:
                    358:                oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
                    359:                UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
                    360:
                    361:                /*
                    362:                 * now adjust the elt's reference counter and free it if we've
                    363:                 * dropped it to zero.
                    364:                 */
                    365:
                    366:                if (slot) {
                    367:                        if (oldslot == 0)
                    368:                                elt->count++;
1.45      chs       369:                } else {
                    370:                        if (oldslot)
1.5       mrg       371:                                elt->count--;
                    372:
                    373:                        if (elt->count == 0) {
                    374:                                LIST_REMOVE(elt, list);
1.12      thorpej   375:                                pool_put(&uao_swhash_elt_pool, elt);
1.5       mrg       376:                        }
                    377:                }
1.41      chs       378:        } else {
1.5       mrg       379:                /* we are using an array */
                    380:                oldslot = aobj->u_swslots[pageidx];
                    381:                aobj->u_swslots[pageidx] = slot;
                    382:        }
                    383:        return (oldslot);
1.1       mrg       384: }
                    385:
1.68.2.1  yamt      386: #endif /* defined(VMSWAP) */
                    387:
1.1       mrg       388: /*
                    389:  * end of hash/array functions
                    390:  */
                    391:
                    392: /*
                    393:  * uao_free: free all resources held by an aobj, and then free the aobj
                    394:  *
                    395:  * => the aobj should be dead
                    396:  */
1.46      chs       397:
1.1       mrg       398: static void
1.67      thorpej   399: uao_free(struct uvm_aobj *aobj)
1.1       mrg       400: {
1.46      chs       401:        int swpgonlydelta = 0;
1.1       mrg       402:
1.27      chs       403:        simple_unlock(&aobj->u_obj.vmobjlock);
1.68.2.1  yamt      404:
                    405: #if defined(VMSWAP)
                    406:        uao_dropswap_range1(aobj, 0, 0);
                    407:
1.5       mrg       408:        if (UAO_USES_SWHASH(aobj)) {
1.1       mrg       409:
1.5       mrg       410:                /*
1.68.2.1  yamt      411:                 * free the hash table itself.
1.5       mrg       412:                 */
1.46      chs       413:
1.34      thorpej   414:                free(aobj->u_swhash, M_UVMAOBJ);
1.5       mrg       415:        } else {
                    416:
                    417:                /*
1.68.2.1  yamt      418:                 * free the array itsself.
1.5       mrg       419:                 */
                    420:
1.34      thorpej   421:                free(aobj->u_swslots, M_UVMAOBJ);
1.1       mrg       422:        }
1.68.2.1  yamt      423: #endif /* defined(VMSWAP) */
1.1       mrg       424:
1.5       mrg       425:        /*
                    426:         * finally free the aobj itself
                    427:         */
1.46      chs       428:
1.12      thorpej   429:        pool_put(&uvm_aobj_pool, aobj);
1.46      chs       430:
                    431:        /*
                    432:         * adjust the counter of pages only in swap for all
                    433:         * the swap slots we've freed.
                    434:         */
                    435:
1.48      chs       436:        if (swpgonlydelta > 0) {
                    437:                simple_lock(&uvm.swap_data_lock);
                    438:                KASSERT(uvmexp.swpgonly >= swpgonlydelta);
                    439:                uvmexp.swpgonly -= swpgonlydelta;
                    440:                simple_unlock(&uvm.swap_data_lock);
                    441:        }
1.1       mrg       442: }
                    443:
                    444: /*
                    445:  * pager functions
                    446:  */
                    447:
                    448: /*
                    449:  * uao_create: create an aobj of the given size and return its uvm_object.
                    450:  *
                    451:  * => for normal use, flags are always zero
                    452:  * => for the kernel object, the flags are:
                    453:  *     UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
                    454:  *     UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
                    455:  */
1.46      chs       456:
1.5       mrg       457: struct uvm_object *
1.67      thorpej   458: uao_create(vsize_t size, int flags)
1.5       mrg       459: {
1.46      chs       460:        static struct uvm_aobj kernel_object_store;
                    461:        static int kobj_alloced = 0;
1.68.2.2  yamt      462:        pgoff_t pages = round_page(size) >> PAGE_SHIFT;
1.5       mrg       463:        struct uvm_aobj *aobj;
1.66      yamt      464:        int refs;
1.1       mrg       465:
1.5       mrg       466:        /*
1.27      chs       467:         * malloc a new aobj unless we are asked for the kernel object
                    468:         */
1.5       mrg       469:
1.46      chs       470:        if (flags & UAO_FLAG_KERNOBJ) {
                    471:                KASSERT(!kobj_alloced);
1.5       mrg       472:                aobj = &kernel_object_store;
                    473:                aobj->u_pages = pages;
1.46      chs       474:                aobj->u_flags = UAO_FLAG_NOSWAP;
1.66      yamt      475:                refs = UVM_OBJ_KERN;
1.5       mrg       476:                kobj_alloced = UAO_FLAG_KERNOBJ;
                    477:        } else if (flags & UAO_FLAG_KERNSWAP) {
1.46      chs       478:                KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
1.5       mrg       479:                aobj = &kernel_object_store;
                    480:                kobj_alloced = UAO_FLAG_KERNSWAP;
1.66      yamt      481:                refs = 0xdeadbeaf; /* XXX: gcc */
1.46      chs       482:        } else {
1.12      thorpej   483:                aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
1.5       mrg       484:                aobj->u_pages = pages;
1.46      chs       485:                aobj->u_flags = 0;
1.66      yamt      486:                refs = 1;
1.5       mrg       487:        }
1.1       mrg       488:
1.5       mrg       489:        /*
                    490:         * allocate hash/array if necessary
                    491:         *
                    492:         * note: in the KERNSWAP case no need to worry about locking since
                    493:         * we are still booting we should be the only thread around.
                    494:         */
1.46      chs       495:
1.5       mrg       496:        if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
1.68.2.1  yamt      497: #if defined(VMSWAP)
1.5       mrg       498:                int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ?
                    499:                    M_NOWAIT : M_WAITOK;
                    500:
                    501:                /* allocate hash table or array depending on object size */
1.27      chs       502:                if (UAO_USES_SWHASH(aobj)) {
1.5       mrg       503:                        aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
1.35      ad        504:                            HASH_LIST, M_UVMAOBJ, mflags, &aobj->u_swhashmask);
1.5       mrg       505:                        if (aobj->u_swhash == NULL)
                    506:                                panic("uao_create: hashinit swhash failed");
                    507:                } else {
1.34      thorpej   508:                        aobj->u_swslots = malloc(pages * sizeof(int),
1.5       mrg       509:                            M_UVMAOBJ, mflags);
                    510:                        if (aobj->u_swslots == NULL)
                    511:                                panic("uao_create: malloc swslots failed");
1.9       perry     512:                        memset(aobj->u_swslots, 0, pages * sizeof(int));
1.5       mrg       513:                }
1.68.2.1  yamt      514: #endif /* defined(VMSWAP) */
1.5       mrg       515:
                    516:                if (flags) {
                    517:                        aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
                    518:                        return(&aobj->u_obj);
                    519:                }
                    520:        }
                    521:
                    522:        /*
                    523:         * init aobj fields
                    524:         */
1.46      chs       525:
1.66      yamt      526:        UVM_OBJ_INIT(&aobj->u_obj, &aobj_pager, refs);
1.1       mrg       527:
1.5       mrg       528:        /*
                    529:         * now that aobj is ready, add it to the global list
                    530:         */
1.46      chs       531:
1.5       mrg       532:        simple_lock(&uao_list_lock);
                    533:        LIST_INSERT_HEAD(&uao_list, aobj, u_list);
                    534:        simple_unlock(&uao_list_lock);
                    535:        return(&aobj->u_obj);
1.1       mrg       536: }
                    537:
                    538:
                    539:
                    540: /*
                    541:  * uao_init: set up aobj pager subsystem
                    542:  *
                    543:  * => called at boot time from uvm_pager_init()
                    544:  */
1.46      chs       545:
1.27      chs       546: void
1.46      chs       547: uao_init(void)
1.5       mrg       548: {
1.12      thorpej   549:        static int uao_initialized;
                    550:
                    551:        if (uao_initialized)
                    552:                return;
1.68.2.3! yamt      553:        uao_initialized = true;
1.5       mrg       554:        LIST_INIT(&uao_list);
                    555:        simple_lock_init(&uao_list_lock);
1.1       mrg       556: }
                    557:
                    558: /*
                    559:  * uao_reference: add a ref to an aobj
                    560:  *
1.27      chs       561:  * => aobj must be unlocked
                    562:  * => just lock it and call the locked version
1.1       mrg       563:  */
1.46      chs       564:
1.5       mrg       565: void
1.67      thorpej   566: uao_reference(struct uvm_object *uobj)
1.1       mrg       567: {
1.27      chs       568:        simple_lock(&uobj->vmobjlock);
                    569:        uao_reference_locked(uobj);
                    570:        simple_unlock(&uobj->vmobjlock);
                    571: }
                    572:
/*
 * uao_reference_locked: add a ref to an aobj that is already locked
 *
 * => aobj must be locked
 * this needs to be separate from the normal routine
 * since sometimes we need to add a reference to an aobj when
 * it's already locked.
 */

void
uao_reference_locked(struct uvm_object *uobj)
{
	UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);

	/*
	 * kernel_object already has plenty of references, leave it alone.
	 * (kernel objects are never reference-counted to destruction, so
	 * bumping uo_refs for them would be pointless bookkeeping.)
	 */

	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/* uo_refs is protected by uobj->vmobjlock, held by the caller. */
	uobj->uo_refs++;
	UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
		    uobj, uobj->uo_refs,0,0);
}
                    598:
/*
 * uao_detach: drop a reference to an aobj
 *
 * => aobj must be unlocked
 * => just lock it and call the locked version
 *
 * NOTE: there is deliberately no simple_unlock() here --
 * uao_detach_locked() either unlocks the object itself or frees it
 * outright when the last reference goes away, so the lock must not be
 * touched again after the call.
 */

void
uao_detach(struct uvm_object *uobj)
{
	simple_lock(&uobj->vmobjlock);
	uao_detach_locked(uobj);
}
                    612:
/*
 * uao_detach_locked: drop a reference to an aobj
 *
 * => aobj must be locked, and is unlocked (or freed) upon return.
 * this needs to be separate from the normal routine
 * since sometimes we need to detach from an aobj when
 * it's already locked.
 */

void
uao_detach_locked(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;
	UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);

	/*
	 * detaching from kernel_object is a noop.
	 * (still drop the lock, since our contract is to consume it.)
	 */

	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
		simple_unlock(&uobj->vmobjlock);
		return;
	}

	UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
	uobj->uo_refs--;
	if (uobj->uo_refs) {
		/* other references remain: nothing more to do here. */
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
		return;
	}

	/*
	 * last reference dropped: tear the object down.
	 * remove the aobj from the global list first so the pagedaemon
	 * swap-off code can no longer find it.
	 */

	simple_lock(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	simple_unlock(&uao_list_lock);

	/*
	 * free all the pages left in the aobj.  for each page,
	 * when the page is no longer busy (and thus after any disk i/o that
	 * it's involved in is complete), release any swap resources and
	 * free the page itself.
	 */

	uvm_lock_pageq();
	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
		pmap_page_protect(pg, VM_PROT_NONE);
		if (pg->flags & PG_BUSY) {
			/*
			 * page is busy (e.g. pageout in progress): mark it
			 * wanted and sleep.  UVM_UNLOCK_AND_WAIT drops
			 * vmobjlock, so both locks must be reacquired and
			 * the list rescanned from the head.
			 */
			pg->flags |= PG_WANTED;
			uvm_unlock_pageq();
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false,
			    "uao_det", 0);
			simple_lock(&uobj->vmobjlock);
			uvm_lock_pageq();
			continue;
		}
		/* release the page's swap slot (if any), then the page. */
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_pagefree(pg);
	}
	uvm_unlock_pageq();

	/*
	 * finally, free the aobj itself.
	 */

	uao_free(aobj);
}
1.1       mrg       684:
                    685: /*
1.46      chs       686:  * uao_put: flush pages out of a uvm object
1.22      thorpej   687:  *
                    688:  * => object should be locked by caller.  we may _unlock_ the object
                    689:  *     if (and only if) we need to clean a page (PGO_CLEANIT).
                    690:  *     XXXJRT Currently, however, we don't.  In the case of cleaning
                    691:  *     XXXJRT a page, we simply just deactivate it.  Should probably
                    692:  *     XXXJRT handle this better, in the future (although "flushing"
                    693:  *     XXXJRT anonymous memory isn't terribly important).
                    694:  * => if PGO_CLEANIT is not set, then we will neither unlock the object
                    695:  *     or block.
                    696:  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
                    697:  *     for flushing.
                    698:  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
                    699:  *     that new pages are inserted on the tail end of the list.  thus,
                    700:  *     we can make a complete pass through the object in one go by starting
                    701:  *     at the head and working towards the tail (new pages are put in
                    702:  *     front of us).
                    703:  * => NOTE: we are allowed to lock the page queues, so the caller
                    704:  *     must not be holding the lock on them [e.g. pagedaemon had
                    705:  *     better not call us with the queues locked]
1.68.2.3! yamt      706:  * => we return 0 unless we encountered some sort of I/O error
1.22      thorpej   707:  *     XXXJRT currently never happens, as we never directly initiate
                    708:  *     XXXJRT I/O
                    709:  *
                    710:  * note on page traversal:
                    711:  *     we can traverse the pages in an object either by going down the
                    712:  *     linked list in "uobj->memq", or we can go over the address range
                    713:  *     by page doing hash table lookups for each address.  depending
                    714:  *     on how many pages are in the object it may be cheaper to do one
                    715:  *     or the other.  we set "by_list" to true if we are using memq.
                    716:  *     if the cost of a hash lookup was equal to the cost of the list
                    717:  *     traversal we could compare the number of pages in the start->stop
                    718:  *     range to the total number of pages in the object.  however, it
                    719:  *     seems that a hash table lookup is more expensive than the linked
                    720:  *     list traversal, so we multiply the number of pages in the
                    721:  *     start->stop range by a penalty which we define below.
1.1       mrg       722:  */
1.22      thorpej   723:
1.68      thorpej   724: static int
1.67      thorpej   725: uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
1.5       mrg       726: {
1.46      chs       727:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1.51      enami     728:        struct vm_page *pg, *nextpg, curmp, endmp;
1.68.2.3! yamt      729:        bool by_list;
1.28      kleink    730:        voff_t curoff;
1.46      chs       731:        UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
1.22      thorpej   732:
1.46      chs       733:        curoff = 0;
1.22      thorpej   734:        if (flags & PGO_ALLPAGES) {
                    735:                start = 0;
                    736:                stop = aobj->u_pages << PAGE_SHIFT;
1.68.2.3! yamt      737:                by_list = true;         /* always go by the list */
1.22      thorpej   738:        } else {
                    739:                start = trunc_page(start);
1.68.2.1  yamt      740:                if (stop == 0) {
                    741:                        stop = aobj->u_pages << PAGE_SHIFT;
                    742:                } else {
                    743:                        stop = round_page(stop);
                    744:                }
1.22      thorpej   745:                if (stop > (aobj->u_pages << PAGE_SHIFT)) {
                    746:                        printf("uao_flush: strange, got an out of range "
                    747:                            "flush (fixed)\n");
                    748:                        stop = aobj->u_pages << PAGE_SHIFT;
                    749:                }
                    750:                by_list = (uobj->uo_npages <=
1.46      chs       751:                    ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
1.22      thorpej   752:        }
                    753:        UVMHIST_LOG(maphist,
                    754:            " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
                    755:            start, stop, by_list, flags);
1.1       mrg       756:
1.5       mrg       757:        /*
1.22      thorpej   758:         * Don't need to do any work here if we're not freeing
                    759:         * or deactivating pages.
                    760:         */
1.46      chs       761:
1.22      thorpej   762:        if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
1.46      chs       763:                simple_unlock(&uobj->vmobjlock);
                    764:                return 0;
1.22      thorpej   765:        }
                    766:
1.5       mrg       767:        /*
1.51      enami     768:         * Initialize the marker pages.  See the comment in
                    769:         * genfs_putpages() also.
                    770:         */
                    771:
                    772:        curmp.uobject = uobj;
                    773:        curmp.offset = (voff_t)-1;
                    774:        curmp.flags = PG_BUSY;
                    775:        endmp.uobject = uobj;
                    776:        endmp.offset = (voff_t)-1;
                    777:        endmp.flags = PG_BUSY;
                    778:
                    779:        /*
1.46      chs       780:         * now do it.  note: we must update nextpg in the body of loop or we
1.51      enami     781:         * will get stuck.  we need to use nextpg if we'll traverse the list
                    782:         * because we may free "pg" before doing the next loop.
1.21      thorpej   783:         */
1.22      thorpej   784:
                    785:        if (by_list) {
1.51      enami     786:                TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
                    787:                nextpg = TAILQ_FIRST(&uobj->memq);
1.53      thorpej   788:                PHOLD(curlwp);
1.22      thorpej   789:        } else {
                    790:                curoff = start;
1.52      scw       791:                nextpg = NULL;  /* Quell compiler warning */
1.22      thorpej   792:        }
                    793:
1.46      chs       794:        uvm_lock_pageq();
1.22      thorpej   795:
                    796:        /* locked: both page queues and uobj */
1.51      enami     797:        for (;;) {
1.22      thorpej   798:                if (by_list) {
1.51      enami     799:                        pg = nextpg;
                    800:                        if (pg == &endmp)
                    801:                                break;
1.46      chs       802:                        nextpg = TAILQ_NEXT(pg, listq);
                    803:                        if (pg->offset < start || pg->offset >= stop)
1.22      thorpej   804:                                continue;
                    805:                } else {
1.51      enami     806:                        if (curoff < stop) {
                    807:                                pg = uvm_pagelookup(uobj, curoff);
                    808:                                curoff += PAGE_SIZE;
                    809:                        } else
                    810:                                break;
1.46      chs       811:                        if (pg == NULL)
1.22      thorpej   812:                                continue;
                    813:                }
1.46      chs       814:                switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
1.41      chs       815:
1.22      thorpej   816:                /*
                    817:                 * XXX In these first 3 cases, we always just
                    818:                 * XXX deactivate the page.  We may want to
                    819:                 * XXX handle the different cases more specifically
                    820:                 * XXX in the future.
                    821:                 */
1.46      chs       822:
1.22      thorpej   823:                case PGO_CLEANIT|PGO_FREE:
                    824:                case PGO_CLEANIT|PGO_DEACTIVATE:
                    825:                case PGO_DEACTIVATE:
1.25      thorpej   826:  deactivate_it:
1.68.2.2  yamt      827:                        /* skip the page if it's wired */
                    828:                        if (pg->wire_count != 0)
1.22      thorpej   829:                                continue;
                    830:
                    831:                        /* ...and deactivate the page. */
1.46      chs       832:                        pmap_clear_reference(pg);
                    833:                        uvm_pagedeactivate(pg);
1.22      thorpej   834:                        continue;
                    835:
                    836:                case PGO_FREE:
1.46      chs       837:
1.25      thorpej   838:                        /*
                    839:                         * If there are multiple references to
                    840:                         * the object, just deactivate the page.
                    841:                         */
1.46      chs       842:
1.25      thorpej   843:                        if (uobj->uo_refs > 1)
                    844:                                goto deactivate_it;
                    845:
1.22      thorpej   846:                        /*
1.51      enami     847:                         * wait and try again if the page is busy.
                    848:                         * otherwise free the swap slot and the page.
1.22      thorpej   849:                         */
1.46      chs       850:
                    851:                        pmap_page_protect(pg, VM_PROT_NONE);
1.51      enami     852:                        if (pg->flags & PG_BUSY) {
                    853:                                if (by_list) {
                    854:                                        TAILQ_INSERT_BEFORE(pg, &curmp, listq);
                    855:                                }
1.46      chs       856:                                pg->flags |= PG_WANTED;
                    857:                                uvm_unlock_pageq();
                    858:                                UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
                    859:                                    "uao_put", 0);
                    860:                                simple_lock(&uobj->vmobjlock);
                    861:                                uvm_lock_pageq();
1.51      enami     862:                                if (by_list) {
                    863:                                        nextpg = TAILQ_NEXT(&curmp, listq);
                    864:                                        TAILQ_REMOVE(&uobj->memq, &curmp,
                    865:                                            listq);
                    866:                                } else
                    867:                                        curoff -= PAGE_SIZE;
                    868:                                continue;
1.22      thorpej   869:                        }
1.68.2.1  yamt      870:
                    871:                        /*
                    872:                         * freeing swapslot here is not strictly necessary.
                    873:                         * however, leaving it here doesn't save much
                    874:                         * because we need to update swap accounting anyway.
                    875:                         */
                    876:
1.46      chs       877:                        uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
                    878:                        uvm_pagefree(pg);
1.22      thorpej   879:                        continue;
                    880:                }
                    881:        }
                    882:        uvm_unlock_pageq();
1.51      enami     883:        if (by_list) {
                    884:                TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1.53      thorpej   885:                PRELE(curlwp);
1.51      enami     886:        }
1.55      pk        887:        simple_unlock(&uobj->vmobjlock);
1.46      chs       888:        return 0;
1.1       mrg       889: }
                    890:
                    891: /*
                    892:  * uao_get: fetch me a page
                    893:  *
                    894:  * we have three cases:
                    895:  * 1: page is resident     -> just return the page.
                    896:  * 2: page is zero-fill    -> allocate a new page and zero it.
                    897:  * 3: page is swapped out  -> fetch the page from swap.
                    898:  *
                    899:  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
                    900:  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
1.40      chs       901:  * then we will need to return EBUSY.
1.1       mrg       902:  *
                    903:  * => prefer map unlocked (not required)
                    904:  * => object must be locked!  we will _unlock_ it before starting any I/O.
                    905:  * => flags: PGO_ALLPAGES: get all of the pages
                    906:  *           PGO_LOCKED: fault data structures are locked
                    907:  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
                    908:  * => NOTE: caller must check for released pages!!
                    909:  */
1.46      chs       910:
1.5       mrg       911: static int
1.67      thorpej   912: uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
                    913:     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
1.5       mrg       914: {
1.68.2.1  yamt      915: #if defined(VMSWAP)
1.5       mrg       916:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1.68.2.1  yamt      917: #endif /* defined(VMSWAP) */
1.28      kleink    918:        voff_t current_offset;
1.52      scw       919:        struct vm_page *ptmp = NULL;    /* Quell compiler warning */
1.68.2.1  yamt      920:        int lcv, gotpages, maxpages, swslot, pageidx;
1.68.2.3! yamt      921:        bool done;
1.5       mrg       922:        UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
                    923:
1.27      chs       924:        UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
1.68.2.1  yamt      925:                    (struct uvm_aobj *)uobj, offset, flags,0);
1.37      chs       926:
1.5       mrg       927:        /*
                    928:         * get number of pages
                    929:         */
1.46      chs       930:
1.5       mrg       931:        maxpages = *npagesp;
                    932:
                    933:        /*
                    934:         * step 1: handled the case where fault data structures are locked.
                    935:         */
1.1       mrg       936:
1.5       mrg       937:        if (flags & PGO_LOCKED) {
1.46      chs       938:
1.5       mrg       939:                /*
                    940:                 * step 1a: get pages that are already resident.   only do
                    941:                 * this if the data structures are locked (i.e. the first
                    942:                 * time through).
                    943:                 */
                    944:
1.68.2.3! yamt      945:                done = true;    /* be optimistic */
1.5       mrg       946:                gotpages = 0;   /* # of pages we got so far */
                    947:                for (lcv = 0, current_offset = offset ; lcv < maxpages ;
                    948:                    lcv++, current_offset += PAGE_SIZE) {
                    949:                        /* do we care about this page?  if not, skip it */
                    950:                        if (pps[lcv] == PGO_DONTCARE)
                    951:                                continue;
                    952:                        ptmp = uvm_pagelookup(uobj, current_offset);
                    953:
                    954:                        /*
1.30      thorpej   955:                         * if page is new, attempt to allocate the page,
                    956:                         * zero-fill'd.
1.5       mrg       957:                         */
1.46      chs       958:
                    959:                        if (ptmp == NULL && uao_find_swslot(&aobj->u_obj,
1.15      chs       960:                            current_offset >> PAGE_SHIFT) == 0) {
1.5       mrg       961:                                ptmp = uvm_pagealloc(uobj, current_offset,
1.30      thorpej   962:                                    NULL, UVM_PGA_ZERO);
1.5       mrg       963:                                if (ptmp) {
                    964:                                        /* new page */
1.47      chs       965:                                        ptmp->flags &= ~(PG_FAKE);
1.5       mrg       966:                                        ptmp->pqflags |= PQ_AOBJ;
1.47      chs       967:                                        goto gotpage;
1.5       mrg       968:                                }
                    969:                        }
                    970:
                    971:                        /*
1.46      chs       972:                         * to be useful must get a non-busy page
1.5       mrg       973:                         */
1.46      chs       974:
                    975:                        if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
1.5       mrg       976:                                if (lcv == centeridx ||
                    977:                                    (flags & PGO_ALLPAGES) != 0)
                    978:                                        /* need to do a wait or I/O! */
1.68.2.3! yamt      979:                                        done = false;
1.5       mrg       980:                                        continue;
                    981:                        }
                    982:
                    983:                        /*
                    984:                         * useful page: busy/lock it and plug it in our
                    985:                         * result array
                    986:                         */
1.46      chs       987:
1.5       mrg       988:                        /* caller must un-busy this page */
1.41      chs       989:                        ptmp->flags |= PG_BUSY;
1.5       mrg       990:                        UVM_PAGE_OWN(ptmp, "uao_get1");
1.47      chs       991: gotpage:
1.5       mrg       992:                        pps[lcv] = ptmp;
                    993:                        gotpages++;
1.46      chs       994:                }
1.5       mrg       995:
                    996:                /*
                    997:                 * step 1b: now we've either done everything needed or we
                    998:                 * to unlock and do some waiting or I/O.
                    999:                 */
                   1000:
                   1001:                UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
                   1002:                *npagesp = gotpages;
                   1003:                if (done)
1.46      chs      1004:                        return 0;
1.5       mrg      1005:                else
1.46      chs      1006:                        return EBUSY;
1.1       mrg      1007:        }
                   1008:
1.5       mrg      1009:        /*
                   1010:         * step 2: get non-resident or busy pages.
                   1011:         * object is locked.   data structures are unlocked.
                   1012:         */
                   1013:
1.68.2.1  yamt     1014:        if ((flags & PGO_SYNCIO) == 0) {
                   1015:                goto done;
                   1016:        }
                   1017:
1.5       mrg      1018:        for (lcv = 0, current_offset = offset ; lcv < maxpages ;
                   1019:            lcv++, current_offset += PAGE_SIZE) {
1.27      chs      1020:
1.5       mrg      1021:                /*
                   1022:                 * - skip over pages we've already gotten or don't want
                   1023:                 * - skip over pages we don't _have_ to get
                   1024:                 */
1.27      chs      1025:
1.5       mrg      1026:                if (pps[lcv] != NULL ||
                   1027:                    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
                   1028:                        continue;
                   1029:
1.27      chs      1030:                pageidx = current_offset >> PAGE_SHIFT;
                   1031:
1.5       mrg      1032:                /*
                   1033:                 * we have yet to locate the current page (pps[lcv]).   we
                   1034:                 * first look for a page that is already at the current offset.
                   1035:                 * if we find a page, we check to see if it is busy or
                   1036:                 * released.  if that is the case, then we sleep on the page
                   1037:                 * until it is no longer busy or released and repeat the lookup.
                   1038:                 * if the page we found is neither busy nor released, then we
                   1039:                 * busy it (so we own it) and plug it into pps[lcv].   this
                   1040:                 * 'break's the following while loop and indicates we are
                   1041:                 * ready to move on to the next page in the "lcv" loop above.
                   1042:                 *
                   1043:                 * if we exit the while loop with pps[lcv] still set to NULL,
                   1044:                 * then it means that we allocated a new busy/fake/clean page
                   1045:                 * ptmp in the object and we need to do I/O to fill in the data.
                   1046:                 */
                   1047:
                   1048:                /* top of "pps" while loop */
                   1049:                while (pps[lcv] == NULL) {
                   1050:                        /* look for a resident page */
                   1051:                        ptmp = uvm_pagelookup(uobj, current_offset);
                   1052:
                   1053:                        /* not resident?   allocate one now (if we can) */
                   1054:                        if (ptmp == NULL) {
                   1055:
                   1056:                                ptmp = uvm_pagealloc(uobj, current_offset,
1.19      chs      1057:                                    NULL, 0);
1.5       mrg      1058:
                   1059:                                /* out of RAM? */
                   1060:                                if (ptmp == NULL) {
                   1061:                                        simple_unlock(&uobj->vmobjlock);
                   1062:                                        UVMHIST_LOG(pdhist,
                   1063:                                            "sleeping, ptmp == NULL\n",0,0,0,0);
                   1064:                                        uvm_wait("uao_getpage");
                   1065:                                        simple_lock(&uobj->vmobjlock);
1.41      chs      1066:                                        continue;
1.5       mrg      1067:                                }
                   1068:
                   1069:                                /*
                   1070:                                 * safe with PQ's unlocked: because we just
                   1071:                                 * alloc'd the page
                   1072:                                 */
1.46      chs      1073:
1.5       mrg      1074:                                ptmp->pqflags |= PQ_AOBJ;
                   1075:
1.41      chs      1076:                                /*
1.5       mrg      1077:                                 * got new page ready for I/O.  break pps while
                   1078:                                 * loop.  pps[lcv] is still NULL.
                   1079:                                 */
1.46      chs      1080:
1.5       mrg      1081:                                break;
                   1082:                        }
                   1083:
                   1084:                        /* page is there, see if we need to wait on it */
1.46      chs      1085:                        if ((ptmp->flags & PG_BUSY) != 0) {
1.5       mrg      1086:                                ptmp->flags |= PG_WANTED;
                   1087:                                UVMHIST_LOG(pdhist,
                   1088:                                    "sleeping, ptmp->flags 0x%x\n",
                   1089:                                    ptmp->flags,0,0,0);
1.23      thorpej  1090:                                UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
1.68.2.3! yamt     1091:                                    false, "uao_get", 0);
1.5       mrg      1092:                                simple_lock(&uobj->vmobjlock);
1.46      chs      1093:                                continue;
1.5       mrg      1094:                        }
1.41      chs      1095:
                   1096:                        /*
1.5       mrg      1097:                         * if we get here then the page has become resident and
                   1098:                         * unbusy between steps 1 and 2.  we busy it now (so we
                   1099:                         * own it) and set pps[lcv] (so that we exit the while
                   1100:                         * loop).
                   1101:                         */
1.46      chs      1102:
1.5       mrg      1103:                        /* we own it, caller must un-busy */
                   1104:                        ptmp->flags |= PG_BUSY;
                   1105:                        UVM_PAGE_OWN(ptmp, "uao_get2");
                   1106:                        pps[lcv] = ptmp;
                   1107:                }
                   1108:
                   1109:                /*
                   1110:                 * if we own the valid page at the correct offset, pps[lcv] will
                   1111:                 * point to it.   nothing more to do except go to the next page.
                   1112:                 */
1.46      chs      1113:
1.5       mrg      1114:                if (pps[lcv])
                   1115:                        continue;                       /* next lcv */
                   1116:
                   1117:                /*
1.41      chs      1118:                 * we have a "fake/busy/clean" page that we just allocated.
1.5       mrg      1119:                 * do the needed "i/o", either reading from swap or zeroing.
                   1120:                 */
1.46      chs      1121:
                   1122:                swslot = uao_find_swslot(&aobj->u_obj, pageidx);
1.5       mrg      1123:
                   1124:                /*
                   1125:                 * just zero the page if there's nothing in swap.
                   1126:                 */
1.46      chs      1127:
                   1128:                if (swslot == 0) {
                   1129:
1.5       mrg      1130:                        /*
                   1131:                         * page hasn't existed before, just zero it.
                   1132:                         */
1.46      chs      1133:
1.5       mrg      1134:                        uvm_pagezero(ptmp);
1.27      chs      1135:                } else {
1.68.2.1  yamt     1136: #if defined(VMSWAP)
                   1137:                        int error;
                   1138:
1.5       mrg      1139:                        UVMHIST_LOG(pdhist, "pagein from swslot %d",
                   1140:                             swslot, 0,0,0);
                   1141:
                   1142:                        /*
                   1143:                         * page in the swapped-out page.
                   1144:                         * unlock object for i/o, relock when done.
                   1145:                         */
1.46      chs      1146:
1.5       mrg      1147:                        simple_unlock(&uobj->vmobjlock);
1.46      chs      1148:                        error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1.5       mrg      1149:                        simple_lock(&uobj->vmobjlock);
                   1150:
                   1151:                        /*
                   1152:                         * I/O done.  check for errors.
                   1153:                         */
1.46      chs      1154:
                   1155:                        if (error != 0) {
1.5       mrg      1156:                                UVMHIST_LOG(pdhist, "<- done (error=%d)",
1.46      chs      1157:                                    error,0,0,0);
1.5       mrg      1158:                                if (ptmp->flags & PG_WANTED)
1.24      thorpej  1159:                                        wakeup(ptmp);
1.27      chs      1160:
                   1161:                                /*
                   1162:                                 * remove the swap slot from the aobj
                   1163:                                 * and mark the aobj as having no real slot.
                   1164:                                 * don't free the swap slot, thus preventing
                   1165:                                 * it from being used again.
                   1166:                                 */
1.46      chs      1167:
1.27      chs      1168:                                swslot = uao_set_swslot(&aobj->u_obj, pageidx,
                   1169:                                                        SWSLOT_BAD);
1.57      pk       1170:                                if (swslot > 0) {
1.45      chs      1171:                                        uvm_swap_markbad(swslot, 1);
                   1172:                                }
1.27      chs      1173:
1.5       mrg      1174:                                uvm_lock_pageq();
                   1175:                                uvm_pagefree(ptmp);
                   1176:                                uvm_unlock_pageq();
                   1177:                                simple_unlock(&uobj->vmobjlock);
1.46      chs      1178:                                return error;
1.5       mrg      1179:                        }
1.68.2.1  yamt     1180: #else /* defined(VMSWAP) */
                   1181:                        panic("%s: pagein", __func__);
                   1182: #endif /* defined(VMSWAP) */
                   1183:                }
                   1184:
                   1185:                if ((access_type & VM_PROT_WRITE) == 0) {
                   1186:                        ptmp->flags |= PG_CLEAN;
                   1187:                        pmap_clear_modify(ptmp);
1.5       mrg      1188:                }
                   1189:
1.41      chs      1190:                /*
1.5       mrg      1191:                 * we got the page!   clear the fake flag (indicates valid
                   1192:                 * data now in page) and plug into our result array.   note
1.41      chs      1193:                 * that page is still busy.
1.5       mrg      1194:                 *
                   1195:                 * it is the callers job to:
                   1196:                 * => check if the page is released
                   1197:                 * => unbusy the page
                   1198:                 * => activate the page
                   1199:                 */
                   1200:
1.46      chs      1201:                ptmp->flags &= ~PG_FAKE;
1.5       mrg      1202:                pps[lcv] = ptmp;
1.46      chs      1203:        }
1.1       mrg      1204:
                   1205:        /*
1.5       mrg      1206:         * finally, unlock object and return.
                   1207:         */
1.1       mrg      1208:
1.68.2.1  yamt     1209: done:
1.1       mrg      1210:        simple_unlock(&uobj->vmobjlock);
1.5       mrg      1211:        UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
1.46      chs      1212:        return 0;
1.1       mrg      1213: }
                   1214:
1.68.2.1  yamt     1215: #if defined(VMSWAP)
                   1216:
/*
 * uao_dropswap:  release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */

void
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int oldslot;

	/* detach the slot from this page index, then free it if it was set */
	oldslot = uao_set_swslot(uobj, pageidx, 0);
	if (oldslot != 0) {
		uvm_swap_free(oldslot, 1);
	}
}
                   1233:
/*
 * uao_swap_off: page in every page in every aobj that is paged-out to
 * a range of swslots.
 *
 * => nothing should be locked.
 * => returns true if pagein was aborted due to lack of memory.
 */

bool
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj, *nextaobj;
	bool rv;

	/*
	 * walk the list of all aobjs.
	 */

restart:
	simple_lock(&uao_list_lock);
	for (aobj = LIST_FIRST(&uao_list);
	     aobj != NULL;
	     aobj = nextaobj) {

		/*
		 * try to get the object lock, start all over if we fail.
		 * most of the time we'll get the aobj lock,
		 * so this should be a rare case.
		 *
		 * NOTE(review): lock order is list lock -> object lock, so a
		 * blocking acquire here could deadlock; hence the try-lock
		 * and full restart.
		 */

		if (!simple_lock_try(&aobj->u_obj.vmobjlock)) {
			simple_unlock(&uao_list_lock);
			goto restart;
		}

		/*
		 * add a ref to the aobj so it doesn't disappear
		 * while we're working.
		 */

		uao_reference_locked(&aobj->u_obj);

		/*
		 * now it's safe to unlock the uao list.
		 * (our reference keeps this aobj alive across the pagein.)
		 */

		simple_unlock(&uao_list_lock);

		/*
		 * page in any pages in the swslot range.
		 * if there's an error, abort and return the error.
		 */

		rv = uao_pagein(aobj, startslot, endslot);
		if (rv) {
			uao_detach_locked(&aobj->u_obj);
			return rv;
		}

		/*
		 * we're done with this aobj.
		 * relock the list and drop our ref on the aobj.
		 * fetch the next element before dropping the ref, since
		 * the detach may free the aobj.
		 */

		simple_lock(&uao_list_lock);
		nextaobj = LIST_NEXT(aobj, u_list);
		uao_detach_locked(&aobj->u_obj);
	}

	/*
	 * done with traversal, unlock the list
	 */
	simple_unlock(&uao_list_lock);
	return false;
}
                   1308:
                   1309:
/*
 * uao_pagein: page in any pages from aobj in the given swslot range.
 *
 * => aobj must be locked and is returned locked.
 * => returns true if pagein was aborted due to lack of memory.
 */
static bool
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	bool rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int buck;

restart:
		for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
			     elt != NULL;
			     elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */

					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed
					 * (uao_pagein_page drops the object
					 * lock during i/o).
					 */

					rv = uao_pagein_page(aobj,
					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		/* flat slot array: scan every page index directly. */
		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */

			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */

			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return false;
}
                   1387:
                   1388: /*
                   1389:  * page in a page from an aobj.  used for swap_off.
1.68.2.3! yamt     1390:  * returns true if pagein was aborted due to lack of memory.
1.27      chs      1391:  *
                   1392:  * => aobj must be locked and is returned locked.
                   1393:  */
1.46      chs      1394:
1.68.2.3! yamt     1395: static bool
1.67      thorpej  1396: uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1.27      chs      1397: {
                   1398:        struct vm_page *pg;
1.57      pk       1399:        int rv, npages;
1.27      chs      1400:
                   1401:        pg = NULL;
                   1402:        npages = 1;
                   1403:        /* locked: aobj */
                   1404:        rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
1.68.2.1  yamt     1405:            &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, PGO_SYNCIO);
1.27      chs      1406:        /* unlocked: aobj */
                   1407:
                   1408:        /*
                   1409:         * relock and finish up.
                   1410:         */
1.46      chs      1411:
1.27      chs      1412:        simple_lock(&aobj->u_obj.vmobjlock);
                   1413:        switch (rv) {
1.40      chs      1414:        case 0:
1.27      chs      1415:                break;
                   1416:
1.40      chs      1417:        case EIO:
                   1418:        case ERESTART:
1.46      chs      1419:
1.27      chs      1420:                /*
                   1421:                 * nothing more to do on errors.
1.40      chs      1422:                 * ERESTART can only mean that the anon was freed,
1.27      chs      1423:                 * so again there's nothing to do.
                   1424:                 */
1.46      chs      1425:
1.68.2.3! yamt     1426:                return false;
1.59      pk       1427:
                   1428:        default:
1.68.2.3! yamt     1429:                return true;
1.27      chs      1430:        }
                   1431:
                   1432:        /*
                   1433:         * ok, we've got the page now.
                   1434:         * mark it as dirty, clear its swslot and un-busy it.
                   1435:         */
1.57      pk       1436:        uao_dropswap(&aobj->u_obj, pageidx);
1.27      chs      1437:
                   1438:        /*
1.68.2.2  yamt     1439:         * make sure it's on a page queue.
1.27      chs      1440:         */
                   1441:        uvm_lock_pageq();
1.58      pk       1442:        if (pg->wire_count == 0)
1.68.2.2  yamt     1443:                uvm_pageenqueue(pg);
1.27      chs      1444:        uvm_unlock_pageq();
1.56      yamt     1445:
1.59      pk       1446:        if (pg->flags & PG_WANTED) {
                   1447:                wakeup(pg);
                   1448:        }
                   1449:        pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
1.56      yamt     1450:        UVM_PAGE_OWN(pg, NULL);
                   1451:
1.68.2.3! yamt     1452:        return false;
1.1       mrg      1453: }
1.68.2.1  yamt     1454:
                   1455: /*
                   1456:  * uao_dropswap_range: drop swapslots in the range.
                   1457:  *
                   1458:  * => aobj must be locked and is returned locked.
                   1459:  * => start is inclusive.  end is exclusive.
                   1460:  */
                   1461:
                   1462: void
                   1463: uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
                   1464: {
                   1465:        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
                   1466:
                   1467:        LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
                   1468:
                   1469:        uao_dropswap_range1(aobj, start, end);
                   1470: }
                   1471:
/*
 * uao_dropswap_range1: free all swap slots backing pages in [start, end)
 * and adjust the swap-only page accounting.  end == 0 means "to the end
 * of the object".
 *
 * => caller holds the object lock (asserted by uao_dropswap_range).
 */
static void
uao_dropswap_range1(struct uvm_aobj *aobj, voff_t start, voff_t end)
{
	/* number of swap-only pages whose slots we free here */
	int swpgonlydelta = 0;

	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		/*
		 * tags identify which swhash cluster a page index falls in;
		 * elts outside [taglo, taghi] can be skipped wholesale.
		 */
		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			     elt != NULL;
			     elt = next) {
				int startidx, endidx;
				int j;

				/* grab next now: elt may be freed below. */
				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				/*
				 * clamp the in-cluster index range for the
				 * boundary clusters; interior clusters are
				 * processed in full.
				 */
				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					/*
					 * no resident page may remain at this
					 * offset — its swap backing is going
					 * away.
					 */
					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);
					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				/* release the cluster once it's empty. */
				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		/* flat array: clamp end to the object size and sweep. */
		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */

	if (swpgonlydelta > 0) {
		simple_lock(&uvm.swap_data_lock);
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		uvmexp.swpgonly -= swpgonlydelta;
		simple_unlock(&uvm.swap_data_lock);
	}
}
                   1567:
                   1568: #endif /* defined(VMSWAP) */

CVSweb <webmaster@jp.NetBSD.org>