
Annotation of src/sys/uvm/uvm_glue.c, Revision 1.92.8.4

1.92.8.4! yamt        1: /*     $NetBSD: uvm_glue.c,v 1.92.8.3 2006/06/26 12:55:08 yamt Exp $   */
1.1       mrg         2:
1.48      chs         3: /*
1.1       mrg         4:  * Copyright (c) 1997 Charles D. Cranor and Washington University.
1.48      chs         5:  * Copyright (c) 1991, 1993, The Regents of the University of California.
1.1       mrg         6:  *
                      7:  * All rights reserved.
                      8:  *
                      9:  * This code is derived from software contributed to Berkeley by
                     10:  * The Mach Operating System project at Carnegie-Mellon University.
                     11:  *
                     12:  * Redistribution and use in source and binary forms, with or without
                     13:  * modification, are permitted provided that the following conditions
                     14:  * are met:
                     15:  * 1. Redistributions of source code must retain the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer.
                     17:  * 2. Redistributions in binary form must reproduce the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer in the
                     19:  *    documentation and/or other materials provided with the distribution.
                     20:  * 3. All advertising materials mentioning features or use of this software
                     21:  *    must display the following acknowledgement:
                     22:  *     This product includes software developed by Charles D. Cranor,
1.48      chs        23:  *      Washington University, the University of California, Berkeley and
1.1       mrg        24:  *      its contributors.
                     25:  * 4. Neither the name of the University nor the names of its contributors
                     26:  *    may be used to endorse or promote products derived from this software
                     27:  *    without specific prior written permission.
                     28:  *
                     29:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     30:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     31:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     32:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     33:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     34:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     35:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     36:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     37:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     38:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     39:  * SUCH DAMAGE.
                     40:  *
                     41:  *     @(#)vm_glue.c   8.6 (Berkeley) 1/5/94
1.4       mrg        42:  * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
1.1       mrg        43:  *
                     44:  *
                     45:  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
                     46:  * All rights reserved.
1.48      chs        47:  *
1.1       mrg        48:  * Permission to use, copy, modify and distribute this software and
                     49:  * its documentation is hereby granted, provided that both the copyright
                     50:  * notice and this permission notice appear in all copies of the
                     51:  * software, derivative works or modified versions, and any portions
                     52:  * thereof, and that both notices appear in supporting documentation.
1.48      chs        53:  *
                     54:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     55:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
1.1       mrg        56:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
1.48      chs        57:  *
1.1       mrg        58:  * Carnegie Mellon requests users of this software to return to
                     59:  *
                     60:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     61:  *  School of Computer Science
                     62:  *  Carnegie Mellon University
                     63:  *  Pittsburgh PA 15213-3890
                     64:  *
                     65:  * any improvements or extensions that they make and grant Carnegie the
                     66:  * rights to redistribute these changes.
                     67:  */
1.55      lukem      68:
                     69: #include <sys/cdefs.h>
1.92.8.4! yamt       70: __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.92.8.3 2006/06/26 12:55:08 yamt Exp $");
1.1       mrg        71:
1.92.8.4! yamt       72: #include "opt_coredump.h"
1.49      lukem      73: #include "opt_kgdb.h"
1.59      yamt       74: #include "opt_kstack.h"
1.5       mrg        75: #include "opt_uvmhist.h"
                     76:
1.1       mrg        77: /*
                     78:  * uvm_glue.c: glue functions
                     79:  */
                     80:
                     81: #include <sys/param.h>
                     82: #include <sys/systm.h>
                     83: #include <sys/proc.h>
                     84: #include <sys/resourcevar.h>
                     85: #include <sys/buf.h>
                     86: #include <sys/user.h>
                     87:
                     88: #include <uvm/uvm.h>
                     89:
                     90: #include <machine/cpu.h>
                     91:
                     92: /*
                     93:  * local prototypes
                     94:  */
                     95:
1.78      junyoung   96: static void uvm_swapout(struct lwp *);
1.1       mrg        97:
1.60      chs        98: #define UVM_NUAREA_MAX 16
1.92.8.2  yamt       99: static vaddr_t uvm_uareas;
                    100: static int uvm_nuarea;
                    101: static struct simplelock uvm_uareas_slock = SIMPLELOCK_INITIALIZER;
                    102: #define        UAREA_NEXTFREE(uarea)   (*(vaddr_t *)(UAREA_TO_USER(uarea)))
1.60      chs       103:
1.75      jdolecek  104: static void uvm_uarea_free(vaddr_t);
                    105:
1.1       mrg       106: /*
                    107:  * XXXCDC: do these really belong here?
                    108:  */
                    109:
1.28      thorpej   110: /*
1.1       mrg       111:  * uvm_kernacc: can the kernel access a region of memory
                    112:  *
1.83      yamt      113:  * - used only by /dev/kmem driver (mem.c)
1.1       mrg       114:  */
                    115:
1.6       mrg       116: boolean_t
1.89      thorpej   117: uvm_kernacc(caddr_t addr, size_t len, int rw)
1.6       mrg       118: {
                    119:        boolean_t rv;
1.13      eeh       120:        vaddr_t saddr, eaddr;
1.6       mrg       121:        vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
                    122:
1.31      kleink    123:        saddr = trunc_page((vaddr_t)addr);
1.43      chs       124:        eaddr = round_page((vaddr_t)addr + len);
1.6       mrg       125:        vm_map_lock_read(kernel_map);
                    126:        rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
                    127:        vm_map_unlock_read(kernel_map);
                    128:
                    129:        return(rv);
1.1       mrg       130: }
                    131:
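/*
 * Added illustration, not part of this revision: a mem.c-style consumer
 * could gate a /dev/kmem transfer on uvm_kernacc() before touching the
 * region.  The helper "kmem_range_ok" is hypothetical.
 */
static int
kmem_range_ok(caddr_t kaddr, size_t len, int rw)
{

	/* rw is B_READ or B_WRITE, as uvm_kernacc() expects */
	if (!uvm_kernacc(kaddr, len, rw))
		return EFAULT;	/* region lacks the needed protection */
	return 0;		/* safe to touch kaddr..kaddr+len */
}
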
                    132: #ifdef KGDB
                    133: /*
                    134:  * Change protections on kernel pages from addr to addr+len
                    135:  * (presumably so debugger can plant a breakpoint).
                    136:  *
                    137:  * We force the protection change at the pmap level.  If we were
                    138:  * to use vm_map_protect a change to allow writing would be lazily-
                    139:  * applied meaning we would still take a protection fault, something
                    140:  * we really don't want to do.  It would also fragment the kernel
                    141:  * map unnecessarily.  We cannot use pmap_protect since it also won't
                    142:  * enforce a write-enable request.  Using pmap_enter is the only way
                    143:  * we can ensure the change takes place properly.
                    144:  */
1.6       mrg       145: void
1.89      thorpej   146: uvm_chgkprot(caddr_t addr, size_t len, int rw)
1.6       mrg       147: {
                    148:        vm_prot_t prot;
1.13      eeh       149:        paddr_t pa;
                    150:        vaddr_t sva, eva;
1.6       mrg       151:
                    152:        prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
1.31      kleink    153:        eva = round_page((vaddr_t)addr + len);
                    154:        for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) {
1.6       mrg       155:                /*
                    156:                 * Extract physical address for the page.
                    157:                 */
1.27      thorpej   158:                if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE)
1.6       mrg       159:                        panic("chgkprot: invalid page");
1.30      thorpej   160:                pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED);
1.6       mrg       161:        }
1.51      chris     162:        pmap_update(pmap_kernel());
1.1       mrg       163: }
                    164: #endif
                    165:
                    166: /*
1.52      chs       167:  * uvm_vslock: wire user memory for I/O
1.1       mrg       168:  *
                    169:  * - called from physio and sys___sysctl
                    170:  * - XXXCDC: consider nuking this (or making it a macro?)
                    171:  */
                    172:
1.26      thorpej   173: int
1.89      thorpej   174: uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
1.1       mrg       175: {
1.50      chs       176:        struct vm_map *map;
1.26      thorpej   177:        vaddr_t start, end;
1.45      chs       178:        int error;
1.26      thorpej   179:
                    180:        map = &p->p_vmspace->vm_map;
1.31      kleink    181:        start = trunc_page((vaddr_t)addr);
                    182:        end = round_page((vaddr_t)addr + len);
1.92.8.1  yamt      183:        error = uvm_fault_wire(map, start, end, access_type, 0);
1.45      chs       184:        return error;
1.1       mrg       185: }
                    186:
                    187: /*
1.52      chs       188:  * uvm_vsunlock: unwire user memory wired by uvm_vslock()
1.1       mrg       189:  *
                    190:  * - called from physio and sys___sysctl
                    191:  * - XXXCDC: consider nuking this (or making it a macro?)
                    192:  */
                    193:
1.6       mrg       194: void
1.89      thorpej   195: uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
1.1       mrg       196: {
1.43      chs       197:        uvm_fault_unwire(&p->p_vmspace->vm_map, trunc_page((vaddr_t)addr),
                    198:                round_page((vaddr_t)addr + len));
1.1       mrg       199: }
                    200:
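/*
 * Added illustration, not part of this revision: the physio-style
 * bracket around raw I/O to user memory using the two calls above.
 * "do_raw_transfer" is a hypothetical transfer routine.
 */
static int
wired_user_io(struct proc *p, caddr_t uaddr, size_t len)
{
	int error;

	error = uvm_vslock(p, uaddr, len, VM_PROT_READ | VM_PROT_WRITE);
	if (error)
		return error;			/* could not wire the pages */
	error = do_raw_transfer(uaddr, len);	/* pages stay resident here */
	uvm_vsunlock(p, uaddr, len);		/* always undo the wiring */
	return error;
}
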
                    201: /*
1.62      thorpej   202:  * uvm_proc_fork: fork a virtual address space
1.1       mrg       203:  *
                    204:  * - the address space is copied as per parent map's inherit values
1.62      thorpej   205:  */
                    206: void
1.89      thorpej   207: uvm_proc_fork(struct proc *p1, struct proc *p2, boolean_t shared)
1.62      thorpej   208: {
                    209:
                    210:        if (shared == TRUE) {
                    211:                p2->p_vmspace = NULL;
                    212:                uvmspace_share(p1, p2);
                    213:        } else {
                    214:                p2->p_vmspace = uvmspace_fork(p1->p_vmspace);
                    215:        }
                    216:
                    217:        cpu_proc_fork(p1, p2);
                    218: }
                    219:
                    220:
                    221: /*
                    222:  * uvm_lwp_fork: fork a thread
                    223:  *
1.1       mrg       224:  * - a new "user" structure is allocated for the child process
                    225:  *     [filled in by MD layer...]
1.20      thorpej   226:  * - if specified, the child gets a new user stack described by
                    227:  *     stack and stacksize
1.1       mrg       228:  * - NOTE: the kernel stack may be at a different location in the child
                    229:  *     process, and thus addresses of automatic variables may be invalid
1.62      thorpej   230:  *     after cpu_lwp_fork returns in the child process.  We do nothing here
                    231:  *     after cpu_lwp_fork returns.
1.1       mrg       232:  * - XXXCDC: we need a way for this to return a failure value rather
                    233:  *   than just hang
                    234:  */
1.6       mrg       235: void
1.89      thorpej   236: uvm_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
                    237:     void (*func)(void *), void *arg)
1.6       mrg       238: {
1.45      chs       239:        int error;
1.6       mrg       240:
                    241:        /*
1.7       thorpej   242:         * Wire down the U-area for the process, which contains the PCB
1.62      thorpej   243:         * and the kernel stack.  Wired state is stored in l->l_flag's
                    244:         * L_INMEM bit rather than in the vm_map_entry's wired count
1.61      chs       245:         * to prevent kernel_map fragmentation.  If we reused a cached U-area,
1.62      thorpej   246:         * L_INMEM will already be set and we don't need to do anything.
1.21      thorpej   247:         *
1.61      chs       248:         * Note the kernel stack gets read/write accesses right off the bat.
1.6       mrg       249:         */
1.61      chs       250:
1.62      thorpej   251:        if ((l2->l_flag & L_INMEM) == 0) {
1.92.8.2  yamt      252:                vaddr_t uarea = USER_TO_UAREA(l2->l_addr);
                    253:
                    254:                error = uvm_fault_wire(kernel_map, uarea,
                    255:                    uarea + USPACE, VM_PROT_READ | VM_PROT_WRITE, 0);
1.61      chs       256:                if (error)
1.62      thorpej   257:                        panic("uvm_lwp_fork: uvm_fault_wire failed: %d", error);
1.67      scw       258: #ifdef PMAP_UAREA
                    259:                /* Tell the pmap this is a u-area mapping */
1.92.8.2  yamt      260:                PMAP_UAREA(uarea);
1.67      scw       261: #endif
1.62      thorpej   262:                l2->l_flag |= L_INMEM;
1.61      chs       263:        }
1.59      yamt      264:
                    265: #ifdef KSTACK_CHECK_MAGIC
                    266:        /*
                    267:         * fill stack with magic number
                    268:         */
1.63      yamt      269:        kstack_setup_magic(l2);
1.59      yamt      270: #endif
1.6       mrg       271:
                    272:        /*
1.62      thorpej   273:         * cpu_lwp_fork() copies and updates the pcb, and makes the child ready
                    274:         * to run.  If this is a normal user fork, the child will exit
1.34      thorpej   275:         * directly to user mode via child_return() on its first time
                    276:         * slice and will not return here.  If this is a kernel thread,
                    277:         * the specified entry point will be executed.
1.6       mrg       278:         */
1.62      thorpej   279:        cpu_lwp_fork(l1, l2, stack, stacksize, func, arg);
1.14      thorpej   280: }
                    281:
                    282: /*
1.60      chs       283:  * uvm_uarea_alloc: allocate a u-area
                    284:  */
                    285:
1.61      chs       286: boolean_t
                    287: uvm_uarea_alloc(vaddr_t *uaddrp)
1.60      chs       288: {
                    289:        vaddr_t uaddr;
                    290:
                    291: #ifndef USPACE_ALIGN
                    292: #define USPACE_ALIGN    0
                    293: #endif
                    294:
1.62      thorpej   295:        simple_lock(&uvm_uareas_slock);
1.75      jdolecek  296:        if (uvm_nuarea > 0) {
1.92.8.2  yamt      297:                uaddr = uvm_uareas;
                    298:                uvm_uareas = UAREA_NEXTFREE(uaddr);
1.60      chs       299:                uvm_nuarea--;
1.62      thorpej   300:                simple_unlock(&uvm_uareas_slock);
1.61      chs       301:                *uaddrp = uaddr;
                    302:                return TRUE;
1.60      chs       303:        } else {
1.62      thorpej   304:                simple_unlock(&uvm_uareas_slock);
1.84      yamt      305:                *uaddrp = uvm_km_alloc(kernel_map, USPACE, USPACE_ALIGN,
                    306:                    UVM_KMF_PAGEABLE);
1.61      chs       307:                return FALSE;
1.60      chs       308:        }
                    309: }
                    310:
                    311: /*
1.75      jdolecek  312:  * uvm_uarea_free: free a u-area; never blocks
                    313:  */
                    314:
1.92      perry     315: static inline void
1.75      jdolecek  316: uvm_uarea_free(vaddr_t uaddr)
                    317: {
                    318:        simple_lock(&uvm_uareas_slock);
1.92.8.2  yamt      319:        UAREA_NEXTFREE(uaddr) = uvm_uareas;
                    320:        uvm_uareas = uaddr;
1.75      jdolecek  321:        uvm_nuarea++;
                    322:        simple_unlock(&uvm_uareas_slock);
                    323: }
                    324:
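/*
 * Added illustration, not part of this revision: the cache is a LIFO
 * free list threaded through the u-areas themselves, using the first
 * word of each free u-area's user struct (see UAREA_NEXTFREE above).
 * Freeing A and then B into an empty cache leaves
 *
 *	uvm_uareas -> B -> A -> 0,	uvm_nuarea == 2
 *
 * and the next uvm_uarea_alloc() hands back B first, still wired.
 */
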
                    325: /*
                     326:  * uvm_uarea_drain: return the memory of u-areas over the limit
                     327:  * back to the system
1.60      chs       328:  */
                    329:
                    330: void
1.75      jdolecek  331: uvm_uarea_drain(boolean_t empty)
1.60      chs       332: {
1.75      jdolecek  333:        int leave = empty ? 0 : UVM_NUAREA_MAX;
                    334:        vaddr_t uaddr;
                    335:
                    336:        if (uvm_nuarea <= leave)
                    337:                return;
1.60      chs       338:
1.62      thorpej   339:        simple_lock(&uvm_uareas_slock);
1.75      jdolecek  340:        while (uvm_nuarea > leave) {
1.92.8.2  yamt      341:                uaddr = uvm_uareas;
                    342:                uvm_uareas = UAREA_NEXTFREE(uaddr);
1.75      jdolecek  343:                uvm_nuarea--;
1.62      thorpej   344:                simple_unlock(&uvm_uareas_slock);
1.84      yamt      345:                uvm_km_free(kernel_map, uaddr, USPACE, UVM_KMF_PAGEABLE);
1.75      jdolecek  346:                simple_lock(&uvm_uareas_slock);
1.60      chs       347:        }
1.75      jdolecek  348:        simple_unlock(&uvm_uareas_slock);
1.60      chs       349: }
                    350:
                    351: /*
1.80      pk        352:  * uvm_exit: exit a virtual address space
                    353:  *
                    354:  * - the process passed to us is a dead (pre-zombie) process; we
                    355:  *   are running on a different context now (the reaper).
                    356:  * - borrow proc0's address space because freeing the vmspace
                    357:  *   of the dead process may block.
                    358:  */
                    359:
                    360: void
1.89      thorpej   361: uvm_proc_exit(struct proc *p)
1.80      pk        362: {
                    363:        struct lwp *l = curlwp; /* XXX */
                    364:        struct vmspace *ovm;
                    365:
                    366:        KASSERT(p == l->l_proc);
                    367:        ovm = p->p_vmspace;
                    368:
                    369:        /*
                    370:         * borrow proc0's address space.
                    371:         */
                    372:        pmap_deactivate(l);
                    373:        p->p_vmspace = proc0.p_vmspace;
                    374:        pmap_activate(l);
                    375:
                    376:        uvmspace_free(ovm);
                    377: }
                    378:
                    379: void
                    380: uvm_lwp_exit(struct lwp *l)
                    381: {
1.92.8.2  yamt      382:        vaddr_t va = USER_TO_UAREA(l->l_addr);
1.80      pk        383:
                    384:        l->l_flag &= ~L_INMEM;
                    385:        uvm_uarea_free(va);
                    386:        l->l_addr = NULL;
                    387: }
                    388:
                    389: /*
1.1       mrg       390:  * uvm_init_limits: init per-process VM limits
                    391:  *
                    392:  * - called for process 0 and then inherited by all others.
                    393:  */
1.60      chs       394:
1.6       mrg       395: void
1.89      thorpej   396: uvm_init_limits(struct proc *p)
1.6       mrg       397: {
                    398:
                    399:        /*
                    400:         * Set up the initial limits on process VM.  Set the maximum
                    401:         * resident set size to be all of (reasonably) available memory.
                    402:         * This causes any single, large process to start random page
                    403:         * replacement once it fills memory.
                    404:         */
                    405:
                    406:        p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
1.79      pk        407:        p->p_rlimit[RLIMIT_STACK].rlim_max = maxsmap;
1.6       mrg       408:        p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
1.79      pk        409:        p->p_rlimit[RLIMIT_DATA].rlim_max = maxdmap;
1.6       mrg       410:        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
1.1       mrg       411: }
                    412:
                    413: #ifdef DEBUG
                    414: int    enableswap = 1;
                    415: int    swapdebug = 0;
                    416: #define        SDB_FOLLOW      1
                    417: #define SDB_SWAPIN     2
                    418: #define SDB_SWAPOUT    4
                    419: #endif
                    420:
                    421: /*
1.92.8.3  yamt      422:  * uvm_swapin: swap in an lwp's u-area.
1.1       mrg       423:  */
                    424:
1.6       mrg       425: void
1.89      thorpej   426: uvm_swapin(struct lwp *l)
1.6       mrg       427: {
1.13      eeh       428:        vaddr_t addr;
1.52      chs       429:        int s, error;
1.6       mrg       430:
1.92.8.2  yamt      431:        addr = USER_TO_UAREA(l->l_addr);
1.62      thorpej   432:        /* make L_INMEM true */
1.92.8.1  yamt      433:        error = uvm_fault_wire(kernel_map, addr, addr + USPACE,
                    434:            VM_PROT_READ | VM_PROT_WRITE, 0);
1.52      chs       435:        if (error) {
                    436:                panic("uvm_swapin: rewiring stack failed: %d", error);
                    437:        }
1.6       mrg       438:
                    439:        /*
                    440:         * Some architectures need to be notified when the user area has
                    441:         * moved to new physical page(s) (e.g.  see mips/mips/vm_machdep.c).
                    442:         */
1.62      thorpej   443:        cpu_swapin(l);
1.41      enami     444:        SCHED_LOCK(s);
1.62      thorpej   445:        if (l->l_stat == LSRUN)
                    446:                setrunqueue(l);
                    447:        l->l_flag |= L_INMEM;
1.41      enami     448:        SCHED_UNLOCK(s);
1.62      thorpej   449:        l->l_swtime = 0;
1.6       mrg       450:        ++uvmexp.swapins;
1.1       mrg       451: }
                    452:
                    453: /*
                    454:  * uvm_scheduler: process zero main loop
                    455:  *
                     456:  * - attempt to swap in every swapped-out, runnable process in order of
                    457:  *     priority.
                    458:  * - if not enough memory, wake the pagedaemon and let it clear space.
                    459:  */
                    460:
1.6       mrg       461: void
1.89      thorpej   462: uvm_scheduler(void)
1.1       mrg       463: {
1.62      thorpej   464:        struct lwp *l, *ll;
1.32      augustss  465:        int pri;
1.6       mrg       466:        int ppri;
1.1       mrg       467:
                    468: loop:
                    469: #ifdef DEBUG
1.6       mrg       470:        while (!enableswap)
1.43      chs       471:                tsleep(&proc0, PVM, "noswap", 0);
1.1       mrg       472: #endif
1.62      thorpej   473:        ll = NULL;              /* process to choose */
1.6       mrg       474:        ppri = INT_MIN; /* its priority */
1.29      thorpej   475:        proclist_lock_read();
1.6       mrg       476:
1.62      thorpej   477:        LIST_FOREACH(l, &alllwp, l_list) {
1.6       mrg       478:                /* is it a runnable swapped out process? */
1.62      thorpej   479:                if (l->l_stat == LSRUN && (l->l_flag & L_INMEM) == 0) {
                    480:                        pri = l->l_swtime + l->l_slptime -
                    481:                            (l->l_proc->p_nice - NZERO) * 8;
1.6       mrg       482:                        if (pri > ppri) {   /* higher priority?  remember it. */
1.62      thorpej   483:                                ll = l;
1.6       mrg       484:                                ppri = pri;
                    485:                        }
                    486:                }
                    487:        }
1.39      sommerfe  488:        /*
                    489:         * XXXSMP: possible unlock/sleep race between here and the
                    490:         * "scheduler" tsleep below..
                    491:         */
1.28      thorpej   492:        proclist_unlock_read();
1.1       mrg       493:
                    494: #ifdef DEBUG
1.6       mrg       495:        if (swapdebug & SDB_FOLLOW)
1.62      thorpej   496:                printf("scheduler: running, procp %p pri %d\n", ll, ppri);
1.1       mrg       497: #endif
1.6       mrg       498:        /*
                    499:         * Nothing to do, back to sleep
                    500:         */
1.62      thorpej   501:        if ((l = ll) == NULL) {
1.43      chs       502:                tsleep(&proc0, PVM, "scheduler", 0);
1.6       mrg       503:                goto loop;
                    504:        }
                    505:
                    506:        /*
                     507:         * we have found a swapped-out process which we would like to bring
                    508:         * back in.
                    509:         *
                    510:         * XXX: this part is really bogus cuz we could deadlock on memory
                    511:         * despite our feeble check
                    512:         */
                    513:        if (uvmexp.free > atop(USPACE)) {
1.1       mrg       514: #ifdef DEBUG
1.6       mrg       515:                if (swapdebug & SDB_SWAPIN)
                    516:                        printf("swapin: pid %d(%s)@%p, pri %d free %d\n",
1.62      thorpej   517:             l->l_proc->p_pid, l->l_proc->p_comm, l->l_addr, ppri, uvmexp.free);
1.1       mrg       518: #endif
1.62      thorpej   519:                uvm_swapin(l);
1.6       mrg       520:                goto loop;
                    521:        }
                    522:        /*
                    523:         * not enough memory, jab the pageout daemon and wait til the coast
                    524:         * is clear
                    525:         */
1.1       mrg       526: #ifdef DEBUG
1.6       mrg       527:        if (swapdebug & SDB_FOLLOW)
                    528:                printf("scheduler: no room for pid %d(%s), free %d\n",
1.62      thorpej   529:           l->l_proc->p_pid, l->l_proc->p_comm, uvmexp.free);
1.1       mrg       530: #endif
1.6       mrg       531:        uvm_wait("schedpwait");
1.1       mrg       532: #ifdef DEBUG
1.6       mrg       533:        if (swapdebug & SDB_FOLLOW)
                    534:                printf("scheduler: room again, free %d\n", uvmexp.free);
1.1       mrg       535: #endif
1.6       mrg       536:        goto loop;
1.1       mrg       537: }
                    538:
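/*
 * Added illustration, not part of this revision: worked numbers for
 * the "pri" computation above (values made up).  An lwp swapped out
 * for 10 seconds and asleep for 2, in a process at nice 0
 * (p_nice == NZERO), scores
 *
 *	pri = 10 + 2 - (NZERO - NZERO) * 8 = 12
 *
 * while the same lwp at nice +5 scores 12 - 5 * 8 = -28, so the
 * longest-swapped, un-niced lwps are brought back in first.
 */
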
                    539: /*
1.62      thorpej   540:  * swappable: is LWP "l" swappable?
1.1       mrg       541:  */
                    542:
1.62      thorpej   543: #define        swappable(l)                                                    \
                    544:        (((l)->l_flag & (L_INMEM)) &&                                   \
                    545:         ((((l)->l_proc->p_flag) & (P_SYSTEM | P_WEXIT)) == 0) &&       \
                    546:         (l)->l_holdcnt == 0)
1.1       mrg       547:
                    548: /*
                    549:  * swapout_threads: find threads that can be swapped and unwire their
                    550:  *     u-areas.
                    551:  *
                    552:  * - called by the pagedaemon
                     553:  * - try to swap out at least one process
                     554:  * - processes that have been sleeping or stopped for maxslp or more
                     555:  *   seconds are swapped out... otherwise the longest-sleeping or stopped
                     556:  *   process is swapped out, otherwise the longest-resident process...
                    557:  */
1.60      chs       558:
1.6       mrg       559: void
1.89      thorpej   560: uvm_swapout_threads(void)
1.1       mrg       561: {
1.62      thorpej   562:        struct lwp *l;
                    563:        struct lwp *outl, *outl2;
1.6       mrg       564:        int outpri, outpri2;
                    565:        int didswap = 0;
1.48      chs       566:        extern int maxslp;
1.6       mrg       567:        /* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */
1.1       mrg       568:
                    569: #ifdef DEBUG
1.6       mrg       570:        if (!enableswap)
                    571:                return;
1.1       mrg       572: #endif
                    573:
1.6       mrg       574:        /*
1.62      thorpej   575:         * outl/outpri  : stop/sleep thread with largest sleeptime < maxslp
                    576:         * outl2/outpri2: the longest resident thread (its swap time)
1.6       mrg       577:         */
1.62      thorpej   578:        outl = outl2 = NULL;
1.6       mrg       579:        outpri = outpri2 = 0;
1.29      thorpej   580:        proclist_lock_read();
1.62      thorpej   581:        LIST_FOREACH(l, &alllwp, l_list) {
1.81      yamt      582:                KASSERT(l->l_proc != NULL);
1.62      thorpej   583:                if (!swappable(l))
1.6       mrg       584:                        continue;
1.62      thorpej   585:                switch (l->l_stat) {
1.68      cl        586:                case LSONPROC:
1.69      cl        587:                        continue;
                    588:
1.62      thorpej   589:                case LSRUN:
                    590:                        if (l->l_swtime > outpri2) {
                    591:                                outl2 = l;
                    592:                                outpri2 = l->l_swtime;
1.6       mrg       593:                        }
                    594:                        continue;
1.48      chs       595:
1.62      thorpej   596:                case LSSLEEP:
                    597:                case LSSTOP:
                    598:                        if (l->l_slptime >= maxslp) {
                    599:                                uvm_swapout(l);
1.6       mrg       600:                                didswap++;
1.62      thorpej   601:                        } else if (l->l_slptime > outpri) {
                    602:                                outl = l;
                    603:                                outpri = l->l_slptime;
1.6       mrg       604:                        }
                    605:                        continue;
                    606:                }
                    607:        }
1.28      thorpej   608:        proclist_unlock_read();
1.6       mrg       609:
                    610:        /*
                    611:         * If we didn't get rid of any real duds, toss out the next most
                    612:         * likely sleeping/stopped or running candidate.  We only do this
                    613:         * if we are real low on memory since we don't gain much by doing
                    614:         * it (USPACE bytes).
                    615:         */
                    616:        if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) {
1.62      thorpej   617:                if ((l = outl) == NULL)
                    618:                        l = outl2;
1.1       mrg       619: #ifdef DEBUG
1.6       mrg       620:                if (swapdebug & SDB_SWAPOUT)
1.62      thorpej   621:                        printf("swapout_threads: no duds, try procp %p\n", l);
1.1       mrg       622: #endif
1.62      thorpej   623:                if (l)
                    624:                        uvm_swapout(l);
1.6       mrg       625:        }
1.1       mrg       626: }
                    627:
                    628: /*
1.62      thorpej   629:  * uvm_swapout: swap out lwp "l"
1.1       mrg       630:  *
1.48      chs       631:  * - currently "swapout" means "unwire U-area" and "pmap_collect()"
1.1       mrg       632:  *   the pmap.
                    633:  * - XXXCDC: should deactivate all process' private anonymous memory
                    634:  */
                    635:
1.6       mrg       636: static void
1.89      thorpej   637: uvm_swapout(struct lwp *l)
1.1       mrg       638: {
1.13      eeh       639:        vaddr_t addr;
1.6       mrg       640:        int s;
1.62      thorpej   641:        struct proc *p = l->l_proc;
1.1       mrg       642:
                    643: #ifdef DEBUG
1.6       mrg       644:        if (swapdebug & SDB_SWAPOUT)
1.62      thorpej   645:                printf("swapout: lid %d.%d(%s)@%p, stat %x pri %d free %d\n",
                    646:           p->p_pid, l->l_lid, p->p_comm, l->l_addr, l->l_stat,
                    647:           l->l_slptime, uvmexp.free);
1.1       mrg       648: #endif
                    649:
1.6       mrg       650:        /*
                    651:         * Mark it as (potentially) swapped out.
                    652:         */
1.41      enami     653:        SCHED_LOCK(s);
1.69      cl        654:        if (l->l_stat == LSONPROC) {
                    655:                KDASSERT(l->l_cpu != curcpu());
1.68      cl        656:                SCHED_UNLOCK(s);
                    657:                return;
                    658:        }
1.62      thorpej   659:        l->l_flag &= ~L_INMEM;
                    660:        if (l->l_stat == LSRUN)
                    661:                remrunqueue(l);
1.41      enami     662:        SCHED_UNLOCK(s);
1.62      thorpej   663:        l->l_swtime = 0;
1.53      chs       664:        p->p_stats->p_ru.ru_nswap++;
1.6       mrg       665:        ++uvmexp.swapouts;
1.68      cl        666:
                    667:        /*
                    668:         * Do any machine-specific actions necessary before swapout.
                    669:         * This can include saving floating point state, etc.
                    670:         */
                    671:        cpu_swapout(l);
1.43      chs       672:
                    673:        /*
                    674:         * Unwire the to-be-swapped process's user struct and kernel stack.
                    675:         */
1.92.8.2  yamt      676:        addr = USER_TO_UAREA(l->l_addr);
1.62      thorpej   677:        uvm_fault_unwire(kernel_map, addr, addr + USPACE); /* !L_INMEM */
1.43      chs       678:        pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
1.1       mrg       679: }
                    680:
1.92.8.4! yamt      681: #ifdef COREDUMP
1.56      thorpej   682: /*
                    683:  * uvm_coredump_walkmap: walk a process's map for the purpose of dumping
                    684:  * a core file.
                    685:  */
                    686:
                    687: int
1.89      thorpej   688: uvm_coredump_walkmap(struct proc *p, void *iocookie,
                    689:     int (*func)(struct proc *, void *, struct uvm_coredump_state *),
                    690:     void *cookie)
1.56      thorpej   691: {
                    692:        struct uvm_coredump_state state;
                    693:        struct vmspace *vm = p->p_vmspace;
                    694:        struct vm_map *map = &vm->vm_map;
                    695:        struct vm_map_entry *entry;
                    696:        int error;
                    697:
1.64      atatat    698:        entry = NULL;
                    699:        vm_map_lock_read(map);
1.87      matt      700:        state.end = 0;
1.64      atatat    701:        for (;;) {
                    702:                if (entry == NULL)
                    703:                        entry = map->header.next;
                    704:                else if (!uvm_map_lookup_entry(map, state.end, &entry))
                    705:                        entry = entry->next;
                    706:                if (entry == &map->header)
                    707:                        break;
                    708:
1.56      thorpej   709:                state.cookie = cookie;
1.86      matt      710:                if (state.end > entry->start) {
                    711:                        state.start = state.end;
                    712:                } else {
                    713:                        state.start = entry->start;
                    714:                }
                    715:                state.realend = entry->end;
1.56      thorpej   716:                state.end = entry->end;
                    717:                state.prot = entry->protection;
                    718:                state.flags = 0;
                    719:
1.82      chs       720:                /*
                    721:                 * Dump the region unless one of the following is true:
                    722:                 *
                    723:                 * (1) the region has neither object nor amap behind it
                    724:                 *     (ie. it has never been accessed).
                    725:                 *
                    726:                 * (2) the region has no amap and is read-only
                    727:                 *     (eg. an executable text section).
                    728:                 *
                    729:                 * (3) the region's object is a device.
1.85      nathanw   730:                 *
                    731:                 * (4) the region is unreadable by the process.
1.82      chs       732:                 */
1.56      thorpej   733:
1.82      chs       734:                KASSERT(!UVM_ET_ISSUBMAP(entry));
                    735:                KASSERT(state.start < VM_MAXUSER_ADDRESS);
                    736:                KASSERT(state.end <= VM_MAXUSER_ADDRESS);
                    737:                if (entry->object.uvm_obj == NULL &&
                    738:                    entry->aref.ar_amap == NULL) {
1.86      matt      739:                        state.realend = state.start;
                    740:                } else if ((entry->protection & VM_PROT_WRITE) == 0 &&
1.82      chs       741:                    entry->aref.ar_amap == NULL) {
1.86      matt      742:                        state.realend = state.start;
                    743:                } else if (entry->object.uvm_obj != NULL &&
1.82      chs       744:                    UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
1.86      matt      745:                        state.realend = state.start;
                    746:                } else if ((entry->protection & VM_PROT_READ) == 0) {
                    747:                        state.realend = state.start;
                    748:                } else {
                    749:                        if (state.start >= (vaddr_t)vm->vm_maxsaddr)
                    750:                                state.flags |= UVM_COREDUMP_STACK;
                    751:
                    752:                        /*
                     753:                         * If this is an anonymous entry, only dump instantiated
                    754:                         * pages.
                    755:                         */
                    756:                        if (entry->object.uvm_obj == NULL) {
                    757:                                vaddr_t end;
                    758:
                    759:                                amap_lock(entry->aref.ar_amap);
                    760:                                for (end = state.start;
                    761:                                     end < state.end; end += PAGE_SIZE) {
                    762:                                        struct vm_anon *anon;
                    763:                                        anon = amap_lookup(&entry->aref,
                    764:                                            end - entry->start);
                    765:                                        /*
                    766:                                         * If we have already encountered an
                    767:                                         * uninstantiated page, stop at the
                     768:                                         * first instantiated page.
                    769:                                         */
                    770:                                        if (anon != NULL &&
                    771:                                            state.realend != state.end) {
                    772:                                                state.end = end;
                    773:                                                break;
                    774:                                        }
                    775:
                    776:                                        /*
                    777:                                         * If this page is the first
                    778:                                         * uninstantiated page, mark this as
                     779:                                         * the real ending point.  Continue
                    780:                                         * counting uninstantiated pages.
                    781:                                         */
                    782:                                        if (anon == NULL &&
                    783:                                            state.realend == state.end) {
                    784:                                                state.realend = end;
                    785:                                        }
                    786:                                }
                    787:                                amap_unlock(entry->aref.ar_amap);
                    788:                        }
1.82      chs       789:                }
1.86      matt      790:
1.56      thorpej   791:
1.64      atatat    792:                vm_map_unlock_read(map);
1.88      matt      793:                error = (*func)(p, iocookie, &state);
1.56      thorpej   794:                if (error)
                    795:                        return (error);
1.64      atatat    796:                vm_map_lock_read(map);
1.56      thorpej   797:        }
1.64      atatat    798:        vm_map_unlock_read(map);
1.56      thorpej   799:
                    800:        return (0);
                    801: }
1.92.8.4! yamt      802: #endif /* COREDUMP */
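
/*
 * Added illustration, not part of this revision: the shape of a
 * callback suitable for uvm_coredump_walkmap().  This one merely
 * counts dumpable segments through the caller's cookie; a real core
 * writer would emit segment headers and data via iocookie instead.
 */
static int
count_coredump_segs(struct proc *p, void *iocookie,
    struct uvm_coredump_state *us)
{
	size_t *nsegs = us->cookie;	/* caller-supplied cookie */

	/*
	 * us->start .. us->realend is the backed part of the region;
	 * us->realend .. us->end would be written out as a hole.
	 */
	(*nsegs)++;
	return 0;			/* non-zero aborts the walk */
}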
