/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)pmap.c	8.1 (Berkeley) 6/11/93
 *
 * from: Header: pmap.c,v 1.39 93/04/20 11:17:12 torek Exp
 * $Id: pmap.c,v 1.21 1994/11/14 06:09:30 deraadt Exp $
 */

/*
 * SPARC physical map management code.
 * Does not function on multiprocessors (yet).
 */

/*
 * XXX: the <...> targets of these #includes were lost in extraction;
 * the header names below are reconstructed (assumed, not verified).
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_prot.h>
#include <vm/vm_page.h>

#include <machine/autoconf.h>
#include <machine/bsd_openprom.h>
#include <machine/oldmon.h>
#include <machine/cpu.h>
#include <machine/ctlreg.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>

#ifdef DEBUG
#define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
#endif

extern struct promvec *promvec;

/*
 * The SPARCstation offers us the following challenges:
 *
 *   1. A virtual address cache.  This is, strictly speaking, not
 *	part of the architecture, but the code below assumes one.
 *	This is a write-through cache on the 4c and a write-back cache
 *	on others.
 *
 *   2. An MMU that acts like a cache.  There is not enough space
 *	in the MMU to map everything all the time.  Instead, we need
 *	to load MMU with the `working set' of translations for each
 *	process.
 *
 *   3. Segmented virtual and physical spaces.  The upper 12 bits of
 *	a virtual address (the virtual segment) index a segment table,
 *	giving a physical segment.  The physical segment selects a
 *	`Page Map Entry Group' (PMEG) and the virtual page number---the
 *	next 5 or 6 bits of the virtual address---select the particular
 *	`Page Map Entry' for the page.  We call the latter a PTE and
 *	call each Page Map Entry Group a pmeg (for want of a better name).
 *
 *	Since there are no valid bits in the segment table, the only way
 *	to have an invalid segment is to make one full pmeg of invalid PTEs.
 *	We use the last one (since the ROM does as well).
 *
 *   4. Discontiguous physical pages.  The Mach VM expects physical pages
 *	to be in one sequential lump.
 *
 *   5. The MMU is always on: it is not possible to disable it.  This is
 *	mainly a startup hassle.
 */
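/*
 * To illustrate challenge 3: a minimal sketch (not compiled in) of how
 * a fault address is decomposed and probed, using the VA_VSEG/VA_VPG
 * macros and the getsegmap/getpte accessors defined further below.
 * The local names `va', `pmeg' and `tpte' here are illustrative only.
 */
#if 0
	int va = 0x00234567;		/* some user virtual address */
	int vseg = VA_VSEG(va);		/* upper 12 bits: virtual segment */
	int vpg = VA_VPG(va);		/* next 5 or 6 bits: page in segment */
	int pmeg = getsegmap(va);	/* segment map: vseg -> PMEG number */

	if (pmeg != seginval) {
		int tpte = getpte(va);	/* PTE for page vpg in that PMEG */
		/* tpte & PG_V tells whether the page itself is mapped */
	}
#endif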
struct pmap_stats {
	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
	int	ps_changeprots;		/* # of calls to changeprot */
	int	ps_useless_changeprots;	/* # of changeprots for wiring */
	int	ps_enter_firstpv;	/* pv heads entered */
	int	ps_enter_secondpv;	/* pv nonheads entered */
	int	ps_useless_changewire;	/* useless wiring changes */
	int	ps_npg_prot_all;	/* # of active pages protected */
	int	ps_npg_prot_actual;	/* # pages actually affected */
} pmap_stats;

#ifdef DEBUG
#define	PDB_CREATE	0x0001
#define	PDB_DESTROY	0x0002
#define	PDB_REMOVE	0x0004
#define	PDB_CHANGEPROT	0x0008
#define	PDB_ENTER	0x0010
#define	PDB_MMU_ALLOC	0x0100
#define	PDB_MMU_STEAL	0x0200
#define	PDB_CTX_ALLOC	0x0400
#define	PDB_CTX_STEAL	0x0800
int	pmapdebug = 0x0;
#endif

#define	splpmap()	splimp()

/*
 * First and last managed physical addresses.
 */
#if 0
vm_offset_t	vm_first_phys, vm_last_phys;
#define	managed(pa)	((pa) >= vm_first_phys && (pa) < vm_last_phys)
#else
vm_offset_t	vm_first_phys, vm_num_phys;
#define	managed(pa)	((unsigned)((pa) - vm_first_phys) < vm_num_phys)
#endif

/*
 * For each managed physical page, there is a list of all currently
 * valid virtual mappings of that page.  Since there is usually one
 * (or zero) mapping per page, the table begins with an initial entry,
 * rather than a pointer; this head entry is empty iff its pv_pmap
 * field is NULL.
 *
 * Note that these are per machine independent page (so there may be
 * only one for every two hardware pages, e.g.).  Since the virtual
 * address is aligned on a page boundary, the low order bits are free
 * for storing flags.  Only the head of each list has flags.
 *
 * THIS SHOULD BE PART OF THE CORE MAP
 */
struct pvlist {
	struct	pvlist *pv_next;	/* next pvlist, if any */
	struct	pmap *pv_pmap;		/* pmap of this va */
	int	pv_va;			/* virtual address */
	int	pv_flags;		/* flags (below) */
};

/*
 * Flags in pv_flags.  Note that PV_MOD must be 1 and PV_REF must be 2
 * since they must line up with the bits in the hardware PTEs (see pte.h).
 */
#define	PV_MOD	1		/* page modified */
#define	PV_REF	2		/* page referenced */
#define	PV_NC	4		/* page cannot be cached */
/*efine	PV_ALLF	7		** all of the above */

struct pvlist *pv_table;	/* array of entries, one per physical page */

#define	pvhead(pa)	(&pv_table[atop((pa) - vm_first_phys)])
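/*
 * A minimal sketch (not compiled in) of how the pv_table is consumed:
 * given a managed physical address, pvhead() yields the head entry, and
 * every mapping of the page is reached by chasing pv_next.  The name
 * `pa' is illustrative; a NULL pv_pmap in the head means no mappings.
 */
#if 0
	struct pvlist *pv;

	for (pv = pvhead(pa); pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == NULL)
			break;		/* empty head: page unmapped */
		/* (pv->pv_pmap, pv->pv_va) is one mapping of pa */
	}
#endif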
/*
 * Each virtual segment within each pmap is either valid or invalid.
 * It is valid if pm_npte[VA_VSEG(va)] is not 0.  This does not mean
 * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
 * does not point to the invalid PMEG.
 *
 * If a virtual segment is valid and loaded, the correct PTEs appear
 * in the MMU only.  If it is valid and unloaded, the correct PTEs appear
 * in the pm_pte[VA_VSEG(va)] only.  However, some effort is made to keep
 * the software copies consistent enough with the MMU so that libkvm can
 * do user address translations.  In particular, pv_changepte() and
 * pmap_enu() maintain consistency, while less critical changes are
 * not maintained.  pm_pte[VA_VSEG(va)] always points to space for those
 * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
 * used (sigh).
 *
 * Each PMEG in the MMU is either free or contains PTEs corresponding to
 * some pmap and virtual segment.  If it contains some PTEs, it also contains
 * reference and modify bits that belong in the pv_table.  If we need
 * to steal a PMEG from some process (if we need one and none are free)
 * we must copy the ref and mod bits, and update pm_segmap in the other
 * pmap to show that its virtual segment is no longer in the MMU.
 *
 * There are 128 PMEGs in a small Sun-4, of which only a few dozen are
 * tied down permanently, leaving `about' 100 to be spread among
 * running processes.  These are managed as an LRU cache.  Before
 * calling the VM paging code for a user page fault, the fault handler
 * calls mmu_load(pmap, va) to try to get a set of PTEs put into the
 * MMU.  mmu_load will check the validity of the segment and tell whether
 * it did something.
 *
 * Since I hate the name PMEG I call this data structure an `mmu entry'.
 * Each mmuentry is on exactly one of three `usage' lists: free, LRU,
 * or locked.  The LRU list is for user processes; the locked list is
 * for kernel entries; both are doubly linked queues headed by `mmuhd's.
 * The free list is a simple list, headed by a free list pointer.
 */
struct mmuhd {
	struct	mmuentry *mh_next;
	struct	mmuentry *mh_prev;
};
struct mmuentry {
	struct	mmuentry *me_next;	/* queue (MUST BE FIRST) or next free */
	struct	mmuentry *me_prev;	/* queue (MUST BE FIRST) */
	struct	pmap *me_pmap;		/* pmap, if in use */
	struct	mmuentry *me_pmforw;	/* pmap pmeg chain */
	struct	mmuentry **me_pmback;	/* pmap pmeg chain */
	u_short	me_vseg;		/* virtual segment number in pmap */
	pmeg_t	me_pmeg;		/* hardware PMEG number */
};
struct mmuentry *mmuentry;	/* allocated in pmap_bootstrap */

struct mmuentry *me_freelist;	/* free list (not a queue) */
struct mmuhd me_lru = {		/* LRU (user) entries */
	(struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
};
struct mmuhd me_locked = {	/* locked (kernel) entries */
	(struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
};

int	seginval;		/* the invalid segment number */

/*
 * A context is simply a small number that dictates which set of 4096
 * segment map entries the MMU uses.  The Sun 4c has eight such sets.
 * These are allotted in an `almost MRU' fashion.
 *
 * Each context is either free or attached to a pmap.
 *
 * Since the virtual address cache is tagged by context, when we steal
 * a context we have to flush (that part of) the cache.
 */
union ctxinfo {
	union	ctxinfo *c_nextfree;	/* free list (if free) */
	struct	pmap *c_pmap;		/* pmap (if busy) */
};
union ctxinfo *ctxinfo;		/* allocated in pmap_bootstrap */
int	ncontext;

union ctxinfo *ctx_freelist;	/* context free list */
int	ctx_kick;		/* allocation rover when none free */
int	ctx_kickdir;		/* ctx_kick roves both directions */

/* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
caddr_t	vpage[2];		/* two reserved MD virtual pages */
caddr_t	vmempage;		/* one reserved MI vpage for /dev/mem */
caddr_t	vdumppages;		/* 32KB worth of reserved dump pages */

struct pmap	kernel_pmap_store;	/* the kernel's pmap */
struct ksegmap	kernel_segmap_store;	/* the kernel's segmap */
pmap_t		kernel_pmap;

/*
 * We need to know real physical memory ranges (for /dev/mem).
 */
#define	MA_SIZE	32		/* size of memory descriptor arrays */
struct	memarr pmemarr[MA_SIZE];	/* physical memory regions */
int	npmemarr;			/* number of entries in pmemarr */

/*
 * The following four global variables are set in pmap_bootstrap
 * for the vm code to find.  This is Wrong.
 */
vm_offset_t	avail_start;	/* first free physical page number */
vm_offset_t	avail_end;	/* last free physical page number */
vm_offset_t	virtual_avail;	/* first free virtual page number */
vm_offset_t	virtual_end;	/* last free virtual page number */

/*
 * pseudo-functions for mnemonic value
 * NB: setsegmap should be stba for 4c, but stha works and makes the
 * code right for the Sun-4 as well.
 */
#define	getcontext()		lduba(AC_CONTEXT, ASI_CONTROL)
#define	setcontext(c)		stba(AC_CONTEXT, ASI_CONTROL, c)

#if defined(SUN4) && !defined(SUN4C)
#define	getsegmap(va)		lduha(va, ASI_SEGMAP)
#define	setsegmap(va, pmeg)	stha(va, ASI_SEGMAP, pmeg)
#endif
#if !defined(SUN4) && defined(SUN4C)
#define	getsegmap(va)		lduba(va, ASI_SEGMAP)
#define	setsegmap(va, pmeg)	stba(va, ASI_SEGMAP, pmeg)
#endif
#if defined(SUN4) && defined(SUN4C)
#define	getsegmap(va)		(cputyp==CPU_SUN4C ? lduba(va, ASI_SEGMAP) \
				    : lduha(va, ASI_SEGMAP))
#define	setsegmap(va, pmeg)	(cputyp==CPU_SUN4C ? stba(va, ASI_SEGMAP, pmeg) \
				    : stha(va, ASI_SEGMAP, pmeg))
#endif

#define	getpte(va)		lda(va, ASI_PTE)
#define	setpte(va, pte)		sta(va, ASI_PTE, pte)

/*----------------------------------------------------------------*/

/*
 * Translations from dense (contiguous) pseudo physical addresses
 * (fed to the VM code, to keep it happy) to sparse (real, hardware)
 * physical addresses.  We call the former `software' page frame
 * numbers and the latter `hardware' page frame numbers.  The
 * translation is done on a `per bank' basis.
 *
 * The HWTOSW and SWTOHW macros handle the actual translation.
 * They are defined as no-ops on Sun-4s.
 *
 * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
 * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES (TODO)
 *
 * Since we cannot use the memory allocated to the ROM monitor, and
 * this happens to be just under 64K, I have chosen a bank size of
 * 64K.  This is necessary since all banks must be completely full.
 * I have also chosen a physical memory limit of 128 MB.  The 4c is
 * architecturally limited to 256 MB, but 128 MB is more than will
 * fit on present hardware.
 *
 * XXX	FIX THIS: just make all of each bank available and then
 *	take out the pages reserved to the monitor!!
 */
#define	MAXMEM	(128 * 1024 * 1024)	/* no more than 128 MB phys mem */
#define	NPGBANK	16			/* 2^4 pages per bank (64K / bank) */
#define	BSHIFT	4			/* log2(NPGBANK) */
#define	BOFFSET	(NPGBANK - 1)

/*
 * One would expect this to use NBPG instead of 4096.  But that is no
 * longer a constant.  As an added benefit it allows Sun4 machines to
 * have 2x as much physical memory.
 */
#define	BTSIZE	(MAXMEM / (1 << SUN4CM_PGSHIFT) / NPGBANK)

int	pmap_dtos[BTSIZE];		/* dense to sparse */
int	pmap_stod[BTSIZE];		/* sparse to dense */

#define	HWTOSW(pg)	(pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
#define	SWTOHW(pg)	(pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))
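/*
 * A worked example (not compiled in) of the bank translation.  Suppose
 * init_translations() below assigned the sparse (hardware) bank holding
 * pages 0x100..0x10f to dense (software) bank 1, i.e. pages 0x010..0x01f.
 * The table entries and page numbers here are made up for illustration.
 */
#if 0
	pmap_stod[0x10] = 0x01 << BSHIFT;	/* sparse bank 0x10 -> dense 0x01 */
	pmap_dtos[0x01] = 0x10 << BSHIFT;	/* dense bank 0x01 -> sparse 0x10 */

	/* HWTOSW(0x105) == pmap_stod[0x10] | 0x5 == 0x015 */
	/* SWTOHW(0x015) == pmap_dtos[0x01] | 0x5 == 0x105 */
#endif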
/*
 * Sort a memory array by address.
 */
static void
sortm(mp, n)
	register struct memarr *mp;
	register int n;
{
	register struct memarr *mpj;
	register int i, j;
	register u_int addr, len;

	/* Insertion sort.  This is O(n^2), but so what? */
	for (i = 1; i < n; i++) {
		/* save i'th entry */
		addr = mp[i].addr;
		len = mp[i].len;
		/* find j such that i'th entry goes before j'th */
		for (j = 0, mpj = mp; j < i; j++, mpj++)
			if (addr < mpj->addr)
				break;
		/* slide up any additional entries */
		ovbcopy(mpj, mpj + 1, (i - j) * sizeof(*mp));
		mpj->addr = addr;
		mpj->len = len;
	}
}

#ifdef DEBUG
struct	memarr pmap_ama[MA_SIZE];
int	pmap_nama;
#define	ama pmap_ama
#endif

/*
 * init_translations sets up pmap_dtos[] and pmap_stod[], and
 * returns the number of usable physical pages.
 */
int
init_translations()
{
	register struct memarr *mp;
	register int n, nmem;
	register u_int vbank = 0, pbank, v, a;
	register u_int pages = 0, lost = 0;
#ifndef DEBUG
	struct memarr ama[MA_SIZE];	/* available memory array */
#endif

	nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);

	/*
	 * Open Boot supposedly guarantees at least 3 MB free mem at 0;
	 * this is where the kernel has been loaded (we certainly hope the
	 * kernel is <= 3 MB).  We need the memory array to be sorted, and
	 * to start at 0, so that `software page 0' and `hardware page 0'
	 * are the same (otherwise the VM reserves the wrong pages for the
	 * kernel).
	 */
	sortm(ama, nmem);
	if (ama[0].addr != 0) {
		/* cannot panic here; there's no real kernel yet. */
		printf("init_translations: no kernel memory?!\n");
		callrom();
	}
#ifdef DEBUG
	pmap_nama = nmem;
#endif
	for (mp = ama; --nmem >= 0; mp++) {
		a = mp->addr >> PGSHIFT;
		v = mp->len >> PGSHIFT;
		if ((n = a & BOFFSET) != 0) {
			/* round up to next bank */
			n = NPGBANK - n;
			if (v < n) {	/* not a whole bank: skip it */
				lost += v;
				continue;
			}
			lost += n;	/* lose n pages from front */
			a += n;
			v -= n;
		}
		n = v >> BSHIFT;	/* calculate number of banks */
		pbank = a >> BSHIFT;	/* and the bank itself */
		if (pbank + n >= BTSIZE)
			n = BTSIZE - pbank;
		pages += n;		/* off by a factor of 2^BSHIFT */
		lost += v - (n << BSHIFT);
		while (--n >= 0) {
			pmap_dtos[vbank] = pbank << BSHIFT;
			pmap_stod[pbank] = vbank << BSHIFT;
			pbank++;
			vbank++;
		}
	}
	/* adjust page count */
	pages <<= BSHIFT;
#ifdef DEBUG
	printf("note: lost %d pages in translation\n", lost);
#endif
	return (pages);
}

#if 0
/*
 * Pages are physically contiguous, and hardware PFN == software PFN.
 *
 * XXX assumes PAGE_SIZE == NBPG (???)
 */
#define	HWTOSW(pg)	(pg)
#define	SWTOHW(pg)	(pg)
#endif

/* update pv_flags given a valid pte */
#define	MR(pte)	(((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))
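/*
 * A small walkthrough (not compiled in) of init_translations() on a
 * hypothetical two-chunk memory array, to show where pages are `lost':
 * a chunk starting mid-bank gives up its leading pages, and a trailing
 * partial bank is dropped, since all banks must be completely full.
 */
#if 0
	/* chunk 0: addr 0x0000000, len 4MB  -> 64 whole banks, none lost */
	/* chunk 1: addr 0x8008000, len 192K -> starts 8 pages into a bank:
	 *	8 pages lost from the front, 2 full banks kept, 8 pages
	 *	lost at the tail (48 pages seen, 32 usable, 16 lost) */
#endif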
/*----------------------------------------------------------------*/

/*
 * Agree with the monitor ROM as to how many MMU entries are
 * to be reserved, and map all of its segments into all contexts.
 *
 * Unfortunately, while the Version 0 PROM had a nice linked list of
 * taken virtual memory, the Version 2 PROM provides instead a convoluted
 * description of *free* virtual memory.  Rather than invert this, we
 * resort to two magic constants from the PROM vector description file.
 */
int
mmu_reservemon(nmmu)
	register int nmmu;
{
	register u_int va, eva;
	register int mmuseg, i;

#if defined(SUN4)
	if (cputyp==CPU_SUN4) {
		va = OLDMON_STARTVADDR;
		eva = OLDMON_ENDVADDR;
	}
#endif
#if defined(SUN4C)
	if (cputyp==CPU_SUN4C) {
		va = OPENPROM_STARTVADDR;
		eva = OPENPROM_ENDVADDR;
	}
#endif
	while (va < eva) {
		mmuseg = getsegmap(va);
		if (mmuseg < nmmu)
			nmmu = mmuseg;
		for (i = ncontext; --i > 0;)
			(*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
		if (mmuseg == seginval) {
			va += NBPSG;
			continue;
		}
		/* PROM maps its memory user-accessible: fix it. */
		for (i = NPTESG; --i >= 0; va += NBPG)
			setpte(va, getpte(va) | PG_S);
	}
	return (nmmu);
}

/*
 * TODO: agree with the ROM on physical pages by taking them away
 * from the page list, rather than having a dinky BTSIZE above.
 */

/*----------------------------------------------------------------*/

/*
 * MMU management.
 */

/*
 * Change contexts.  We need the old context number as well as the new
 * one.  If the context is changing, we must write all user windows
 * first, lest an interrupt cause them to be written to the (other)
 * user whose context we set here.
 */
#define	CHANGE_CONTEXTS(old, new) \
	if ((old) != (new)) { \
		write_user_windows(); \
		setcontext(new); \
	}

/*
 * Allocate an MMU entry (i.e., a PMEG).
 * If necessary, steal one from someone else.
 * Put it on the tail of the given queue
 * (which is either the LRU list or the locked list).
 * The locked list is not actually ordered, but this is easiest.
 * Also put it on the given (new) pmap's chain,
 * enter its pmeg number into that pmap's segmap,
 * and store the pmeg's new virtual segment number (me->me_vseg).
 *
 * This routine is large and complicated, but it must be fast
 * since it implements the dynamic allocation of MMU entries.
 */
struct mmuentry *
me_alloc(mh, newpm, newvseg)
	register struct mmuhd *mh;
	register struct pmap *newpm;
	register int newvseg;
{
	register struct mmuentry *me;
	register struct pmap *pm;
	register int i, va, pa, *pte, tpte;
	int ctx;

	/* try free list first */
	if ((me = me_freelist) != NULL) {
		me_freelist = me->me_next;
#ifdef DEBUG
		if (me->me_pmap != NULL)
			panic("me_alloc: freelist entry has pmap");
		if (pmapdebug & PDB_MMU_ALLOC)
			printf("me_alloc: got pmeg %x\n", me->me_pmeg);
#endif
		insque(me, mh->mh_prev);	/* onto end of queue */

		/* onto new pmap chain; pmap is already locked, if needed */
		me->me_pmforw = NULL;
		me->me_pmback = newpm->pm_mmuback;
		*newpm->pm_mmuback = me;
		newpm->pm_mmuback = &me->me_pmforw;

		/* into pmap segment table, with backpointers */
		newpm->pm_segmap[newvseg] = me->me_pmeg;
		me->me_pmap = newpm;
		me->me_vseg = newvseg;

		return (me);
	}

	/* no luck, take head of LRU list */
	if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
		panic("me_alloc: all pmegs gone");
	pm = me->me_pmap;
	if (pm == NULL)
		panic("me_alloc: LRU entry has no pmap");
	if (pm == kernel_pmap)
		panic("me_alloc: stealing from kernel");
	pte = pm->pm_pte[me->me_vseg];
	if (pte == NULL)
		panic("me_alloc: LRU entry's pmap has no ptes");
#ifdef DEBUG
	if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
		printf("me_alloc: stealing pmeg %x from pmap %x\n",
		    me->me_pmeg, pm);
#endif
	/*
	 * Remove from LRU list, and insert at end of new list
	 * (probably the LRU list again, but so what?).
	 */
	remque(me);
	insque(me, mh->mh_prev);

	/*
	 * The PMEG must be mapped into some context so that we can
	 * read its PTEs.  Use its current context if it has one;
	 * if not, and since context 0 is reserved for the kernel,
	 * the simplest method is to switch to 0 and map the PMEG
	 * to virtual address 0---which, being a user space address,
	 * is by definition not in use.
	 *
	 * XXX for ncpus>1 must use per-cpu VA?
	 * XXX do not have to flush cache immediately
	 */
	ctx = getcontext();
	if (pm->pm_ctx) {
		CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_segment(me->me_vseg);
		va = VSTOVA(me->me_vseg);
	} else {
		CHANGE_CONTEXTS(ctx, 0);
		setsegmap(0, me->me_pmeg);
		/*
		 * No cache flush needed: it happened earlier when
		 * the old context was taken.
		 */
		va = 0;
	}

	/*
	 * Record reference and modify bits for each page,
	 * and copy PTEs into kernel memory so that they can
	 * be reloaded later.
	 */
	i = NPTESG;
	do {
		tpte = getpte(va);
		if (tpte & PG_V) {
			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(pa))
				pvhead(pa)->pv_flags |= MR(tpte);
		}
		*pte++ = tpte & ~(PG_U|PG_M);
		va += NBPG;
	} while (--i > 0);

	/* update segment tables */
	simple_lock(&pm->pm_lock);	/* what if other cpu takes mmuentry ?? */
	if (pm->pm_ctx)
		setsegmap(VSTOVA(me->me_vseg), seginval);
	pm->pm_segmap[me->me_vseg] = seginval;

	/* off old pmap chain */
	if ((*me->me_pmback = me->me_pmforw) != NULL) {
		me->me_pmforw->me_pmback = me->me_pmback;
		me->me_pmforw = NULL;
	} else
		pm->pm_mmuback = me->me_pmback;
	simple_unlock(&pm->pm_lock);
	setcontext(ctx);	/* done with old context */

	/* onto new pmap chain; new pmap is already locked, if needed */
	/* me->me_pmforw = NULL; */	/* done earlier */
	me->me_pmback = newpm->pm_mmuback;
	*newpm->pm_mmuback = me;
	newpm->pm_mmuback = &me->me_pmforw;

	/* into new segment table, with backpointers */
	newpm->pm_segmap[newvseg] = me->me_pmeg;
	me->me_pmap = newpm;
	me->me_vseg = newvseg;

	return (me);
}

/*
 * Free an MMU entry.
 *
 * Assumes the corresponding pmap is already locked.
 * Does NOT flush cache, but does record ref and mod bits.
 * The rest of each PTE is discarded.
 * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
 * a context) or to 0 (if not).  Caller must also update
 * pm->pm_segmap and (possibly) the hardware.
 */
void
me_free(pm, pmeg)
	register struct pmap *pm;
	register u_int pmeg;
{
	register struct mmuentry *me = &mmuentry[pmeg];
	register int i, va, pa, tpte;

#ifdef DEBUG
	if (pmapdebug & PDB_MMU_ALLOC)
		printf("me_free: freeing pmeg %x from pmap %x\n",
		    me->me_pmeg, pm);
	if (me->me_pmeg != pmeg)
		panic("me_free: wrong mmuentry");
	if (pm != me->me_pmap)
		panic("me_free: pm != me_pmap");
#endif

	/* just like me_alloc, but no cache flush, and context already set */
	if (pm->pm_ctx)
		va = VSTOVA(me->me_vseg);
	else {
		setsegmap(0, me->me_pmeg);
		va = 0;
	}
	i = NPTESG;
	do {
		tpte = getpte(va);
		if (tpte & PG_V) {
			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(pa))
				pvhead(pa)->pv_flags |= MR(tpte);
		}
		va += NBPG;
	} while (--i > 0);

	/* take mmu entry off pmap chain */
	if ((*me->me_pmback = me->me_pmforw) != NULL)
		me->me_pmforw->me_pmback = me->me_pmback;
	else
		pm->pm_mmuback = me->me_pmback;
	/* ... and remove from segment map */
	pm->pm_segmap[me->me_vseg] = seginval;

	/* off LRU or lock chain */
	remque(me);

	/* no associated pmap; on free list */
	me->me_pmap = NULL;
	me->me_next = me_freelist;
	me_freelist = me;
}

/*
 * `Page in' (load or inspect) an MMU entry; called on page faults.
 * Returns 1 if we reloaded the segment, -1 if the segment was
 * already loaded and the page was marked valid (in which case the
 * fault must be a bus error or something), or 0 (segment loaded but
 * PTE not valid, or segment not loaded at all).
 */
int
mmu_pagein(pm, va, bits)
	register struct pmap *pm;
	register int va, bits;
{
	register int *pte;
	register struct mmuentry *me;
	register int vseg = VA_VSEG(va), pmeg, i, s;

	/* return 0 if we have no PTEs to load */
	if ((pte = pm->pm_pte[vseg]) == NULL)
		return (0);
	/* return -1 if the fault is `hard', 0 if not */
	if (pm->pm_segmap[vseg] != seginval)
		return (bits && (getpte(va) & bits) == bits ? -1 : 0);

	/* reload segment: write PTEs into a new LRU entry */
	va = VA_ROUNDDOWNTOSEG(va);
	s = splpmap();		/* paranoid */
	pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
	setsegmap(va, pmeg);
	i = NPTESG;
	do {
		setpte(va, *pte++);
		va += NBPG;
	} while (--i > 0);
	splx(s);
	return (1);
}
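/*
 * A minimal sketch (not compiled in) of how a trap handler might consume
 * mmu_pagein's three-way result; the handler structure and the names
 * `pm', `va' and `write' are stand-ins for the trap's actual state.
 */
#if 0
	switch (mmu_pagein(pm, va, write ? (PG_V | PG_W) : PG_V)) {
	case 1:		/* segment reloaded: just retry the access */
		return;
	case -1:	/* PTE was already valid: treat as a hard fault */
		goto hard_fault;
	default:	/* 0: fall through to the VM paging code */
		break;
	}
#endif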
/*
 * Allocate a context.  If necessary, steal one from someone else.
 * Changes hardware context number and loads segment map.
 *
 * This routine is only ever called from locore.s just after it has
 * saved away the previous process, so there are no active user windows.
 */
void
ctx_alloc(pm)
	register struct pmap *pm;
{
	register union ctxinfo *c;
	register int cnum, i;
	register pmeg_t *segp;
	register int gap_start, gap_end;
	register unsigned long va;

#ifdef DEBUG
	if (pm->pm_ctx)
		panic("ctx_alloc pm_ctx");
	if (pmapdebug & PDB_CTX_ALLOC)
		printf("ctx_alloc(%x)\n", pm);
#endif
	gap_start = pm->pm_gap_start;
	gap_end = pm->pm_gap_end;

	if ((c = ctx_freelist) != NULL) {
		ctx_freelist = c->c_nextfree;
		cnum = c - ctxinfo;
		setcontext(cnum);
	} else {
		if ((ctx_kick += ctx_kickdir) >= ncontext) {
			ctx_kick = ncontext - 1;
			ctx_kickdir = -1;
		} else if (ctx_kick < 1) {
			ctx_kick = 1;
			ctx_kickdir = 1;
		}
		c = &ctxinfo[cnum = ctx_kick];
#ifdef DEBUG
		if (c->c_pmap == NULL)
			panic("ctx_alloc cu_pmap");
		if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
			printf("ctx_alloc: steal context %x from %x\n",
			    cnum, c->c_pmap);
#endif
		c->c_pmap->pm_ctx = NULL;
		setcontext(cnum);
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_context();
		if (gap_start < c->c_pmap->pm_gap_start)
			gap_start = c->c_pmap->pm_gap_start;
		if (gap_end > c->c_pmap->pm_gap_end)
			gap_end = c->c_pmap->pm_gap_end;
	}
	c->c_pmap = pm;
	pm->pm_ctx = c;
	pm->pm_ctxnum = cnum;

	/*
	 * Write pmap's segment table into the MMU.
	 *
	 * Only write those pmeg numbers that seem interesting, by
	 * maintaining a pair of segment pointers that bracket the gap
	 * in which the pmap has no valid mappings.
	 *
	 * If a context was just allocated from the free list, trust that
	 * all its pmeg numbers are `seginval'.  We make sure this is the
	 * case initially in pmap_bootstrap().  Otherwise, the context was
	 * freed by calling ctx_free() in pmap_release(), which in turn is
	 * supposedly called only when all mappings have been removed.
	 *
	 * On the other hand, if the context had to be stolen from another
	 * pmap, we possibly shrink the gap to be the disjunction of the new
	 * and the previous map.
	 */
	segp = pm->pm_segmap;
	for (va = 0, i = NUSEG; --i >= 0; va += NBPSG) {
		if (VA_VSEG(va) >= gap_start) {
			va = VSTOVA(gap_end);
			i -= gap_end - gap_start;
			segp += gap_end - gap_start;
			if (i < 0)
				break;
			gap_start = NUSEG;	/* mustn't re-enter this branch */
		}
		setsegmap(va, *segp++);
	}
}

/*
 * Give away a context.  Flushes cache and sets current context to 0.
 */
void
ctx_free(pm)
	struct pmap *pm;
{
	register union ctxinfo *c;
	register int newc, oldc;

	if ((c = pm->pm_ctx) == NULL)
		panic("ctx_free");
	pm->pm_ctx = NULL;
	oldc = getcontext();
	if (vactype != VAC_NONE) {
		newc = pm->pm_ctxnum;
		CHANGE_CONTEXTS(oldc, newc);
		cache_flush_context();
		setcontext(0);
	} else {
		CHANGE_CONTEXTS(oldc, 0);
	}
	c->c_nextfree = ctx_freelist;
	ctx_freelist = c;
}
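/*
 * A small illustration (not compiled in) of the gap optimization above.
 * With hypothetical gap_start = 16 and gap_end = NUSEG - 16, the write
 * loop touches only the 16 low segments and the 16 high ones instead of
 * all NUSEG of them; the segments inside the gap keep whatever the
 * context already held (seginval, for a context off the free list).
 */
#if 0
	/* segments 0..15:              written from pm->pm_segmap */
	/* segments 16..NUSEG-17:       skipped in one step (the gap) */
	/* segments NUSEG-16..NUSEG-1:  written from pm->pm_segmap */
#endif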
/*----------------------------------------------------------------*/

/*
 * pvlist functions.
 */

/*
 * Walk the given pv list, and for each PTE, set or clear some bits
 * (e.g., PG_W or PG_NC).
 *
 * As a special case, this never clears PG_W on `pager' pages.
 * These, being kernel addresses, are always in hardware and have
 * a context.
 *
 * This routine flushes the cache for any page whose PTE changes,
 * as long as the process has a context; this is overly conservative.
 * It also copies ref and mod bits to the pvlist, on the theory that
 * this might save work later.  (XXX should test this theory)
 */
void
pv_changepte(pv0, bis, bic)
	register struct pvlist *pv0;
	register int bis, bic;
{
	register int *pte;
	register struct pvlist *pv;
	register struct pmap *pm;
	register int va, vseg, pmeg, i, flags;
	int ctx, s;

	write_user_windows();		/* paranoid? */
	s = splpmap();			/* paranoid? */
	if (pv0->pv_pmap == NULL) {
		splx(s);
		return;
	}
	ctx = getcontext();
	flags = pv0->pv_flags;
	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
		pm = pv->pv_pmap;
		if (pm == NULL)
			panic("pv_changepte 1");
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		pte = pm->pm_pte[vseg];
		if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
			register int tpte;

			/* in hardware: fix hardware copy */
			if (pm->pm_ctx) {
				extern vm_offset_t pager_sva, pager_eva;

				/*
				 * Bizarreness:  we never clear PG_W on
				 * pager pages, nor PG_NC on DVMA pages.
				 */
				if (bic == PG_W &&
				    va >= pager_sva && va < pager_eva)
					continue;
				if (bic == PG_NC &&
				    va >= DVMA_BASE && va < DVMA_END)
					continue;
				setcontext(pm->pm_ctxnum);
				/* XXX should flush only when necessary */
				tpte = getpte(va);
#ifdef notdef
				if (vactype != VAC_NONE)
#endif
					if (tpte & PG_M)
						cache_flush_page(va);
			} else {
				/* XXX per-cpu va? */
				setcontext(0);
				setsegmap(0, pmeg);
				va = VA_VPG(va) << PGSHIFT;
				tpte = getpte(va);
			}
			if (tpte & PG_V)
				flags |= (tpte >> PG_M_SHIFT) &
				    (PV_MOD|PV_REF);
			tpte = (tpte | bis) & ~bic;
			setpte(va, tpte);
			if (pte != NULL)	/* update software copy */
				pte[VA_VPG(va)] = tpte;
		} else {
			/* not in hardware: just fix software copy */
			if (pte == NULL)
				panic("pv_changepte 2");
			pte += VA_VPG(va);
			*pte = (*pte | bis) & ~bic;
		}
	}
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
}

/*
 * Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
 * Returns the new flags.
 *
 * This is just like pv_changepte, but we never add or remove bits,
 * hence never need to adjust software copies.
 */
int
pv_syncflags(pv0)
	register struct pvlist *pv0;
{
	register struct pvlist *pv;
	register struct pmap *pm;
	register int tpte, va, vseg, pmeg, i, flags;
	int ctx, s;

	write_user_windows();		/* paranoid? */
	s = splpmap();			/* paranoid? */
	if (pv0->pv_pmap == NULL) {	/* paranoid */
		splx(s);
		return (0);
	}
	ctx = getcontext();
	flags = pv0->pv_flags;
	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
		pm = pv->pv_pmap;
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
			continue;
		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			/* XXX should flush only when necessary */
			tpte = getpte(va);
#ifdef notdef
			if (vactype != VAC_NONE)
#endif
				if (tpte & PG_M)
					cache_flush_page(va);
		} else {
			/* XXX per-cpu va? */
			setcontext(0);
			setsegmap(0, pmeg);
			va = VA_VPG(va) << PGSHIFT;
			tpte = getpte(va);
		}
		if (tpte & (PG_M|PG_U) && tpte & PG_V) {
			flags |= (tpte >> PG_M_SHIFT) &
			    (PV_MOD|PV_REF);
			tpte &= ~(PG_M|PG_U);
			setpte(va, tpte);
		}
	}
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
	return (flags);
}

/*
 * pv_unlink is a helper function for pmap_remove.
 * It takes a pointer to the pv_table head for some physical address
 * and removes the appropriate (pmap, va) entry.
 *
 * Once the entry is removed, if the pv_table head has the cache
 * inhibit bit set, see if we can turn that off; if so, walk the
 * pvlist and turn off PG_NC in each PTE.  (The pvlist is by
 * definition nonempty, since it must have at least two elements
 * in it to have PV_NC set, and we only remove one here.)
 */
static void
pv_unlink(pv, pm, va)
	register struct pvlist *pv;
	register struct pmap *pm;
	register vm_offset_t va;
{
	register struct pvlist *npv;

#ifdef DIAGNOSTIC
	if (pv->pv_pmap == NULL)
		panic("pv_unlink0");
#endif
	/*
	 * First entry is special (sigh).
	 */
	npv = pv->pv_next;
	if (pv->pv_pmap == pm && pv->pv_va == va) {
		pmap_stats.ps_unlink_pvfirst++;
		if (npv != NULL) {
			pv->pv_next = npv->pv_next;
			pv->pv_pmap = npv->pv_pmap;
			pv->pv_va = npv->pv_va;
			free((caddr_t)npv, M_VMPVENT);
		} else
			pv->pv_pmap = NULL;
	} else {
		register struct pvlist *prev;

		for (prev = pv;; prev = npv, npv = npv->pv_next) {
			pmap_stats.ps_unlink_pvsearch++;
			if (npv == NULL)
				panic("pv_unlink");
			if (npv->pv_pmap == pm && npv->pv_va == va)
				break;
		}
		prev->pv_next = npv->pv_next;
		free((caddr_t)npv, M_VMPVENT);
	}
	if (pv->pv_flags & PV_NC) {
		/*
		 * Not cached: check to see if we can fix that now.
		 */
		va = pv->pv_va;
		for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next)
			if (BADALIAS(va, npv->pv_va))
				return;
		pv->pv_flags &= ~PV_NC;
		pv_changepte(pv, 0, PG_NC);
	}
}

/*
 * pv_link is the inverse of pv_unlink, and is used in pmap_enter.
 * It returns PG_NC if the (new) pvlist says that the address cannot
 * be cached.
 */
static int
pv_link(pv, pm, va)
	register struct pvlist *pv;
	register struct pmap *pm;
	register vm_offset_t va;
{
	register struct pvlist *npv;
	register int ret;

	if (pv->pv_pmap == NULL) {
		/* no pvlist entries yet */
		pmap_stats.ps_enter_firstpv++;
		pv->pv_next = NULL;
		pv->pv_pmap = pm;
		pv->pv_va = va;
		return (0);
	}
	/*
	 * Before entering the new mapping, see if
	 * it will cause old mappings to become aliased
	 * and thus need to be `discached'.
	 */
	ret = 0;
	pmap_stats.ps_enter_secondpv++;
	if (pv->pv_flags & PV_NC) {
		/* already uncached, just stay that way */
		ret = PG_NC;
	} else {
		/* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
		for (npv = pv; npv != NULL; npv = npv->pv_next) {
			if (BADALIAS(va, npv->pv_va)) {
				pv->pv_flags |= PV_NC;
				pv_changepte(pv, ret = PG_NC, 0);
				break;
			}
		}
	}
	npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK);
	npv->pv_next = pv->pv_next;
	npv->pv_pmap = pm;
	npv->pv_va = va;
	pv->pv_next = npv;
	return (ret);
}

/*
 * Walk the given list and flush the cache for each (MI) page that is
 * potentially in the cache.
 */
pv_flushcache(pv)
	register struct pvlist *pv;
{
	register struct pmap *pm;
	register int i, s, ctx;

	write_user_windows();	/* paranoia? */
	s = splpmap();		/* XXX extreme paranoia */
	if ((pm = pv->pv_pmap) != NULL) {
		ctx = getcontext();
		for (;;) {
			if (pm->pm_ctx) {
				setcontext(pm->pm_ctxnum);
				cache_flush_page(pv->pv_va);
			}
			pv = pv->pv_next;
			if (pv == NULL)
				break;
			pm = pv->pv_pmap;
		}
		setcontext(ctx);
	}
	splx(s);
}
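/*
 * BADALIAS() (defined elsewhere) is the heart of the alias checks in
 * pv_unlink/pv_link above.  A sketch of the idea, assuming a virtually
 * indexed cache that spans CACHE_SIZE bytes: two virtual addresses for
 * the same physical page collide in the cache unless they agree in the
 * cache index bits.  The macro below is illustrative, not the real
 * definition, and CACHE_SIZE is an assumed name.
 */
#if 0
#define	BADALIAS_SKETCH(va1, va2) \
	((((va1) ^ (va2)) & (CACHE_SIZE - 1) & ~PGOFSET) != 0)
#endif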
/*----------------------------------------------------------------*/

/*
 * At last, pmap code.
 */

#if defined(SUN4) && defined(SUN4C)
int	nptesg;
#endif

/*
 * Bootstrap the system enough to run with VM enabled.
 *
 * nmmu is the number of mmu entries (``PMEGs'');
 * nctx is the number of contexts.
 */
void
pmap_bootstrap(nmmu, nctx)
	int nmmu, nctx;
{
	register union ctxinfo *ci;
	register struct mmuentry *me;
	register int i, j, n, z, vs;
	register caddr_t p;
	register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
	int lastpage;
	extern char end[];
#ifdef DDB
	extern char *esym;
	char *theend = end;
#endif
	extern caddr_t reserve_dumppages(caddr_t);

	cnt.v_page_size = NBPG;
	vm_set_page_size();

	kernel_pmap = (pmap_t)&kernel_pmap_store;
	ncontext = nctx;

#if defined(SUN4) && defined(SUN4C)
	/* In this case NPTESG is not a #define */
	nptesg = (NBPSG >> pgshift);
#endif

	/*
	 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
	 * It will never be used for anything else.
	 */
	seginval = --nmmu;

	/*
	 * Preserve the monitor ROM's reserved VM region, so that
	 * we can use L1-A or the monitor's debugger.  As a side
	 * effect we map the ROM's reserved VM into all contexts
	 * (otherwise L1-A crashes the machine!).
	 */
	nmmu = mmu_reservemon(nmmu);

	/*
	 * Allocate and clear mmu entry and context structures.
	 */
	p = end;
#ifdef DDB
	if (esym != 0)
		theend = p = esym;
#endif
	mmuentry = me = (struct mmuentry *)p;
	p += nmmu * sizeof *me;
	ctxinfo = ci = (union ctxinfo *)p;
	p += nctx * sizeof *ci;
#ifdef DDB
	bzero(theend, p - theend);
#else
	bzero(end, p - end);
#endif

	/*
	 * Set up the `constants' for the call to vm_init()
	 * in main().  All pages beginning at p (rounded up to
	 * the next whole page) and continuing through the number
	 * of available pages are free, but they start at a higher
	 * virtual address.  This gives us two mappable MD pages
	 * for pmap_zero_page and pmap_copy_page, and one MI page
	 * for /dev/mem, all with no associated physical memory.
	 */
	p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
	avail_start = (int)p - KERNBASE;
	avail_end = init_translations() << PGSHIFT;
	i = (int)p;
	vpage[0] = p, p += NBPG;
	vpage[1] = p, p += NBPG;
	vmempage = p, p += NBPG;
	p = reserve_dumppages(p);
	virtual_avail = (vm_offset_t)p;
	virtual_end = VM_MAX_KERNEL_ADDRESS;
	p = (caddr_t)i;			/* retract to first free phys */

	/*
	 * Initialize the kernel pmap.
	 */
	{
		register struct pmap *k = kernel_pmap;

		k->pm_ctx = ctxinfo;
		/* k->pm_ctxnum = 0; */
		simple_lock_init(&k->pm_lock);
		k->pm_refcount = 1;
		/* k->pm_mmuforw = 0; */
		k->pm_mmuback = &k->pm_mmuforw;
		k->pm_segmap = &kernel_segmap_store.ks_segmap[-NUSEG];
		k->pm_pte = &kernel_segmap_store.ks_pte[-NUSEG];
		k->pm_npte = &kernel_segmap_store.ks_npte[-NUSEG];
		for (i = NKSEG; --i >= 0;)
			kernel_segmap_store.ks_segmap[i] = seginval;
	}

	/*
	 * All contexts are free except the kernel's.
	 *
	 * XXX sun4c could use context 0 for users?
	 */
	ci->c_pmap = kernel_pmap;
	ctx_freelist = ci + 1;
	for (i = 1; i < ncontext; i++) {
		ci++;
		ci->c_nextfree = ci + 1;
	}
	ci->c_nextfree = NULL;
	ctx_kick = 0;
	ctx_kickdir = -1;

	/* me_freelist = NULL; */	/* already NULL */

	/*
	 * Init mmu entries that map the kernel physical addresses.
	 * If the page bits in p are 0, we filled the last segment
	 * exactly (now how did that happen?); if not, it is
	 * the last page filled in the last segment.
	 *
	 * All the other MMU entries are free.
	 *
	 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
	 * BOOT PROCESS
	 */
	z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT;
	lastpage = VA_VPG(p);
	if (lastpage == 0)
		lastpage = NPTESG;
	p = (caddr_t)KERNBASE;		/* first va */
	vs = VA_VSEG(KERNBASE);		/* first virtual segment */
	rom_setmap = promvec->pv_setctxt;
	for (i = 0;;) {
		/*
		 * Distribute each kernel segment into all contexts.
		 * This is done through the monitor ROM, rather than
		 * directly here: if we do a setcontext we will fault,
		 * as we are not (yet) mapped in any other context.
		 */
		for (j = 1; j < nctx; j++)
			rom_setmap(j, p, i);

		/* set up the mmu entry */
		me->me_pmeg = i;
		insque(me, me_locked.mh_prev);
		/* me->me_pmforw = NULL; */
		me->me_pmback = kernel_pmap->pm_mmuback;
		*kernel_pmap->pm_mmuback = me;
		kernel_pmap->pm_mmuback = &me->me_pmforw;
		me->me_pmap = kernel_pmap;
		me->me_vseg = vs;
		kernel_pmap->pm_segmap[vs] = i;
		n = ++i < z ? NPTESG : lastpage;
		kernel_pmap->pm_npte[vs] = n;
		me++;
		vs++;
		if (i < z) {
			p += NBPSG;
			continue;
		}
		/*
		 * Unmap the pages, if any, that are not part of
		 * the final segment.
		 */
		for (p += n << PGSHIFT; j < NPTESG; j++, p += NBPG)
			setpte(p, 0);
		break;
	}
	for (; i < nmmu; i++, me++) {
		me->me_pmeg = i;
		me->me_next = me_freelist;
		/* me->me_pmap = NULL; */
		me_freelist = me;
	}

	/* Erase all spurious user-space segmaps */
	for (i = 1; i < ncontext; i++) {
		setcontext(i);
		for (p = 0, j = NUSEG; --j >= 0; p += NBPSG)
			setsegmap(p, seginval);
	}
	setcontext(0);

	/*
	 * write protect & encache kernel text;
	 * set red zone at kernel base; enable cache on message buffer.
	 */
	{
		extern char etext[], msgbuf[];
#ifdef KGDB
		register int mask = ~PG_NC;	/* XXX chgkprot is busted */
#else
		register int mask = ~(PG_W | PG_NC);
#endif
		for (p = (caddr_t)roundup((int)msgbuf+1, NBPG); p < etext;
		    p += NBPG)
			setpte(p, getpte(p) & mask);
		p = (caddr_t)KERNBASE;
		setpte(p, 0);
		p += NBPG;
		setpte(p, getpte(p) & ~PG_NC);
	}

	/*
	 * Grab physical memory list (for /dev/mem).
	 */
	npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS);
}

/*
 * Bootstrap memory allocator. This function allows for early dynamic
 * memory allocation until the virtual memory system has been bootstrapped.
 * After that point, either kmem_alloc or malloc should be used. This
 * function works by stealing pages from the (to be) managed page pool,
 * stealing virtual address space, then mapping the pages and zeroing them.
 *
 * It should be used from pmap_bootstrap till vm_page_startup, afterwards
 * it cannot be used, and will generate a panic if tried. Note that this
 * memory will never be freed, and in essence it is wired down.
 */
void *
pmap_bootstrap_alloc(size)
	int size;
{
	register void *mem;

	size = round_page(size);
	mem = (void *)virtual_avail;
	virtual_avail = pmap_map(virtual_avail, avail_start,
	    avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
	avail_start += size;
	bzero((void *)mem, size);
	return (mem);
}

/*
 * Initialize the pmap module.
 */
void
pmap_init(phys_start, phys_end)
	register vm_offset_t phys_start, phys_end;
{
	register vm_size_t s;

	if (PAGE_SIZE != NBPG)
		panic("pmap_init: CLSIZE!=1");
	/*
	 * Allocate and clear memory for the pv_table.
	 */
	s = sizeof(struct pvlist) * atop(phys_end - phys_start);
	s = round_page(s);
	pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
	bzero((caddr_t)pv_table, s);
	vm_first_phys = phys_start;
	vm_num_phys = phys_end - phys_start;
}

/*
 * Map physical addresses into kernel VM.
 */
vm_offset_t
pmap_map(va, pa, endpa, prot)
	register vm_offset_t va, pa, endpa;
	register int prot;
{
	register int pgsize = PAGE_SIZE;

	while (pa < endpa) {
		pmap_enter(kernel_pmap, va, pa, prot, 1);
		va += pgsize;
		pa += pgsize;
	}
	return (va);
}
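/*
 * A minimal usage sketch (not compiled in) for the two helpers above:
 * early MD code could grab wired, zeroed memory before vm_page_startup,
 * or map a known physical range into kernel VM.  The `struct foo' table
 * and its size are invented for illustration.
 */
#if 0
	struct foo *tbl;

	/* steal wired, zeroed pages during bootstrap */
	tbl = (struct foo *)pmap_bootstrap_alloc(64 * sizeof(*tbl));

	/* map an already-known physical range into kernel VM */
	va = pmap_map(va, pa, pa + 4 * NBPG, VM_PROT_READ|VM_PROT_WRITE);
#endif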
/*
 * Create and return a physical map.
 *
 * If size is nonzero, the map is useless. (ick)
 */
struct pmap *
pmap_create(size)
	vm_size_t size;
{
	register struct pmap *pm;

	if (size)
		return (NULL);
	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
#ifdef DEBUG
	if (pmapdebug & PDB_CREATE)
		printf("pmap_create: created %x\n", pm);
#endif
	bzero((caddr_t)pm, sizeof *pm);
	pmap_pinit(pm);
	return (pm);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pm)
	register struct pmap *pm;
{
	register int i;
	register struct usegmap *usp;

#ifdef DEBUG
	if (pmapdebug & PDB_CREATE)
		printf("pmap_pinit(%x)\n", pm);
#endif
	usp = malloc(sizeof(struct usegmap), M_VMPMAP, M_WAITOK);
	bzero((caddr_t)usp, sizeof (struct usegmap));
	pm->pm_segstore = usp;
	/* pm->pm_ctx = NULL; */
	simple_lock_init(&pm->pm_lock);
	pm->pm_refcount = 1;
	/* pm->pm_mmuforw = NULL; */
	pm->pm_mmuback = &pm->pm_mmuforw;
	pm->pm_segmap = usp->us_segmap;
	pm->pm_pte = usp->us_pte;
	pm->pm_npte = usp->us_npte;
	for (i = NUSEG; --i >= 0;)
		usp->us_segmap[i] = seginval;
	/*bzero((caddr_t)usp->us_pte, sizeof usp->us_pte);*/
	/*bzero((caddr_t)usp->us_npte, sizeof usp->us_npte);*/
	pm->pm_gap_end = VA_VSEG(VM_MAXUSER_ADDRESS);
}

/*
 * Retire the given pmap from service.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_destroy(pm)
	register struct pmap *pm;
{
	int count;

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_DESTROY)
		printf("pmap_destroy(%x)\n", pm);
#endif
	simple_lock(&pm->pm_lock);
	count = --pm->pm_refcount;
	simple_unlock(&pm->pm_lock);
	if (count == 0) {
		pmap_release(pm);
		free((caddr_t)pm, M_VMPMAP);
	}
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 */
void
pmap_release(pm)
	register struct pmap *pm;
{
	register union ctxinfo *c;
	register int s = splpmap();	/* paranoia */

#ifdef DEBUG
	if (pmapdebug & PDB_DESTROY)
		printf("pmap_release(%x)\n", pm);
#endif
	if (pm->pm_mmuforw)
		panic("pmap_release mmuforw");
	if ((c = pm->pm_ctx) != NULL) {
		if (pm->pm_ctxnum == 0)
			panic("pmap_release: releasing kernel");
		ctx_free(pm);
	}
	splx(s);
	if (pm->pm_segstore)
		free((caddr_t)pm->pm_segstore, M_VMPMAP);
}

/*
 * Add a reference to the given pmap.
 */
void
pmap_reference(pm)
	struct pmap *pm;
{

	if (pm != NULL) {
		simple_lock(&pm->pm_lock);
		pm->pm_refcount++;
		simple_unlock(&pm->pm_lock);
	}
}

static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);

/*
 * Remove the given range of mapping entries.
 * The starting and ending addresses are already rounded to pages.
 * Sheer lunacy: pmap_remove is often asked to remove nonexistent
 * mappings.
 */
void
pmap_remove(pm, va, endva)
	register struct pmap *pm;
	register vm_offset_t va, endva;
{
	register vm_offset_t nva;
	register int vseg, nleft, s, ctx;
	register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t,
	    int, int, int);

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_REMOVE)
		printf("pmap_remove(%x, %x, %x)\n", pm, va, endva);
#endif
	if (pm == kernel_pmap) {
		/*
		 * Removing from kernel address space.
		 */
		rm = pmap_rmk;
	} else {
		/*
		 * Removing from user address space.
		 */
		write_user_windows();
		rm = pmap_rmu;
	}
	ctx = getcontext();
	s = splpmap();		/* XXX conservative */
	simple_lock(&pm->pm_lock);
	for (; va < endva; va = nva) {
		/* do one virtual segment at a time */
		vseg = VA_VSEG(va);
		nva = VSTOVA(vseg + 1);
		if (nva == 0 || nva > endva)
			nva = endva;
		if ((nleft = pm->pm_npte[vseg]) != 0)
			pm->pm_npte[vseg] = (*rm)(pm, va, nva,
			    vseg, nleft, pm->pm_segmap[vseg]);
	}
	simple_unlock(&pm->pm_lock);
	splx(s);
	setcontext(ctx);
}

/*
 * The following magic number was chosen because:
 *	1. It is the same amount of work to cache_flush_page 4 pages
 *	   as to cache_flush_segment 1 segment (so at 4 the cost of
 *	   flush is the same).
 *	2. Flushing extra pages is bad (causes cache not to work).
 *	3. The current code, which malloc()s 5 pages for each process
 *	   for a user vmspace/pmap, almost never touches all 5 of those
 *	   pages.
 */
#if 0
#define	PMAP_RMK_MAGIC	(cacheinfo.c_hwflush?5:64)	/* if > magic, use cache_flush_segment */
#else
#define	PMAP_RMK_MAGIC	5	/* if > magic, use cache_flush_segment */
#endif

/*
 * Remove a range contained within a single segment.
 * These are egregiously complicated routines.
 */

/* remove from kernel, return new nleft */
static int
pmap_rmk(pm, va, endva, vseg, nleft, pmeg)
	register struct pmap *pm;
	register vm_offset_t va, endva;
	register int vseg, nleft, pmeg;
{
	register int i, tpte, perpage, npg;
	register struct pvlist *pv;

#ifdef DEBUG
	if (pmeg == seginval)
		panic("pmap_rmk: not loaded");
	if (pm->pm_ctx == NULL)
		panic("pmap_rmk: lost context");
#endif
	setcontext(0);
	/* decide how to flush cache */
	npg = (endva - va) >> PGSHIFT;
	if (npg > PMAP_RMK_MAGIC) {
		/* flush the whole segment */
		perpage = 0;
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_segment(vseg);
	} else {
		/* flush each page individually; some never need flushing */
		perpage = 1;
	}
	while (va < endva) {
		tpte = getpte(va);
		if ((tpte & PG_V) == 0) {
			va += PAGE_SIZE;
			continue;
		}
		pv = NULL;
		/* if cacheable, flush page as needed */
		if ((tpte & PG_NC) == 0) {
			if (perpage)
				cache_flush_page(va);
		}
		if ((tpte & PG_TYPE) == PG_OBMEM) {
			i = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(i)) {
				pv = pvhead(i);
				pv->pv_flags |= MR(tpte);
				pv_unlink(pv, pm, va);
			}
		}
		nleft--;
		setpte(va, 0);
		va += NBPG;
	}

	/*
	 * If the segment is all gone, remove it from everyone and
	 * free the MMU entry.
	 */
	if (nleft == 0) {
		va = VSTOVA(vseg);		/* retract */
		setsegmap(va, seginval);
		for (i = ncontext; --i > 0;) {
			setcontext(i);
			setsegmap(va, seginval);
		}
		me_free(pm, pmeg);
	}
	return (nleft);
}

/*
 * Just like PMAP_RMK_MAGIC, but we have a different threshold.
 * Note that this may well deserve further tuning work.
 */
#if 0
#define	PMAP_RMU_MAGIC	(cacheinfo.c_hwflush?4:64)	/* if > magic, use cache_flush_segment */
#else
#define	PMAP_RMU_MAGIC	4	/* if > magic, use cache_flush_segment */
#endif

/* remove from user */
static int
pmap_rmu(pm, va, endva, vseg, nleft, pmeg)
	register struct pmap *pm;
	register vm_offset_t va, endva;
	register int vseg, nleft, pmeg;
{
	register int *pte0, i, pteva, tpte, perpage, npg;
	register struct pvlist *pv;

	pte0 = pm->pm_pte[vseg];
	if (pmeg == seginval) {
		register int *pte = pte0 + VA_VPG(va);

		/*
		 * PTEs are not in MMU.  Just invalidate software copies.
		 */
		for (; va < endva; pte++, va += PAGE_SIZE) {
			tpte = *pte;
			if ((tpte & PG_V) == 0) {
				/* nothing to remove (braindead VM layer) */
				continue;
			}
			if ((tpte & PG_TYPE) == PG_OBMEM) {
				i = ptoa(HWTOSW(tpte & PG_PFNUM));
				if (managed(i))
					pv_unlink(pvhead(i), pm, va);
			}
			nleft--;
			*pte = 0;
		}
		if (nleft == 0) {
			free((caddr_t)pte0, M_VMPMAP);
			pm->pm_pte[vseg] = NULL;
		}
		return (nleft);
	}

	/*
	 * PTEs are in MMU.  Invalidate in hardware, update ref &
	 * mod bits, and flush cache if required.
	 */
	if (pm->pm_ctx) {
		/* process has a context, must flush cache */
		npg = (endva - va) >> PGSHIFT;
		setcontext(pm->pm_ctxnum);
		if (npg > PMAP_RMU_MAGIC) {
			perpage = 0;	/* flush the whole segment */
#ifdef notdef
			if (vactype != VAC_NONE)
#endif
				cache_flush_segment(vseg);
		} else
			perpage = 1;
		pteva = va;
	} else {
		/* no context, use context 0; cache flush unnecessary */
		setcontext(0);
		/* XXX use per-cpu pteva? */
		setsegmap(0, pmeg);
		pteva = VA_VPG(va) << PGSHIFT;
		perpage = 0;
	}
	for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) {
		tpte = getpte(pteva);
		if ((tpte & PG_V) == 0)
			continue;
		pv = NULL;
		/* if cacheable, flush page as needed */
		if ((tpte & PG_NC) == 0) {
			if (perpage)
				cache_flush_page(va);
		}
		if ((tpte & PG_TYPE) == PG_OBMEM) {
			i = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(i)) {
				pv = pvhead(i);
				pv->pv_flags |= MR(tpte);
				pv_unlink(pv, pm, va);
			}
		}
		nleft--;
		setpte(pteva, 0);
	}

	/*
	 * If the segment is all gone, and the context is loaded, give
	 * the segment back.
	 */
	if (nleft == 0 && pm->pm_ctx != NULL) {
		va = VSTOVA(vseg);		/* retract */
		setsegmap(va, seginval);
		free((caddr_t)pte0, M_VMPMAP);
		pm->pm_pte[vseg] = NULL;
		me_free(pm, pmeg);
		if (vseg + 1 == pm->pm_gap_start)
			pm->pm_gap_start = vseg;
		if (vseg == pm->pm_gap_end)
			pm->pm_gap_end = vseg + 1;
	}
	return (nleft);
}

/*
 * Lower (make more strict) the protection on the specified
 * physical page.
 *
 * There are only two cases: either the protection is going to 0
 * (in which case we do the dirty work here), or it is going down
 * to read-only (in which case pv_changepte does the trick).
 */
void
pmap_page_protect(pa, prot)
	vm_offset_t pa;
	vm_prot_t prot;
{
	register struct pvlist *pv, *pv0, *npv;
	register struct pmap *pm;
	register int *pte;
	register int va, vseg, pteva, tpte;
	register int flags, nleft, i, pmeg, s, ctx, doflush;

#ifdef DEBUG
	if ((pmapdebug & PDB_CHANGEPROT) ||
	    (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE))
		printf("pmap_page_protect(%x, %x)\n", pa, prot);
#endif
	/*
	 * Skip unmanaged pages, or operations that do not take
	 * away write permission.
	 */
	if (!managed(pa) || prot & VM_PROT_WRITE)
		return;
	write_user_windows();	/* paranoia */
	if (prot & VM_PROT_READ) {
		pv_changepte(pvhead(pa), 0, PG_W);
		return;
	}

	/*
	 * Remove all access to all people talking to this page.
	 * Walk down PV list, removing all mappings.
	 * The logic is much like that for pmap_remove,
	 * but we know we are removing exactly one page.
	 */
	pv = pvhead(pa);
	s = splpmap();
	if ((pm = pv->pv_pmap) == NULL) {
		splx(s);
		return;
	}
	ctx = getcontext();
	pv0 = pv;
	flags = pv->pv_flags & ~PV_NC;
	for (;; pm = pv->pv_pmap) {
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		if ((nleft = pm->pm_npte[vseg]) == 0)
			panic("pmap_remove_all: empty vseg");
		nleft--;
		pm->pm_npte[vseg] = nleft;
		pmeg = pm->pm_segmap[vseg];
		pte = pm->pm_pte[vseg];
		if (pmeg == seginval) {
			if (nleft) {
				pte += VA_VPG(va);
				*pte = 0;
			} else {
				free((caddr_t)pte, M_VMPMAP);
				pm->pm_pte[vseg] = NULL;
			}
			goto nextpv;
		}
		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			pteva = va;
#ifdef notdef
			doflush = vactype != VAC_NONE;
#else
			doflush = 1;
#endif
		} else {
			setcontext(0);
			/* XXX use per-cpu pteva? */
			setsegmap(0, pmeg);
			pteva = VA_VPG(va) << PGSHIFT;
			doflush = 0;
		}
		if (nleft) {
			if (doflush)
				cache_flush_page(va);
			tpte = getpte(pteva);
			if ((tpte & PG_V) == 0)
				panic("pmap_page_protect !PG_V 1");
			flags |= MR(tpte);
			setpte(pteva, 0);
		} else {
			if (doflush)
				cache_flush_page(va);
			tpte = getpte(pteva);
			if ((tpte & PG_V) == 0)
				panic("pmap_page_protect !PG_V 2");
			flags |= MR(tpte);
			if (pm->pm_ctx) {
				setsegmap(va, seginval);
				if (pm == kernel_pmap) {
					for (i = ncontext; --i > 0;) {
						setcontext(i);
						setsegmap(va, seginval);
					}
					goto skipptefree;
				}
			}
			free((caddr_t)pte, M_VMPMAP);
			pm->pm_pte[vseg] = NULL;
skipptefree:
			me_free(pm, pmeg);
		}
nextpv:
		npv = pv->pv_next;
		if (pv != pv0)
			free((caddr_t)pv, M_VMPVENT);
		if ((pv = npv) == NULL)
			break;
	}
	pv0->pv_pmap = NULL;
	pv0->pv_next = NULL;	/* ? */
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
}

/*
 * Lower (make more strict) the protection on the specified
 * range of this pmap.
 *
 * There are only two cases: either the protection is going to 0
 * (in which case we call pmap_remove to do the dirty work), or
 * it is going from read/write to read-only.  The latter is
 * fairly easy.
 */
void
pmap_protect(pm, sva, eva, prot)
	register struct pmap *pm;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register int va, nva, vseg, pteva, pmeg;
	register int s, ctx;

	if (pm == NULL || prot & VM_PROT_WRITE)
		return;
	if ((prot & VM_PROT_READ) == 0) {
		pmap_remove(pm, sva, eva);
		return;
	}

	write_user_windows();
	ctx = getcontext();
	s = splpmap();
	simple_lock(&pm->pm_lock);

	for (va = sva; va < eva;) {
		vseg = VA_VSEG(va);
		nva = VSTOVA(vseg + 1);
		if (nva == 0)		/* cannot happen */
			panic("pmap_protect: last segment");
		if (nva > eva)
			nva = eva;
		if (pm->pm_npte[vseg] == 0) {
			va = nva;
			continue;
		}
		pmeg = pm->pm_segmap[vseg];
		if (pmeg == seginval) {
			register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];

			/* not in MMU; just clear PG_W from core copies */
			for (; va < nva; va += NBPG)
				*pte++ &= ~PG_W;
		} else {
			/* in MMU: take away write bits from MMU PTEs */
			if (
#ifdef notdef
			    vactype != VAC_NONE &&
#endif
			    pm->pm_ctx) {
				register int tpte;

				/*
				 * Flush cache so that any existing cache
				 * tags are updated.  This is really only
				 * needed for PTEs that lose PG_W.
				 */
				setcontext(pm->pm_ctxnum);
				for (; va < nva; va += NBPG) {
					tpte = getpte(va);
					pmap_stats.ps_npg_prot_all++;
					if (tpte & PG_W) {
						pmap_stats.ps_npg_prot_actual++;
						cache_flush_page(va);
						setpte(va, tpte & ~PG_W);
					}
				}
			} else {
				register int pteva;

				/*
				 * No context, hence not cached;
				 * just update PTEs.
				 */
				setcontext(0);
				/* XXX use per-cpu pteva? */
				setsegmap(0, pmeg);
				pteva = VA_VPG(va) << PGSHIFT;
				for (; va < nva; pteva += NBPG, va += NBPG)
					setpte(pteva, getpte(pteva) & ~PG_W);
			}
		}
	}
	simple_unlock(&pm->pm_lock);
	splx(s);
	setcontext(ctx);
}

/*
 * Change the protection and/or wired status of the given (MI) virtual page.
 * XXX: should have separate function (or flag) telling whether only wiring
 * is changing.
 */
void
pmap_changeprot(pm, va, prot, wired)
	register struct pmap *pm;
	register vm_offset_t va;
	vm_prot_t prot;
	int wired;
{
	register int vseg, tpte, newprot, pmeg, ctx, i, s;

#ifdef DEBUG
	if (pmapdebug & PDB_CHANGEPROT)
		printf("pmap_changeprot(%x, %x, %x, %x)\n",
		    pm, va, prot, wired);
#endif

	write_user_windows();	/* paranoia */

	if (pm == kernel_pmap)
		newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
	else
		newprot = prot & VM_PROT_WRITE ? PG_W : 0;
	vseg = VA_VSEG(va);
	s = splpmap();		/* conservative */
	pmap_stats.ps_changeprots++;

	/* update PTEs in software or hardware */
	if ((pmeg = pm->pm_segmap[vseg]) == seginval) {
		register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];

		/* update in software */
		if ((*pte & PG_PROT) == newprot)
			goto useless;
		*pte = (*pte & ~PG_PROT) | newprot;
	} else {
		/* update in hardware */
		ctx = getcontext();
		if (pm->pm_ctx) {
			/* use current context; flush writeback cache */
			setcontext(pm->pm_ctxnum);
			tpte = getpte(va);
			if ((tpte & PG_PROT) == newprot) {
				setcontext(ctx);
				goto useless;
			}
			if (vactype == VAC_WRITEBACK &&
			    (tpte & (PG_U | PG_NC)) == PG_U)
				cache_flush_page((int)va);
		} else {
			setcontext(0);
			/* XXX use per-cpu va? */
			setsegmap(0, pmeg);
			va = VA_VPG(va) << PGSHIFT;
			tpte = getpte(va);
			if ((tpte & PG_PROT) == newprot) {
				setcontext(ctx);
				goto useless;
			}
		}
		tpte = (tpte & ~PG_PROT) | newprot;
		setpte(va, tpte);
		setcontext(ctx);
	}
	splx(s);
	return;

useless:
	/* only wiring changed, and we ignore wiring */
	pmap_stats.ps_useless_changeprots++;
	splx(s);
}

/*
 * Insert (MI) physical page pa at virtual address va in the given pmap.
 * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
 *
 * If pa is not in the `managed' range it will not be `bank mapped'.
 * This works during bootstrap only because the first 4MB happens to
 * map one-to-one.
 *
 * There may already be something else there, or we might just be
 * changing protections and/or wiring on an existing mapping.
 *	XXX	should have different entry points for changing!
 */
void
pmap_enter(pm, va, pa, prot, wired)
	register struct pmap *pm;
	vm_offset_t va, pa;
	vm_prot_t prot;
	int wired;
{
	register struct pvlist *pv;
	register int pteproto, ctx;

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_ENTER)
		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
		    pm, va, pa, prot, wired);
#endif

	pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
	pa &= ~PMAP_TNC;
	/*
	 * Set up prototype for new PTE.  Cannot set PG_NC from PV_NC yet
	 * since the pvlist no-cache bit might change as a result of the
	 * new mapping.
	 */
	if (managed(pa)) {
		pteproto |= SWTOHW(atop(pa));
		pv = pvhead(pa);
	} else {
		pteproto |= atop(pa) & PG_PFNUM;
		pv = NULL;
	}
	if (prot & VM_PROT_WRITE)
		pteproto |= PG_W;

	ctx = getcontext();
	if (pm == kernel_pmap)
		pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S);
	else
		pmap_enu(pm, va, prot, wired, pv, pteproto);
	setcontext(ctx);
}

/* enter new (or change existing) kernel mapping */
pmap_enk(pm, va, prot, wired, pv, pteproto)
	register struct pmap *pm;
	vm_offset_t va;
	vm_prot_t prot;
	int wired;
	register struct pvlist *pv;
	register int pteproto;
{
	register int vseg, tpte, pmeg, i, s;

	vseg = VA_VSEG(va);
	s = splpmap();		/* XXX way too conservative */
	if (pm->pm_segmap[vseg] != seginval &&
	    (tpte = getpte(va)) & PG_V) {
		register int addr = tpte & PG_PFNUM;

		/* old mapping exists */
		if (addr == (pteproto & PG_PFNUM)) {
			/* just changing protection and/or wiring */
			splx(s);
			pmap_changeprot(pm, va, prot, wired);
			return;
		}

		/*printf("pmap_enk: changing existing va=>pa entry\n");*/
		/*
		 * Switcheroo: changing pa for this va.
		 * If old pa was managed, remove from pvlist.
		 * If old page was cached, flush cache.
		 */
		addr = ptoa(HWTOSW(addr));
		if (managed(addr))
			pv_unlink(pvhead(addr), pm, va);
		if (
#ifdef notdef
		    vactype != VAC_NONE &&
#endif
		    (tpte & PG_NC) == 0) {
			setcontext(0);	/* ??? */
			cache_flush_page((int)va);
		}
	} else {
		/* adding new entry */
		pm->pm_npte[vseg]++;
	}

	/*
	 * If the new mapping is for a managed PA, enter into pvlist.
	 * Note that the mapping for a malloc page will always be
	 * unique (hence will never cause a second call to malloc).
	 */
	if (pv != NULL)
		pteproto |= pv_link(pv, pm, va);

	pmeg = pm->pm_segmap[vseg];
	if (pmeg == seginval) {
		register int tva;

		/*
		 * Allocate an MMU entry now (on locked list),
		 * and map it into every context.  Set all its
		 * PTEs invalid (we will then overwrite one, but
		 * this is more efficient than looping twice).
		 */
#ifdef DEBUG
		if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0)
			panic("pmap_enk: kern seg but no kern ctx");
#endif
		pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg;
		pm->pm_segmap[vseg] = pmeg;
		i = ncontext - 1;
		do {
			setcontext(i);
			setsegmap(va, pmeg);
		} while (--i >= 0);

		/* set all PTEs to invalid, then overwrite one PTE below */
		tva = VA_ROUNDDOWNTOSEG(va);
		i = NPTESG;
		do {
			setpte(tva, 0);
			tva += NBPG;
		} while (--i > 0);
	}

	/* ptes kept in hardware only */
	setpte(va, pteproto);
	splx(s);
}

/* enter new (or change existing) user mapping */
pmap_enu(pm, va, prot, wired, pv, pteproto)
	register struct pmap *pm;
	vm_offset_t va;
	vm_prot_t prot;
	int wired;
	register struct pvlist *pv;
	register int pteproto;
{
	register int vseg, *pte, tpte, pmeg, s, doflush;
	register int x;

	write_user_windows();		/* XXX conservative */
	vseg = VA_VSEG(va);
	s = splpmap();			/* XXX conservative */

	/*
	 * If there is no space in which the PTEs can be written
	 * while they are not in the hardware, this must be a new
	 * virtual segment.  Get PTE space and count the segment.
	x = pm->pm_gap_start + (pm->pm_gap_end - pm->pm_gap_start) / 2;
	if (vseg > x) {
		if (vseg < pm->pm_gap_end)
			pm->pm_gap_end = vseg;
	} else {
		if (vseg >= pm->pm_gap_start && x != pm->pm_gap_start)
			pm->pm_gap_start = vseg + 1;
	}
#ifdef DEBUG
	if (pm->pm_gap_end < pm->pm_gap_start) {
		printf("pmap_enu: gap_start %x, gap_end %x",
		    pm->pm_gap_start, pm->pm_gap_end);
		panic("pmap_enu: gap botch");
	}
#endif

retry:
	pte = pm->pm_pte[vseg];
	if (pte == NULL) {
		/* definitely a new mapping */
		register int size = NPTESG * sizeof *pte;

		pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK);
		if (pm->pm_pte[vseg] != NULL) {
			printf("pmap_enter: pte filled during sleep\n");
			/* can this happen? */
			free((caddr_t)pte, M_VMPMAP);
			goto retry;
		}
#ifdef DEBUG
		if (pm->pm_segmap[vseg] != seginval)
			panic("pmap_enter: new ptes, but not seginval");
#endif
		bzero((caddr_t)pte, size);
		pm->pm_pte[vseg] = pte;
		pm->pm_npte[vseg] = 1;
	} else {
		/* might be a change: fetch old pte */
		doflush = 0;
		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
			tpte = pte[VA_VPG(va)];		/* software pte */
		else {
			if (pm->pm_ctx) {
				/* hardware pte */
				setcontext(pm->pm_ctxnum);
				tpte = getpte(va);
				doflush = 1;
			} else {
				setcontext(0);	/* XXX use per-cpu pteva? */
				setsegmap(0, pmeg);
				tpte = getpte(VA_VPG(va) << PGSHIFT);
			}
		}
		if (tpte & PG_V) {
			register int addr = tpte & PG_PFNUM;

			/* old mapping exists */
			if (addr == (pteproto & PG_PFNUM)) {
				/* just changing prot and/or wiring */
				splx(s);
				/* caller should call this directly: */
				pmap_changeprot(pm, va, prot, wired);
				if (wired)
					pm->pm_stats.wired_count++;
				else
					pm->pm_stats.wired_count--;
				return;
			}

			/*
			 * Switcheroo: changing pa for this va.
			 * If old pa was managed, remove from pvlist.
			 * If old page was cached, flush cache.
			 */
			/*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
			    curproc->p_comm, curproc->p_pid, va);*/
			addr = ptoa(HWTOSW(addr));
			if (managed(addr))
				pv_unlink(pvhead(addr), pm, va);
			if (
#ifdef notdef
			    vactype != VAC_NONE &&
#endif
			    doflush && (tpte & PG_NC) == 0)
				cache_flush_page((int)va);
		} else {
			/* adding new entry */
			pm->pm_npte[vseg]++;

			/*
			 * Increment counters.
			 */
			if (wired)
				pm->pm_stats.wired_count++;
		}
	}
	if (pv != NULL)
		pteproto |= pv_link(pv, pm, va);

	/*
	 * Update hardware or software PTEs (whichever are active).
	 */
	if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
		/* PTEs are in hardware */
		if (pm->pm_ctx)
			setcontext(pm->pm_ctxnum);
		else {
			setcontext(0);	/* XXX use per-cpu pteva? */
			setsegmap(0, pmeg);
			va = VA_VPG(va) << PGSHIFT;
		}
		setpte(va, pteproto);
	}
	/* update software copy */
	pte += VA_VPG(va);
	*pte = pteproto;

	splx(s);
}

/*
 * Change the wiring attribute for a map/virtual-address pair.
 */
/* ARGSUSED */
void
pmap_change_wiring(pm, va, wired)
	struct pmap *pm;
	vm_offset_t va;
	int wired;
{

	pmap_stats.ps_useless_changewire++;
}
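/*
 * Informal summary of a pattern used above (added for illustration):
 * to touch the hardware PTEs of a pmap that currently has no context,
 * pmap_changeprot() and pmap_enu() borrow virtual segment 0 of
 * context 0:
 *
 *	setcontext(0);
 *	setsegmap(0, pmeg);
 *	tpte = getpte(VA_VPG(va) << PGSHIFT);
 *
 * i.e., the pmeg is temporarily mapped at va 0 and the page is then
 * addressed by its page-within-segment number.  The `XXX use per-cpu
 * pteva?' notes suggest a dedicated va would be cleaner.
 */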
/*
 * Extract the physical page address associated
 * with the given map/virtual-address pair.
 * GRR, the vm code knows; we should not have to do this!
 */
vm_offset_t
pmap_extract(pm, va)
	register struct pmap *pm;
	vm_offset_t va;
{
	register int tpte;
	register int vseg;

	if (pm == NULL) {
		printf("pmap_extract: null pmap\n");
		return (0);
	}
	vseg = VA_VSEG(va);
	if (pm->pm_segmap[vseg] != seginval) {
		register int ctx = getcontext();

		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			tpte = getpte(va);
		} else {
			setcontext(0);
			/* map the pmeg at segment 0, as in pmap_enu() */
			setsegmap(0, pm->pm_segmap[vseg]);
			tpte = getpte(VA_VPG(va) << PGSHIFT);
		}
		setcontext(ctx);
	} else {
		register int *pte = pm->pm_pte[vseg];

		if (pte == NULL) {
			printf("pmap_extract: invalid vseg\n");
			return (0);
		}
		tpte = pte[VA_VPG(va)];
	}
	if ((tpte & PG_V) == 0) {
		printf("pmap_extract: invalid pte\n");
		return (0);
	}
	tpte &= PG_PFNUM;
	tpte = HWTOSW(tpte);
	return ((tpte << PGSHIFT) | (va & PGOFSET));
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
/* ARGSUSED */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	struct pmap *dst_pmap, *src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{
}

/*
 * Require that all active physical maps contain no
 * incorrect entries NOW.  [This update includes
 * forcing updates of any address map caching.]
 */
void
pmap_update()
{
}

/*
 * Garbage collects the physical map system for
 * pages which are no longer used.
 * Success need not be guaranteed -- that is, there
 * may well be pages which are not referenced, but
 * others may be collected.
 * Called by the pageout daemon when pages are scarce.
 */
/* ARGSUSED */
void
pmap_collect(pm)
	struct pmap *pm;
{
}

/*
 * Clear the modify bit for the given physical page.
 */
void
pmap_clear_modify(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		(void) pv_syncflags(pv);
		pv->pv_flags &= ~PV_MOD;
	}
}

/*
 * Tell whether the given physical page has been modified.
 */
int
pmap_is_modified(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
			return (1);
	}
	return (0);
}

/*
 * Clear the reference bit for the given physical page.
 */
void
pmap_clear_reference(pa)
	vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		(void) pv_syncflags(pv);
		pv->pv_flags &= ~PV_REF;
	}
}

/*
 * Tell whether the given physical page has been referenced.
 */
int
pmap_is_referenced(pa)
	vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
			return (1);
	}
	return (0);
}
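/*
 * Illustrative use of the two predicates above (hypothetical caller,
 * not from this file): a pageout-style scan might do
 *
 *	if (pmap_is_modified(pa))
 *		... page must be cleaned before reuse ...
 *	else if (!pmap_is_referenced(pa))
 *		... page is idle, a reclamation candidate ...
 *
 * Each predicate tests the cached PV_MOD/PV_REF flag first and only
 * then gathers the hardware referenced/modified PTE bits into the pv
 * head with pv_syncflags().
 */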
/*
 * Make the specified pages (by pmap, offset) pageable (or not) as requested.
 *
 * A page which is not pageable may not take a fault; therefore, its page
 * table entry must remain valid for the duration (or at least, the trap
 * handler must not call vm_fault).
 *
 * This routine is merely advisory; pmap_enter will specify that these pages
 * are to be wired down (or not) as appropriate.
 */
/* ARGSUSED */
void
pmap_pageable(pm, start, end, pageable)
	struct pmap *pm;
	vm_offset_t start, end;
	int pageable;
{
}

/*
 * Routine:	pmap_kernel
 * Function:
 *	Returns the physical map handle for the kernel.
 */
pmap_t
pmap_kernel()
{
	return (kernel_pmap);
}

/*
 * Fill the given MI physical page with zero bytes.
 *
 * We avoid stomping on the cache.
 * XXX	might be faster to use destination's context and allow cache to fill?
 */
void
pmap_zero_page(pa)
	register vm_offset_t pa;
{
	register caddr_t va;
	register int pte;

	if (managed(pa)) {
		/*
		 * The following might not be necessary since the page
		 * is being cleared because it is about to be allocated,
		 * i.e., is in use by no one.
		 */
#if 1
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			pv_flushcache(pvhead(pa));
#endif
		pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
	} else
		pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);
	va = vpage[0];
	setpte(va, pte);
	qzero(va, NBPG);
	setpte(va, 0);
}

/*
 * Copy the given MI physical source page to its destination.
 *
 * We avoid stomping on the cache as above (with same `XXX' note).
 * We must first flush any write-back cache for the source page.
 * We go ahead and stomp on the kernel's virtual cache for the
 * source page, since the cache can read memory MUCH faster than
 * the processor.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src, dst;
{
	register caddr_t sva, dva;
	register int spte, dpte;

	if (managed(src)) {
		if (vactype == VAC_WRITEBACK)
			pv_flushcache(pvhead(src));
		spte = PG_V | PG_S | SWTOHW(atop(src));
	} else
		spte = PG_V | PG_S | (atop(src) & PG_PFNUM);
	if (managed(dst)) {
		/* similar `might not be necessary' comment applies */
#if 1
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			pv_flushcache(pvhead(dst));
#endif
		dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
	} else
		dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);
	sva = vpage[0];
	dva = vpage[1];
	setpte(sva, spte);
	setpte(dva, dpte);
	qcopy(sva, dva, NBPG);	/* loads cache, so we must ... */
	cache_flush_page((int)sva);
	setpte(sva, 0);
	setpte(dva, 0);
}

/*
 * Turn a cdevsw d_mmap value into a byte address for pmap_enter.
 * XXX	this should almost certainly be done differently, and
 *	elsewhere, or even not at all
 */
vm_offset_t
pmap_phys_address(x)
	int x;
{
	return (x);
}

/*
 * Turn off cache for a given (va, number of pages).
 *
 * We just assert PG_NC for each PTE; the addresses must reside
 * in locked kernel space.  A cache flush is also done.
 */
kvm_uncache(va, npages)
	register caddr_t va;
	register int npages;
{
	register int pte;

	for (; --npages >= 0; va += NBPG) {
		pte = getpte(va);
		if ((pte & PG_V) == 0)
			panic("kvm_uncache !pg_v");
		pte |= PG_NC;
		setpte(va, pte);
		cache_flush_page((int)va);
	}
}

/*
 * For /dev/mem.
 */
int
pmap_enter_hw(pm, va, pa, prot, wired)
	register struct pmap *pm;
	vm_offset_t va, pa;
	vm_prot_t prot;
	int wired;
{
	register struct memarr *ma;
	register int n;
	register u_int t;

	if (pa >= MAXMEM)				/* ??? */
		return (EFAULT);
	for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
		t = (u_int)pa - ma->addr;
		if (t < ma->len)
			goto ok;
	}
	return (EFAULT);
ok:
	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
	if (pa >= vm_first_phys + vm_num_phys)		/* ??? */
		return (EFAULT);

	pmap_enter(pm, va, pa, prot, wired);
	return (0);
}

int
pmap_count_ptes(pm)
	register struct pmap *pm;
{
	register int idx, total;

	if (pm == kernel_pmap)
		idx = NKSEG;
	else
		idx = NUSEG;
	for (total = 0; idx;)
		total += pm->pm_npte[--idx];
	pm->pm_stats.resident_count = total;
	return (total);
}
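/*
 * Informal note on the SWTOHW()/HWTOSW() conversions used throughout:
 * the managed range (vm_first_phys..vm_first_phys+vm_num_phys) is one
 * contiguous software numbering of pages, while the real memory banks
 * recorded in pmemarr[] may be discontiguous, so software page numbers
 * are converted with SWTOHW() on the way into a PTE and back with
 * HWTOSW() on the way out.  pmap_enter_hw() above shows the inbound
 * direction for a raw /dev/mem physical address:
 *
 *	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
 */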