/* $NetBSD: pmap.c,v 1.12 2008/03/20 09:09:20 kochi Exp $ */ /*- * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center and by Chris G. Demetriou. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 1998,2000 Doug Rabson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 */

/* __FBSDID("$FreeBSD: src/sys/ia64/ia64/pmap.c,v 1.172 2005/11/20 06:09:48 alc Exp $"); */

/*
 * XXX: This module is a mess.  Need to clean up locking, list
 * traversal, etc.
 */

#include <sys/cdefs.h>

__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.12 2008/03/20 09:09:20 kochi Exp $");

/*
 * XXX: The header names below were lost in transcription; this list is
 * a plausible reconstruction based on the interfaces this file uses.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/sched.h>

#include <uvm/uvm.h>

#include <machine/pal.h>
#include <machine/pte.h>
#include <machine/md_var.h>

/*
 * Kernel virtual memory management.
 */
static int nkpt;
struct ia64_lpte **ia64_kptdir;

#define KPTE_DIR_INDEX(va) \
	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))

#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))

/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;

kmutex_t pmap_ptc_lock;			/* Global PTC lock */

/* VHPT Base */
vaddr_t vhpt_base;
vaddr_t pmap_vhpt_log2size;

struct ia64_bucket *pmap_vhpt_bucket;
int pmap_vhpt_nbuckets;
kmutex_t pmap_vhptlock;			/* VHPT collision chain lock */

int pmap_vhpt_inserts;
int pmap_vhpt_resident;
int pmap_vhpt_collisions;

#ifdef DEBUG
static void dump_vhpt(void);
#endif

/*
 * Data for the RID allocator
 */
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
kmutex_t pmap_rid_lock;			/* RID allocator lock */

bool		pmap_initialized;	/* Has pmap_init completed? */
u_long		pmap_pages_stolen;	/* instrumentation */

struct pmap	kernel_pmap_store;	/* the kernel's pmap (proc0) */

static vaddr_t	kernel_vm_end;	/* VA of last avail page (end of kernel address space) */

/*
 * This variable contains the number of CPU IDs we need to allocate
 * space for when allocating the pmap structure.  It is used to
 * size a per-CPU array of ASN and ASN Generation number.
 */
u_long		pmap_ncpuids;

#ifndef PMAP_PV_LOWAT
#define PMAP_PV_LOWAT	16
#endif
int		pmap_pv_lowat = PMAP_PV_LOWAT;

/*
 * PV table management functions.
 */
void	*pmap_pv_page_alloc(struct pool *, int);
void	pmap_pv_page_free(struct pool *, void *);

struct pool_allocator pmap_pv_page_allocator = {
	pmap_pv_page_alloc, pmap_pv_page_free, 0,
};

bool	pmap_poolpage_alloc(paddr_t *);
void	pmap_poolpage_free(paddr_t);

/*
 * List of all pmaps, used to update them when e.g. additional kernel
 * page tables are allocated.  This list is kept LRU-ordered by
 * pmap_activate().  XXX: Check on this.....
 */
TAILQ_HEAD(, pmap) pmap_all_pmaps;

/*
 * The pools from which pmap structures and sub-structures are allocated.
 */
struct pool pmap_pmap_pool;
struct pool pmap_ia64_lpte_pool;
struct pool pmap_pv_pool;

kmutex_t pmap_main_lock;
kmutex_t pmap_all_pmaps_slock;

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
/*
 * XXX(kochi): need to use only a spin lock?  The shared/exclusive
 * distinction of the old spinlockmgr() scheme is collapsed onto a
 * single mutex here.
 */
#define	PMAP_MAP_TO_HEAD_LOCK()		mutex_enter(&pmap_main_lock)
#define	PMAP_MAP_TO_HEAD_UNLOCK()	mutex_exit(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_LOCK()		mutex_enter(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_UNLOCK()	mutex_exit(&pmap_main_lock)
#else
#define	PMAP_MAP_TO_HEAD_LOCK()		/* nothing */
#define	PMAP_MAP_TO_HEAD_UNLOCK()	/* nothing */
#define	PMAP_HEAD_TO_MAP_LOCK()		/* nothing */
#define	PMAP_HEAD_TO_MAP_UNLOCK()	/* nothing */
#endif /* MULTIPROCESSOR || LOCKDEBUG */

#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)

#define	pmap_clear_accessed(lpte)	((lpte)->pte &= ~PTE_ACCESSED)
#define	pmap_clear_dirty(lpte)		((lpte)->pte &= ~PTE_DIRTY)
#define	pmap_clear_present(lpte)	((lpte)->pte &= ~PTE_PRESENT)
#define	pmap_clear_wired(lpte)		((lpte)->pte &= ~PTE_WIRED)

#define	pmap_set_wired(lpte)		((lpte)->pte |= PTE_WIRED)

/*
 * The VHPT bucket head structure.
 */
struct ia64_bucket {
	uint64_t	chain;
	kmutex_t	lock;
	u_int		length;
};

/* Local Helper functions */

static void	pmap_invalidate_all(pmap_t);
static void	pmap_invalidate_page(pmap_t, vaddr_t);

static pmap_t	pmap_switch(pmap_t pm);
static pmap_t	pmap_install(pmap_t);

static struct ia64_lpte *pmap_find_kpte(vaddr_t);

static void	pmap_set_pte(struct ia64_lpte *, vaddr_t, vaddr_t,
		    bool, bool);
static void	pmap_free_pte(struct ia64_lpte *pte, vaddr_t va);
static __inline void pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte,
		    vm_prot_t prot);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vaddr_t va, pv_entry_t pv, int freepte);
static struct ia64_lpte *pmap_find_pte(vaddr_t va);
static int	pmap_remove_entry(pmap_t pmap, struct vm_page *pg,
		    vaddr_t va, pv_entry_t pv);
static void	pmap_insert_entry(pmap_t pmap, vaddr_t va,
		    struct vm_page *pg);
static __inline int pmap_track_modified(vaddr_t va);

static void	pmap_enter_vhpt(struct ia64_lpte *, vaddr_t);
static int	pmap_remove_vhpt(vaddr_t);
static struct ia64_lpte *pmap_find_vhpt(vaddr_t);

void		pmap_page_purge(struct vm_page *pg);
static void	pmap_remove_page(pmap_t pmap, vaddr_t va);

static uint32_t	pmap_allocate_rid(void);
static void	pmap_free_rid(uint32_t rid);

static vaddr_t	pmap_steal_vhpt_memory(vsize_t);

/*
 * pmap_steal_memory:		[ INTERFACE ]
 *
 *	Bootstrap memory allocator (alternative to uvm_pageboot_alloc()).
 *	This function allows for early dynamic memory allocation until the
 *	virtual memory system has been bootstrapped.  After that point, either
 *	kmem_alloc or malloc should be used.  This function works by stealing
 *	pages from the (to be) managed page pool, then implicitly mapping the
 *	pages (by using their RR7 addresses) and zeroing them.
 *
 *	It may be used once the physical memory segments have been pre-loaded
 *	into the vm_physmem[] array.  Early memory allocation MUST use this
 *	interface!
 *	This cannot be used after uvm_page_init(), and will
 *	generate a panic if tried.
 *
 *	Note that this memory will never be freed, and in essence it is
 *	wired down.
 *
 *	We must adjust *vstartp and/or *vendp iff we use address space
 *	from the kernel virtual address range defined by
 *	pmap_virtual_space().
 *
 *	Note: no locking is necessary in this function.
 */
vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
{
	int lcv, npgs, x;
	vaddr_t va;
	paddr_t pa;

	size = round_page(size);
	npgs = atop(size);

#if 0
	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
#endif

	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
		if (uvm.page_init_done == true)
			panic("pmap_steal_memory: called _after_ bootstrap");

#if 0
		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
		    "avail_end 0x%lx\n", lcv, vm_physmem[lcv].avail_start,
		    vm_physmem[lcv].start, vm_physmem[lcv].avail_end);
#endif

		if (vm_physmem[lcv].avail_start != vm_physmem[lcv].start ||
		    vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;

#if 0
		printf("             avail_end - avail_start = 0x%lx\n",
		    vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
#endif

		if ((vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start)
		    < npgs)
			continue;

		/*
		 * There are enough pages here; steal them!
		 */
		pa = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start += npgs;
		vm_physmem[lcv].start += npgs;

		/*
		 * Have we used up this segment?
		 */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("pmap_steal_memory: out of memory!");

			/* Remove this segment from the list. */
			vm_nphysseg--;
			for (x = lcv; x < vm_nphysseg; x++) {
				/* structure copy */
				vm_physmem[x] = vm_physmem[x + 1];
			}
		}

		va = IA64_PHYS_TO_RR7(pa);
		memset((void *)va, 0, size);
		pmap_pages_stolen += npgs;
		return va;
	}

	/*
	 * If we got here, there was no memory left.
	 */
	panic("pmap_steal_memory: no memory to steal");
}
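/*
 * Illustrative sketch (not compiled): the aligned-carve arithmetic used
 * by pmap_steal_vhpt_memory() below.  Given a segment [avail_start,
 * avail_end) in pages and a power-of-two request of npgs pages, the
 * chunk is placed at the first npgs-aligned page, and the leftover
 * fragments [start1, end1) and [start2, end2) are what gets reloaded
 * via uvm_page_physload().  All values here are made up.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EX_ROUNDUP(x, n)	((((x) + (n) - 1) / (n)) * (n))

int
main(void)
{
	uint64_t avail_start = 0x1003, avail_end = 0x5000, npgs = 0x800;
	uint64_t start1, end1, start2, end2;

	start1 = avail_start;
	end1 = EX_ROUNDUP(start1, npgs);	/* carved chunk starts here */
	start2 = end1 + npgs;			/* first page after chunk */
	end2 = avail_end;

	if (start2 > end2)
		printf("chunk does not fit in this segment\n");
	else
		printf("chunk at 0x%llx; left frag [0x%llx,0x%llx), "
		    "right frag [0x%llx,0x%llx)\n",
		    (unsigned long long)end1,
		    (unsigned long long)start1, (unsigned long long)end1,
		    (unsigned long long)start2, (unsigned long long)end2);
	return 0;
}
#endif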
/*
 * pmap_steal_vhpt_memory:	Derived from alpha/pmap.c:pmap_steal_memory()
 *	Note: This function is not visible outside the pmap module.
 *	Assumptions: size is always a power of 2.
 *	Returns: Allocated memory at a naturally aligned address,
 *		 or 0 on failure.
 */
static vaddr_t
pmap_steal_vhpt_memory(vsize_t size)
{
	int lcv, npgs, x;
	vaddr_t va;
	paddr_t pa;
	paddr_t vhpt_start = 0, start1, start2, end1, end2;

	size = round_page(size);
	npgs = atop(size);

#if 1
	printf("VHPTPSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
#endif

	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
		if (uvm.page_init_done == true)
			panic("pmap_steal_vhpt_memory: called _after_ "
			    "bootstrap");

#if 1
		printf("     lcv %d: avail_start 0x%lx, start 0x%lx, "
		    "avail_end 0x%lx\n", lcv, vm_physmem[lcv].avail_start,
		    vm_physmem[lcv].start, vm_physmem[lcv].avail_end);
		printf("             avail_end - avail_start = 0x%lx\n",
		    vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
#endif

		if (vm_physmem[lcv].avail_start != vm_physmem[lcv].start ||
		    /* XXX: ??? */
		    vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;

		/* Break off a VHPT sized, aligned chunk off this segment. */

		start1 = vm_physmem[lcv].avail_start;

		/* Align requested start address on requested size boundary */
		end1 = vhpt_start = roundup(start1, npgs);

		start2 = vhpt_start + npgs;
		end2 = vm_physmem[lcv].avail_end;

		/* Case 1: Doesn't fit.  Skip this segment. */
		if (start2 > end2) {
			vhpt_start = 0;
			continue;
		}

		/* For all cases of fit:
		 *	- Remove segment.
		 *	- Re-insert fragments via uvm_page_physload();
		 */

		/*
		 * We _fail_ on a vhpt request which exhausts memory.
		 */
		if (start1 == end1 &&
		    start2 == end2 &&
		    vm_nphysseg == 1) {
#ifdef DEBUG
			printf("pmap_steal_vhpt_memory: out of memory!\n");
#endif
			return 0;	/* failure; a real RR7 VA is never 0 */
		}

		/* Remove this segment from the list. */
		vm_nphysseg--;
		/* physmem -= end2 - start1; */
		for (x = lcv; x < vm_nphysseg; x++) {
			/* structure copy */
			vm_physmem[x] = vm_physmem[x + 1];
		}

		/* Case 2: Perfect fit - skip segment reload. */
		if (start1 == end1 && start2 == end2)
			break;

		/* Case 3: Left unfit - reload it. */
		if (start1 != end1)
			uvm_page_physload(start1, end1, start1, end1,
			    VM_FREELIST_DEFAULT);

		/* Case 4: Right unfit - reload it. */
		if (start2 != end2)
			uvm_page_physload(start2, end2, start2, end2,
			    VM_FREELIST_DEFAULT);

		/* Case 5: Both unfit - Redundant, isn't it ? */
		break;
	}

	/*
	 * If we got here, we couldn't find a fit.
	 */
	if (vhpt_start == 0) {
#ifdef DEBUG
		printf("pmap_steal_vhpt_memory: no VHPT aligned fit found.\n");
#endif
		return 0;
	}

	/*
	 * There are enough pages here; steal them!
	 */
	pa = ptoa(vhpt_start);
	va = IA64_PHYS_TO_RR7(pa);
	memset((void *)va, 0, size);
	pmap_pages_stolen += npgs;
	return va;
}
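/*
 * Illustrative sketch (not compiled): the VHPT sizing policy used by
 * pmap_bootstrap() below.  Starting from the 32KB minimum (2^15), the
 * VHPT is doubled until it reaches roughly 32 bytes per physical page,
 * i.e. one ia64_lpte per page, then clamped to 2^61.  The physmem
 * figure below is made up.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t physmem = 1UL << 18;	/* pages: e.g. 2GB of 8KB pages */
	unsigned log2size = 15;
	uint64_t size = 1UL << log2size;

	while (size < physmem * 32) {
		log2size++;
		size <<= 1;
	}
	if (log2size > 61) {
		log2size = 61;
		size = 1UL << 61;
	}
	printf("VHPT: 2^%u = 0x%llx bytes\n", log2size,
	    (unsigned long long)size);
	return 0;
}
#endif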
/*
 * pmap_bootstrap:
 *
 *	Bootstrap the system to run with virtual memory.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_bootstrap(void)
{
	struct ia64_pal_result res;
	vaddr_t base, limit;
	size_t size;
	vsize_t bufsz;
	int i, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
	if (bootverbose)
		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
		    "stride1=0x%lx, stride2=0x%lx\n",
		    pmap_ptc_e_base,
		    pmap_ptc_e_count1,
		    pmap_ptc_e_count2,
		    pmap_ptc_e_stride1,
		    pmap_ptc_e_stride2);

	mutex_init(&pmap_ptc_lock, MUTEX_SPIN, IPL_VM);

	/*
	 * Setup RIDs.  RIDs 0..7 are reserved for the kernel.
	 *
	 * We currently need at least 19 bits in the RID because PID_MAX
	 * can only be encoded in 17 bits and we need RIDs for 5 regions
	 * per process.  With PID_MAX equalling 99999 this means that we
	 * need to be able to encode 499995 (=5*PID_MAX).
	 * The Itanium processor only has 18 bits and the architected
	 * minimum is exactly that.  So, we cannot use a PID based scheme
	 * in those cases.  Enter pmap_ridmap...
	 * We should avoid the map when running on a processor that has
	 * implemented enough bits.  This means that we should pass the
	 * process/thread ID to pmap.  This we currently don't do, so we
	 * use the map anyway.  However, we don't want to allocate a map
	 * that is large enough to cover the range dictated by the number
	 * of bits in the RID, because that may result in a RID map of
	 * 2MB in size for a 24-bit RID.  A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
	 * implements 18 bits (or when we can't figure it out).  Otherwise
	 * we create a 64KB map.
	 */
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		if (bootverbose)
			printf("Can't read VM Summary - assuming 18 "
			    "Region ID bits\n");
		ridbits = 18;	/* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		if (bootverbose)
			printf("Processor supports %d Region ID bits\n",
			    ridbits);
	}
	if (ridbits > 19)
		ridbits = 19;

	pmap_ridmax = (1 << ridbits);
	pmap_ridmapsz = pmap_ridmax / 64;
	pmap_ridmap = (uint64_t *)uvm_pageboot_alloc(pmap_ridmax / 8);
	pmap_ridmap[0] |= 0xff;
	pmap_rididx = 0;
	pmap_ridcount = 8;

	/* XXX: The FreeBSD pmap.c initialises this lock like this:
	 *	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
	 * Note that MTX_DEF mutexes can *sleep*.
	 */
	mutex_init(&pmap_rid_lock, MUTEX_ADAPTIVE, IPL_VM);

	/*
	 * Compute the number of pages kmem_map will have.
	 */
	kmeminit_nkmempages();

	/*
	 * Figure out how many initial PTE's are necessary to map the
	 * kernel.  We also reserve space for kmem_alloc_pageable()
	 * for vm_fork().
	 */

	/* Get size of buffer cache and set an upper limit */
	bufsz = buf_memcalc();
	buf_setvalimit(bufsz);

	nkpt = (((ubc_nwins << ubc_winshift) + bufsz +
	    16 * NCARGS + pager_map_size) / PAGE_SIZE +
	    USRIOSIZE + (maxproc * UPAGES) + nkmempages) / NKPTEPG;

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	ia64_kptdir = (void *)uvm_pageboot_alloc((nkpt + 1) * PAGE_SIZE);
	for (i = 0; i < nkpt; i++) {
		ia64_kptdir[i] = (void *)((vaddr_t)ia64_kptdir +
		    PAGE_SIZE * (i + 1));
	}

	kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
	    VM_GATEWAY_SIZE;

	/*
	 * Initialize the pmap pools and list.
	 */
	pmap_ncpuids = pmap_ridmax;
	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
	    &pool_allocator_nointr, IPL_NONE);	/* This may block. */

	/* XXX: Need to convert ia64_kptdir[][] to a pool. */

	/* The default pool allocator uses uvm_km_alloc & friends.
	 * XXX: We should be using regular vm_alloc'ed mem for regular,
	 * non-kernel ptes.
	 */
	pool_init(&pmap_ia64_lpte_pool, sizeof(struct ia64_lpte),
	    sizeof(void *), 0, 0, "ptpl", NULL, IPL_NONE);

	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), sizeof(void *),
	    0, 0, "pvpl", &pmap_pv_page_allocator, IPL_NONE);

	TAILQ_INIT(&pmap_all_pmaps);

	/*
	 * Figure out a useful size for the VHPT, based on the size of
	 * physical memory and try to locate a region which is large
	 * enough to contain the VHPT (which must be a power of two in
	 * size and aligned to a natural boundary).
	 * We silently bump up the VHPT size to the minimum size if the
	 * user has set the tunable too small.  Likewise, the VHPT size
	 * is silently capped to the maximum allowed.
	 */
	pmap_vhpt_log2size = PMAP_VHPT_LOG2SIZE;
	if (pmap_vhpt_log2size == 0) {
		pmap_vhpt_log2size = 15;
		size = 1UL << pmap_vhpt_log2size;
		while (size < physmem * 32) {
			pmap_vhpt_log2size++;
			size <<= 1;
		}
	} else if (pmap_vhpt_log2size < 15)
		pmap_vhpt_log2size = 15;
	if (pmap_vhpt_log2size > 61)
		pmap_vhpt_log2size = 61;

	vhpt_base = 0;
	base = limit = 0;
	size = 1UL << pmap_vhpt_log2size;
	while (vhpt_base == 0 && size) {
		if (bootverbose)
			printf("Trying VHPT size 0x%lx\n", size);

		/* allocate size bytes aligned at size */
		/* #ifdef MULTIPROCESSOR, then (size * MAXCPU) bytes */
		base = pmap_steal_vhpt_memory(size);

		if (!base) {
			/* Can't fit, try next smaller size.
*/ pmap_vhpt_log2size--; size >>= 1; } else vhpt_base = IA64_PHYS_TO_RR7(base); } if (pmap_vhpt_log2size < 15) panic("Can't find space for VHPT"); if (bootverbose) printf("Putting VHPT at 0x%lx\n", base); mutex_init(&pmap_vhptlock, MUTEX_SPIN, IPL_VM); __asm __volatile("mov cr.pta=%0;; srlz.i;;" :: "r" (vhpt_base + (1<<8) + (pmap_vhpt_log2size<<2) + 1)); #ifdef DEBUG dump_vhpt(); #endif /* * Initialise vhpt pte entries. */ pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte); pmap_vhpt_bucket = (void *)uvm_pageboot_alloc(pmap_vhpt_nbuckets * sizeof(struct ia64_bucket)); struct ia64_lpte *pte; pte = (struct ia64_lpte *)vhpt_base; for (i = 0; i < pmap_vhpt_nbuckets; i++) { pte[i].pte = 0; pte[i].itir = 0; pte[i].tag = 1UL << 63; /* Invalid tag */ pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i); /* Stolen memory is zeroed! */ mutex_init(&pmap_vhpt_bucket[i].lock, MUTEX_SPIN, IPL_VM); } /* * Initialize the locks. */ mutex_init(&pmap_main_lock, MUTEX_ADAPTIVE, IPL_VM); mutex_init(&pmap_all_pmaps_slock, MUTEX_SPIN, IPL_VM); /* * Initialize the kernel pmap (which is statically allocated). */ memset(pmap_kernel(), 0, sizeof(struct pmap)); mutex_init(&pmap_kernel()->pm_slock, MUTEX_SPIN, IPL_VM); for (i = 0; i < 5; i++) pmap_kernel()->pm_rid[i] = 0; pmap_kernel()->pm_active = 1; TAILQ_INIT(&pmap_kernel()->pm_pvlist); TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); /* * Region 5 is mapped via the vhpt. */ ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1); /* * Region 6 is direct mapped UC and region 7 is direct mapped * WC. The details of this is controlled by the Alt {I,D}TLB * handlers. Here we just make sure that they have the largest * possible page size to minimise TLB usage. */ ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2)); ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2)); /* * Clear out any random TLB entries left over from booting. */ /*XXX: look into API related stuff here */ pmap_invalidate_all(pmap_kernel()); map_gateway_page(); } /* * pmap_init: [ INTERFACE ] * * Initialize the pmap module. Called by vm_init(), to initialize any * structures that the pmap system needs to map virtual memory. * * Note: no locking is necessary in this function. */ void pmap_init(void) { /* * Set a low water mark on the pv_entry pool, so that we are * more likely to have these around even in extreme memory * starvation. */ pool_setlowat(&pmap_pv_pool, pmap_pv_lowat); /* * Now it is safe to enable pv entry recording. */ pmap_initialized = true; } /* * vtophys: virtual address to physical address. For use by * machine-dependent code only. */ paddr_t vtophys(va) vaddr_t va; { paddr_t pa; if (pmap_extract(pmap_kernel(), va, &pa) == true) return pa; return 0; } /* * pmap_virtual_space: [ INTERFACE ] * * Define the initial bounds of the kernel virtual address space. */ void pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) { *vstartp = VM_MIN_KERNEL_ADDRESS; *vendp = VM_MAX_KERNEL_ADDRESS; } /* * pmap_remove_all: [ INTERFACE ] * * This function is a hint to the pmap implementation that all * entries in pmap will be removed before any more entries are * entered. */ void pmap_remove_all(pmap_t pmap) { /* Nothing Yet */ } /* * pmap_remove: [ INTERFACE ] * * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. 
 */
void
pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
{
	pmap_t oldpmap;
	vaddr_t va;
	pv_entry_t pv, npv;
	struct ia64_lpte *pte;

	if (pmap->pm_stats.resident_count == 0)
		return;

	PMAP_MAP_TO_HEAD_LOCK();
	PMAP_LOCK(pmap);
	oldpmap = pmap_install(pmap);

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if (sva + PAGE_SIZE == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}

	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
		/*
		 * pmap_remove_pte() frees pv, so fetch the next
		 * element before removing the current one.
		 */
		for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv != NULL;
		    pv = npv) {
			npv = TAILQ_NEXT(pv, pv_plist);
			va = pv->pv_va;
			if (va >= sva && va < eva) {
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL);
				pmap_remove_pte(pmap, pte, va, pv, 1);
				pmap_invalidate_page(pmap, va);
			}
		}
	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
			pte = pmap_find_vhpt(va);
			if (pte != NULL) {
				pmap_remove_pte(pmap, pte, va, 0, 1);
				pmap_invalidate_page(pmap, va);
			}
		}
	}

out:
	pmap_install(oldpmap);
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}

/*
 * pmap_zero_page:		[ INTERFACE ]
 *
 *	Zero the specified (machine independent) page by mapping the page
 *	into virtual memory and clear its contents, one machine dependent
 *	page at a time.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_zero_page(paddr_t phys)
{
	vaddr_t va = IA64_PHYS_TO_RR7(phys);

	bzero((void *)va, PAGE_SIZE);
}

/*
 * pmap_copy_page:		[ INTERFACE ]
 *
 *	Copy the specified (machine independent) page by mapping the page
 *	into virtual memory and using memcpy to copy the page, one machine
 *	dependent page at a time.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_copy_page(paddr_t psrc, paddr_t pdst)
{
	vaddr_t vsrc = IA64_PHYS_TO_RR7(psrc);
	vaddr_t vdst = IA64_PHYS_TO_RR7(pdst);

	bcopy((void *)vsrc, (void *)vdst, PAGE_SIZE);
}

/*
 * pmap_collect:		[ INTERFACE ]
 *
 *	Garbage collects the physical map system for pages which are no
 *	longer used.  Success need not be guaranteed -- that is, there
 *	may well be pages which are not referenced, but others may be
 *	collected.
 *
 *	Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(pmap_t pmap)
{

#ifdef DEBUG
	printf("pmap_collect(%p)\n", pmap);
#endif

	/*
	 * If called for the kernel pmap, just return.  We
	 * handle this case in the event that we ever want
	 * to have swappable kernel threads.
	 */
	if (pmap == pmap_kernel())
		return;

	/*
	 * This process is about to be swapped out; free all of
	 * the PT pages by removing the physical mappings for its
	 * entire address space.  Note: pmap_remove() performs
	 * all necessary locking.
	 * XXX: Removes wired pages as well via pmap_remove().  Fixme!
	 */
	pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
}

/*
 * pmap_unwire:			[ INTERFACE ]
 *
 *	Clear the wired attribute for a map/virtual-address pair.
 *
 *	The mapping must already exist in the pmap.
 */
void
pmap_unwire(pmap_t pmap, vaddr_t va)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	if (pmap == NULL)
		return;

	PMAP_LOCK(pmap);
	oldpmap = pmap_install(pmap);

	pte = pmap_find_vhpt(va);
	KASSERT(pte != NULL);

	/*
	 * If wiring actually changed (always?) clear the wire bit and
	 * update the wire count.  Note that wiring is not a hardware
	 * characteristic so there is no need to invalidate the TLB.
	 */
	if (pmap_wired(pte)) {
		pmap->pm_stats.wired_count--;
		pmap_clear_wired(pte);
	}
#ifdef DIAGNOSTIC
	else {
		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
		    "didn't change!\n", pmap, va);
	}
#endif
	pmap_install(oldpmap);
	PMAP_UNLOCK(pmap);
}
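/*
 * Illustrative sketch (not compiled): the removal heuristic used by
 * pmap_remove() above.  When the range covers more pages than the pmap
 * has resident, it is cheaper to walk the pmap's pv list than to probe
 * the VHPT for every page in the range.  Numbers below are made up.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	13	/* assumed 8KB pages */

int
main(void)
{
	uint64_t sva = 0x2000000, eva = 0x6000000;	/* 64MB range */
	uint64_t resident_count = 37;			/* sparse pmap */

	if (resident_count < ((eva - sva) >> EX_PAGE_SHIFT))
		printf("walk the pv list (%llu entries)\n",
		    (unsigned long long)resident_count);
	else
		printf("probe the VHPT for each of %llu pages\n",
		    (unsigned long long)((eva - sva) >> EX_PAGE_SHIFT));
	return 0;
}
#endif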
/*
 * pmap_kenter_pa:		[ INTERFACE ]
 *
 *	Enter a va -> pa mapping into the kernel pmap without any
 *	physical->virtual tracking.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte))
		pmap_invalidate_page(pmap_kernel(), va);
	else
		pmap_enter_vhpt(pte, va);
	pmap_pte_prot(pmap_kernel(), pte, prot);
	pmap_set_pte(pte, va, pa, false, false);
}

/*
 * pmap_kremove:		[ INTERFACE ]
 *
 *	Remove a mapping entered with pmap_kenter_pa() starting at va,
 *	for size bytes (assumed to be page rounded).
 */
void
pmap_kremove(vaddr_t va, vsize_t size)
{
	struct ia64_lpte *pte;
	vaddr_t eva = va + size;

	for (; va < eva; va += PAGE_SIZE) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte)) {
			pmap_remove_vhpt(va);
			pmap_invalidate_page(pmap_kernel(), va);
			pmap_clear_present(pte);
		}
	}
}

/*
 * pmap_create:			[ INTERFACE ]
 *
 *	Create and return a physical map.
 *
 *	Note: no locking is necessary in this function.
 */
pmap_t
pmap_create(void)
{
	pmap_t pmap;
	int i;

#ifdef DEBUG
	printf("pmap_create()\n");
#endif

	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
	memset(pmap, 0, sizeof(*pmap));

	for (i = 0; i < 5; i++)
		pmap->pm_rid[i] = pmap_allocate_rid();

	pmap->pm_active = 0;
	pmap->pm_count = 1;
	TAILQ_INIT(&pmap->pm_pvlist);
	memset(&pmap->pm_stats, 0, sizeof(pmap->pm_stats));
	mutex_init(&pmap->pm_slock, MUTEX_SPIN, IPL_VM);

	mutex_enter(&pmap_all_pmaps_slock);
	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
	mutex_exit(&pmap_all_pmaps_slock);

	return pmap;
}

/*
 * pmap_destroy:		[ INTERFACE ]
 *
 *	Drop the reference count on the specified pmap, releasing
 *	all resources if the reference count drops to zero.
 */
void
pmap_destroy(pmap_t pmap)
{
	int i;

#ifdef DEBUG
	printf("pmap_destroy(%p)\n", pmap);
#endif

	PMAP_LOCK(pmap);
	if (--pmap->pm_count > 0) {
		PMAP_UNLOCK(pmap);
		return;
	}
	PMAP_UNLOCK(pmap);

	for (i = 0; i < 5; i++)
		if (pmap->pm_rid[i])
			pmap_free_rid(pmap->pm_rid[i]);

	/*
	 * Remove it from the global list of all pmaps.
	 */
	mutex_enter(&pmap_all_pmaps_slock);
	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
	mutex_exit(&pmap_all_pmaps_slock);

	pool_put(&pmap_pmap_pool, pmap);
}

/*
 * pmap_activate:		[ INTERFACE ]
 *
 *	Activate the pmap used by the specified process.  This includes
 *	reloading the MMU context of the current process, and marking
 *	the pmap in use by the processor.
 *
 *	Note: We may use only spin locks here, since we are called
 *	by a critical section in cpu_switch()!
 */
void
pmap_activate(struct lwp *l)
{
	pmap_install(vm_map_pmap(&l->l_proc->p_vmspace->vm_map));
}

/*
 * pmap_deactivate:		[ INTERFACE ]
 *
 *	Mark that the pmap used by the specified process is no longer
 *	in use by the processor.
 */
void
pmap_deactivate(struct lwp *l)
{
}
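/*
 * Illustrative sketch (not compiled): the intended reference-count
 * discipline for pmap_create()/pmap_destroy() above.  pmap_create()
 * hands back a pmap with pm_count == 1; each pmap_reference() must be
 * paired with a pmap_destroy(), and the pmap is only freed when the
 * count drops to zero.
 */
#if 0
void
example_pmap_lifecycle(void)
{
	pmap_t pm;

	pm = pmap_create();	/* pm_count == 1 */
	pmap_reference(pm);	/* pm_count == 2; e.g. a shared vmspace */
	pmap_destroy(pm);	/* pm_count == 1; pmap still alive */
	pmap_destroy(pm);	/* pm_count == 0; RIDs and pmap freed */
}
#endif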
/*
 * pmap_protect:		[ INTERFACE ]
 *
 *	Set the physical protection on the specified range of this map
 *	as requested.
 */
void
pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;
	vaddr_t pa;
	struct vm_page *pg;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if (prot & VM_PROT_WRITE)
		return;

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	/* uvm_lock_pageq(); */
	PMAP_LOCK(pmap);
	oldpmap = pmap_install(pmap);
	while (sva < eva) {
		/*
		 * If page is invalid, skip this page
		 */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL) {
			sva += PAGE_SIZE;
			continue;
		}

		if (pmap_prot(pte) != prot) {
			if (pmap_managed(pte)) {
				pa = pmap_ppn(pte);
				pg = PHYS_TO_VM_PAGE(pa);
				if (pmap_dirty(pte))
					pmap_clear_dirty(pte);
				if (pmap_accessed(pte))
					pmap_clear_accessed(pte);
			}
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, sva);
		}
		sva += PAGE_SIZE;
	}
	/* uvm_unlock_pageq(); */
	pmap_install(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * pmap_extract:		[ INTERFACE ]
 *
 *	Extract the physical address associated with the given
 *	pmap/virtual address pair.
 */
bool
pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	bool rv;

	rv = false;
	mutex_enter(&pmap->pm_slock);
	oldpmap = pmap_install(pmap);
	/* XXX: isn't this a little inefficient ? */
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte)) {
		*pap = pmap_ppn(pte);
		rv = true;
	}
	pmap_install(oldpmap);
	mutex_exit(&pmap->pm_slock);
	return rv;
}

/*
 * pmap_clear_modify:		[ INTERFACE ]
 *
 *	Clear the modify bits on the specified physical page.
 */
bool
pmap_clear_modify(struct vm_page *pg)
{
	bool rv = false;
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (pg->flags & PG_FAKE)
		return rv;

	TAILQ_FOREACH(pv, &pg->mdpage.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_install(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL);
		if (pmap_dirty(pte)) {
			rv = true;
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_install(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}

	return rv;
}

/*
 * pmap_page_protect:		[ INTERFACE ]
 *
 *	Lower the permission for all mappings to a given page to
 *	the permissions specified.
 */
void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;

	if ((prot & VM_PROT_WRITE) != 0)
		return;
	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
		if (pg->flags & PG_RDONLY)
			return;
		TAILQ_FOREACH(pv, &pg->mdpage.pv_list, pv_list) {
			pmap = pv->pv_pmap;
			PMAP_LOCK(pmap);
			oldpmap = pmap_install(pmap);
			pte = pmap_find_vhpt(pv->pv_va);
			KASSERT(pte != NULL);
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, pv->pv_va);
			pmap_install(oldpmap);
			PMAP_UNLOCK(pmap);
		}
		/* UVM_LOCK_ASSERT_PAGEQ(); */
		pg->flags |= PG_RDONLY;
	} else {
		pmap_page_purge(pg);
	}
}

/*
 * pmap_reference:		[ INTERFACE ]
 *
 *	Add a reference to the specified pmap.
 */
void
pmap_reference(pmap_t pmap)
{

#ifdef DEBUG
	printf("pmap_reference(%p)\n", pmap);
#endif

	PMAP_LOCK(pmap);
	pmap->pm_count++;
	PMAP_UNLOCK(pmap);
}

/*
 * pmap_clear_reference:	[ INTERFACE ]
 *
 *	Clear the reference bit on the specified physical page.
 */
bool
pmap_clear_reference(struct vm_page *pg)
{
	return false;
}
/*
 * pmap_phys_address:		[ INTERFACE ]
 *
 *	Return the physical address corresponding to the specified
 *	cookie.  Used by the device pager to decode a device driver's
 *	mmap entry point return value.
 *
 *	Note: no locking is necessary in this function.
 */
paddr_t
pmap_phys_address(paddr_t ppn)
{
	return ia64_ptob(ppn);
}

/*
 * pmap_enter:			[ INTERFACE ]
 *
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	Note: This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
{
	pmap_t oldpmap;
	vaddr_t opa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	bool managed, wired;
	struct vm_page *pg;
	int error = 0;

#ifdef DEBUG
	printf("Entered pmap_enter()\n");
#endif

	PMAP_MAP_TO_HEAD_LOCK();
	PMAP_LOCK(pmap);
	oldpmap = pmap_install(pmap);

	va &= ~PAGE_MASK;

	managed = false;
	wired = (flags & PMAP_WIRED) != 0;

	pg = PHYS_TO_VM_PAGE(pa);

#ifdef DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
#endif

	/*
	 * Find (or create) a pte for the given mapping.
	 */
	while ((pte = pmap_find_pte(va)) == NULL) {
		pmap_install(oldpmap);
		PMAP_UNLOCK(pmap);
		PMAP_MAP_TO_HEAD_UNLOCK();
		uvm_kick_pdaemon();
		PMAP_MAP_TO_HEAD_LOCK();
		PMAP_LOCK(pmap);
		oldpmap = pmap_install(pmap);
	}
	origpte = *pte;

	if (!pmap_present(pte)) {
		opa = ~0UL;
		pmap_enter_vhpt(pte, va);
	} else
		opa = pmap_ppn(pte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if (wired && !pmap_wired(&origpte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_wired(&origpte))
			pmap->pm_stats.wired_count--;

		managed = (pmap_managed(&origpte)) ? true : false;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (managed && pmap_dirty(&origpte) &&
		    pmap_track_modified(va))
			pg->flags &= ~PG_CLEAN;

		pmap_invalidate_page(pmap, va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if (pg != NULL && (pg->flags & PG_FAKE) == 0) {
		pmap_insert_entry(pmap, va, pg);
		managed = true;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring.  This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_set_pte(pte, va, pa, wired, managed);

	PMAP_MAP_TO_HEAD_UNLOCK();
	pmap_install(oldpmap);
	PMAP_UNLOCK(pmap);

	return error;	/* XXX: Look into this. */
}
/*
 * Routine:	pmap_page_purge: => was: pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_page_purge(struct vm_page *pg)
{
	pmap_t oldpmap;
	pv_entry_t pv;

#if defined(DIAGNOSTIC)
	/*
	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
	 * pages!
	 */
	if (pg->flags & PG_FAKE) {
		panic("pmap_page_purge: illegal for unmanaged page, "
		    "va: 0x%lx", VM_PAGE_TO_PHYS(pg));
	}
#endif
	/* UVM_LOCK_ASSERT_PAGEQ(); */
	while ((pv = TAILQ_FIRST(&pg->mdpage.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = pv->pv_pmap;
		vaddr_t va = pv->pv_va;

		PMAP_LOCK(pmap);
		oldpmap = pmap_install(pmap);
		pte = pmap_find_vhpt(va);
		KASSERT(pte != NULL);
		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(pg))
			panic("pmap_page_purge: pv_table for %lx is "
			    "inconsistent", VM_PAGE_TO_PHYS(pg));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_install(oldpmap);
		PMAP_UNLOCK(pmap);
	}

	/* UVM_LOCK_ASSERT_PAGEQ(); */
	pg->flags |= PG_RDONLY;
}

static pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	/* LOCK_ASSERT(simple_lock_held(&sched_lock)); */

	prevpm = curcpu()->ci_pmap;
	if (prevpm == pm)
		return prevpm;
	/*
	 * if (prevpm != NULL)
	 *	atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
	 */
	if (pm == NULL) {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
		/* atomic_set_32(&pm->pm_active, PCPU_GET(cpumask)); */
	}
	curcpu()->ci_pmap = pm;
	__asm __volatile("srlz.d");
	return prevpm;
}

static pmap_t
pmap_install(pmap_t pm)
{
	pmap_t prevpm;
	int s;

	s = splsched();
	prevpm = pmap_switch(pm);
	splx(s);
	return prevpm;
}

static uint32_t
pmap_allocate_rid(void)
{
	uint64_t bit, bits;
	int rid;

	mutex_enter(&pmap_rid_lock);
	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	/* Find an index with a free bit. */
	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
		pmap_rididx++;
		if (pmap_rididx == pmap_ridmapsz)
			pmap_rididx = 0;
	}
	rid = pmap_rididx * 64;

	/* Find a free bit. */
	bit = 1UL;
	while (bits & bit) {
		rid++;
		bit <<= 1;
	}

	pmap_ridmap[pmap_rididx] |= bit;
	pmap_ridcount++;
	mutex_exit(&pmap_rid_lock);

	return rid;
}

static void
pmap_free_rid(uint32_t rid)
{
	uint64_t bit;
	int idx;

	idx = rid / 64;
	bit = ~(1UL << (rid & 63));

	mutex_enter(&pmap_rid_lock);
	pmap_ridmap[idx] &= bit;
	pmap_ridcount--;
	mutex_exit(&pmap_rid_lock);
}

/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

static void
pmap_invalidate_page(pmap_t pmap, vaddr_t va)
{
	KASSERT(pmap == pmap_kernel() || pmap == curcpu()->ci_pmap);
	ia64_ptc_g(va, PAGE_SHIFT << 2);
}

static void
pmap_invalidate_all_1(void *arg)
{
	uint64_t addr;
	int i, j;
	register_t psr;

	psr = intr_disable();
	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
	intr_restore(psr);
}

static void
pmap_invalidate_all(pmap_t pmap)
{
	KASSERT(pmap == pmap_kernel() || pmap == curcpu()->ci_pmap);

#ifdef MULTIPROCESSOR
	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
#else
	pmap_invalidate_all_1(0);
#endif
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
 * 'page tables'.
 */
static struct ia64_lpte *
pmap_find_kpte(vaddr_t va)
{
	KASSERT((va >> 61) == 5);
	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG));

	return &ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
}
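/*
 * Illustrative sketch (not compiled): how pmap_find_kpte() above
 * decomposes a region 5 VA with KPTE_DIR_INDEX/KPTE_PTE_INDEX, assuming
 * 8KB pages (PAGE_SHIFT == 13).  The directory page holds 2^10
 * pointers (bits 21..30 of the VA); each leaf page holds 2^8 32-byte
 * ia64_lpte entries (bits 13..20).
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	13	/* assumed 8KB pages */
#define EX_DIR_INDEX(va) \
	(((va) >> (2*EX_PAGE_SHIFT-5)) & ((1<<(EX_PAGE_SHIFT-3))-1))
#define EX_PTE_INDEX(va) \
	(((va) >> EX_PAGE_SHIFT) & ((1<<(EX_PAGE_SHIFT-5))-1))

int
main(void)
{
	/* region 5 VA with the region bits already masked off */
	uint64_t va = 0x40123456UL;

	printf("dir %llu, pte %llu, offset 0x%llx\n",
	    (unsigned long long)EX_DIR_INDEX(va),
	    (unsigned long long)EX_PTE_INDEX(va),
	    (unsigned long long)(va & ((1 << EX_PAGE_SHIFT) - 1)));
	return 0;
}
#endif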
/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Find a pte suitable for mapping a user-space address.  If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
 * allocated.
 */
static struct ia64_lpte *
pmap_find_pte(vaddr_t va)
{
	struct ia64_lpte *pte;

	if (va >= VM_MAXUSER_ADDRESS)
		return pmap_find_kpte(va);

	pte = pmap_find_vhpt(va);
	if (pte == NULL) {
		pte = pool_get(&pmap_ia64_lpte_pool, PR_NOWAIT);
		if (pte != NULL)	/* callers cope with NULL */
			pte->tag = 1UL << 63;
	}
	return pte;
}

static __inline void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static int prot2ar[4] = {
		PTE_AR_R,	/* VM_PROT_NONE */
		PTE_AR_RW,	/* VM_PROT_WRITE */
		PTE_AR_RX,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};

	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == pmap_kernel()) ?
	    PTE_PL_KERN : PTE_PL_USER;
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT.  If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vaddr_t va, vaddr_t pa,
    bool wired, bool managed)
{
	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK;
	pte->pte |= PTE_PRESENT | PTE_MA_WB;
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;

	pte->tag = ia64_ttag(va);
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vaddr_t va,
    pv_entry_t pv, int freepte)
{
	int error;
	struct vm_page *pg;

	KASSERT(pmap == pmap_kernel() || pmap == curcpu()->ci_pmap);

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return error;

	pmap_invalidate_page(pmap, va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		pg = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			if (pmap_track_modified(va))
				pg->flags &= ~PG_CLEAN;
		if (pmap_accessed(pte))
			pg->flags &= ~PG_CLEAN;	/* XXX: Do we need this ? */

		error = pmap_remove_entry(pmap, pg, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return 0;
}

/*
 * Free a pte which is now unused.  This simply returns it to the zone
 * allocator if it is a user mapping.  For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vaddr_t va)
{
	if (va >= VM_MAXUSER_ADDRESS)
		pmap_clear_present(pte);
	/*
	 * else: pool_put(&pmap_ia64_lpte_pool, pte);
	 * XXX: Fixme for userspace.
	 */
}

/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static __inline int
pmap_track_modified(vaddr_t va)
{
	extern char *kmembase, *kmemlimit;

	if ((va < (vaddr_t)kmembase) || (va >= (vaddr_t)kmemlimit))
		return 1;
	else
		return 0;
}
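/*
 * Illustrative sketch (not compiled): how pmap_pte_prot() above folds a
 * vm_prot_t into its 4-entry prot2ar[] table.  Shifting
 * (prot & VM_PROT_ALL) right by one discards the read bit, so the
 * eight combinations collapse onto four access-rights values
 * (VM_PROT_READ == 1, VM_PROT_WRITE == 2, VM_PROT_EXECUTE == 4).
 */
#if 0
#include <stdio.h>

int
main(void)
{
	static const char *ar[4] = { "R", "RW", "RX", "RWX" };
	int prot;

	for (prot = 0; prot < 8; prot++)
		printf("prot R=%d W=%d X=%d -> PTE_AR_%s\n",
		    !!(prot & 1), !!(prot & 2), !!(prot & 4),
		    ar[(prot & 7) >> 1]);
	return 0;
}
#endif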
/***************************************************
 * page management routines.
 ***************************************************/

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.  the memory allocation is performed bypassing
 * the malloc code because of the possibility of allocations
 * at interrupt time.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	pv_entry_t allocated_pv;

	/* LOCK_ASSERT(simple_lock_held(locked_pmap->slock)); */
	/* UVM_LOCK_ASSERT_PAGEQ(); */

	allocated_pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
	return allocated_pv;

	/* XXX: Nice to have all this stuff later:
	 *
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
}

/*
 * free the pv_entry back to the free list
 */
static __inline void
free_pv_entry(pv_entry_t pv)
{
	pool_put(&pmap_pv_pool, pv);
}

/*
 * Add an ia64_lpte to the VHPT.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vaddr_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *vhpte;
	uint64_t pte_pa;

	/* Can fault, so get it out of the way. */
	pte_pa = ia64_tpa((vaddr_t)pte);

	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	mutex_enter(&bckt->lock);	/* XXX: fixme */
	pte->chain = bckt->chain;
	ia64_mf();
	bckt->chain = pte_pa;

	pmap_vhpt_inserts++;
	bckt->length++;
	mutex_exit(&bckt->lock);	/* XXX: fixme */
}

/*
 * Remove the ia64_lpte matching va from the VHPT.  Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vaddr_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;
	struct ia64_lpte *vhpte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	lpte = NULL;
	mutex_enter(&bckt->lock);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		lpte = pte;
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	if (chain == 0) {
		mutex_exit(&bckt->lock);
		return ENOENT;
	}

	/* Snip this pte out of the collision chain. */
	if (lpte == NULL)
		bckt->chain = pte->chain;
	else
		lpte->chain = pte->chain;
	ia64_mf();

	bckt->length--;
	mutex_exit(&bckt->lock);
	return 0;
}

/*
 * Find the ia64_lpte for the given va, if any.
 */
static struct ia64_lpte *
pmap_find_vhpt(vaddr_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)pte->chain;

	mutex_enter(&bckt->lock);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	mutex_exit(&bckt->lock);
	return (chain != 0) ? pte : NULL;
}
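/*
 * Illustrative sketch (not compiled): the collision-chain walk shared
 * by pmap_enter_vhpt/pmap_remove_vhpt/pmap_find_vhpt above.
 * ia64_thash(va) names the home VHPT slot and ia64_ttag(va) the
 * identity tag; the chain is a singly linked list of physical
 * addresses headed in the per-slot ia64_bucket.  The types and the
 * pa_to_va callback are simplified stand-ins.
 */
#if 0
#include <stdint.h>
#include <stddef.h>

struct ex_lpte {
	uint64_t tag;		/* ia64_ttag() identity tag */
	uint64_t chain;		/* PA of the next entry; 0 ends the chain */
};

/* pa_to_va stands in for IA64_PHYS_TO_RR7() */
static struct ex_lpte *
ex_chain_lookup(uint64_t head_pa, uint64_t tag,
    struct ex_lpte *(*pa_to_va)(uint64_t))
{
	uint64_t chain = head_pa;
	struct ex_lpte *pte = pa_to_va(chain);

	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = pa_to_va(chain);
	}
	return (chain != 0) ? pte : NULL;
}
#endif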
/*
 * Remove an entry from the list of managed mappings.
 */
static int
pmap_remove_entry(pmap_t pmap, struct vm_page *pg, vaddr_t va, pv_entry_t pv)
{
	if (!pv) {
		if (pg->mdpage.pv_list_count < pmap->pm_stats.resident_count) {
			TAILQ_FOREACH(pv, &pg->mdpage.pv_list, pv_list) {
				if (pmap == pv->pv_pmap && va == pv->pv_va)
					break;
			}
		} else {
			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
				if (va == pv->pv_va)
					break;
			}
		}
	}

	if (pv) {
		TAILQ_REMOVE(&pg->mdpage.pv_list, pv, pv_list);
		pg->mdpage.pv_list_count--;
		if (TAILQ_FIRST(&pg->mdpage.pv_list) == NULL) {
			/* UVM_LOCK_ASSERT_PAGEQ(); */
			pg->flags |= PG_RDONLY;
		}

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
		return 0;
	} else {
		return ENOENT;
	}
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vaddr_t va, struct vm_page *pg)
{
	pv_entry_t pv;

	pv = get_pv_entry(pmap);
	KASSERT(pv != NULL);	/* XXX: cope with pv_entry exhaustion */
	pv->pv_pmap = pmap;
	pv->pv_va = va;

	/* LOCK_ASSERT(simple_lock_held(pmap->slock)); */
	/* UVM_LOCK_ASSERT_PAGEQ(); */
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&pg->mdpage.pv_list, pv, pv_list);
	pg->mdpage.pv_list_count++;
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vaddr_t va)
{
	struct ia64_lpte *pte;

	KASSERT(pmap == pmap_kernel() || pmap == curcpu()->ci_pmap);

	pte = pmap_find_vhpt(va);
	if (pte) {
		pmap_remove_pte(pmap, pte, va, 0, 1);
		pmap_invalidate_page(pmap, va);
	}
	return;
}

/*
 * pmap_pv_page_alloc:
 *
 *	Allocate a page for the pv_entry pool.
 */
void *
pmap_pv_page_alloc(struct pool *pp, int flags)
{
	paddr_t pg;

	if (pmap_poolpage_alloc(&pg))
		return (void *)IA64_PHYS_TO_RR7(pg);
	return NULL;
}

/*
 * pmap_pv_page_free:
 *
 *	Free a pv_entry pool page.
 */
void
pmap_pv_page_free(struct pool *pp, void *v)
{
	pmap_poolpage_free(IA64_RR_MASK((vaddr_t)v));
}
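/*
 * Illustrative sketch (not compiled): the region 7 direct-map round
 * trip used by pmap_pv_page_alloc/pmap_pv_page_free above.  Region
 * bits live in VA bits 61..63, so IA64_PHYS_TO_RR7() ORs in region 7
 * and IA64_RR_MASK() strips it again.  The constants below assume the
 * standard ia64 region layout.
 */
#if 0
#include <stdint.h>

#define EX_RR_BASE(n)		((uint64_t)(n) << 61)
#define EX_PHYS_TO_RR7(pa)	(EX_RR_BASE(7) | (pa))
#define EX_RR_MASK(va)		((va) & ((1UL << 61) - 1))

int
main(void)
{
	uint64_t pa = 0x4000000UL;		/* made-up page address */
	uint64_t va = EX_PHYS_TO_RR7(pa);	/* 0xe000000004000000 */

	return EX_RR_MASK(va) == pa ? 0 : 1;	/* round trip holds */
}
#endif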
/******************** misc. functions ********************/

/*
 * pmap_poolpage_alloc: based on alpha/pmap_physpage_alloc
 *
 *	Allocate a single page from the VM system and return the
 *	physical address for that page.
 */
bool
pmap_poolpage_alloc(paddr_t *pap)
{
	struct vm_page *pg;
	paddr_t pa;

	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
	if (pg != NULL) {
		pa = VM_PAGE_TO_PHYS(pg);
#ifdef DEBUG
		mutex_enter(&pg->mdpage.pv_mutex);
		if (pg->wire_count != 0) {
			printf("pmap_poolpage_alloc: page 0x%lx has "
			    "%d references\n", pa, pg->wire_count);
			panic("pmap_poolpage_alloc");
		}
		mutex_exit(&pg->mdpage.pv_mutex);
#endif
		*pap = pa;
		return true;
	}
	return false;
}

/*
 * pmap_poolpage_free: based on alpha/pmap_physpage_free:
 *
 *	Free the single page table page at the specified physical address.
 */
void
pmap_poolpage_free(paddr_t pa)
{
	struct vm_page *pg;

	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
		panic("pmap_poolpage_free: bogus physical page address");

#ifdef DEBUG
	mutex_enter(&pg->mdpage.pv_mutex);
	if (pg->wire_count != 0)
		panic("pmap_poolpage_free: page still has references");
	mutex_exit(&pg->mdpage.pv_mutex);
#endif

	uvm_pagefree(pg);
}

#ifdef DEBUG
static void
dump_vhpt(void)
{
	vaddr_t base;
	vsize_t size, i;
	struct ia64_lpte *pte;

	__asm __volatile("mov %0=cr.pta;; srlz.i;;" : "=r" (base));

#define VHPTBASE(x)	((x) & (~0x7fffUL))
#define VHPTSIZE(x)	((vsize_t)(1UL << (((x) & 0x7cUL) >> 2)))

	size = VHPTSIZE(base);
	base = VHPTBASE(base);

	pte = (void *)base;

	printf("vhpt base = %lx\n", base);
	printf("vhpt size = %lx\n", size);

	for (i = 0; i < size / sizeof(struct ia64_lpte); i++) {
		if ((pte[i].pte & PTE_PRESENT) == 0)
			continue;
		printf("PTE_PRESENT ");
		if (pte[i].pte & PTE_MA_MASK)
			printf("MA: ");
		if (pte[i].pte & PTE_MA_WB)
			printf("WB ");
		if (pte[i].pte & PTE_MA_UC)
			printf("UC ");
		if (pte[i].pte & PTE_MA_UCE)
			printf("UCE ");
		if (pte[i].pte & PTE_MA_WC)
			printf("WC ");
		if (pte[i].pte & PTE_MA_NATPAGE)
			printf("NATPAGE ");
		if (pte[i].pte & PTE_ACCESSED)
			printf("PTE_ACCESSED ");
		if (pte[i].pte & PTE_DIRTY)
			printf("PTE_DIRTY ");
		if (pte[i].pte & PTE_PL_MASK)
			printf("PL: ");
		if (pte[i].pte & PTE_PL_KERN)
			printf("KERN ");
		if (pte[i].pte & PTE_PL_USER)
			printf("USER ");
		if (pte[i].pte & PTE_AR_MASK)
			printf("AR: ");
		if (pte[i].pte & PTE_AR_R)
			printf("R ");
		if (pte[i].pte & PTE_AR_RX)
			printf("RX ");
		if (pte[i].pte & PTE_AR_RWX)
			printf("RWX ");
		if (pte[i].pte & PTE_AR_R_RW)
			printf("R RW ");
		if (pte[i].pte & PTE_AR_RX_RWX)
			printf("RX RWX ");
		printf("ppn = %lx ", (pte[i].pte & PTE_PPN_MASK) >> 12);
		if (pte[i].pte & PTE_ED)
			printf("ED ");
		if (pte[i].pte & PTE_IG_MASK)
			printf("OS: ");
		if (pte[i].pte & PTE_WIRED)
			printf("WIRED ");
		if (pte[i].pte & PTE_MANAGED)
			printf("MANAGED ");
		printf("\n");
	}
}
#endif
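/*
 * Illustrative sketch (not compiled): the cr.pta encoding that
 * pmap_bootstrap() writes ("mov cr.pta=...") and dump_vhpt() decodes
 * above.  The PTA register packs: bit 0 = VE (VHPT walker enable),
 * bits 2..6 = log2 of the VHPT size, bit 8 = VF (long format), and the
 * VHPT base in the high bits.  Values below are made up.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t vhpt_base = 0xe000000004000000UL;	/* made up */
	uint64_t log2size = 16;
	uint64_t pta = vhpt_base + (1 << 8) + (log2size << 2) + 1;

	printf("base 0x%llx size 0x%llx %s %s\n",
	    (unsigned long long)(pta & ~0x7fffUL),
	    (unsigned long long)(1UL << ((pta & 0x7cUL) >> 2)),
	    (pta & 0x100) ? "VF" : "-", (pta & 1) ? "VE" : "-");
	return 0;
}
#endif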