Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/arch/amd64/include/pmap.h,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/amd64/include/pmap.h,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.10.4.9
retrieving revision 1.11
diff -u -p -r1.10.4.9 -r1.11
--- src/sys/arch/amd64/include/pmap.h   2007/12/03 18:34:42     1.10.4.9
+++ src/sys/arch/amd64/include/pmap.h   2007/08/29 23:38:03     1.11
@@ -1,4 +1,4 @@
-/*      $NetBSD: pmap.h,v 1.10.4.9 2007/12/03 18:34:42 ad Exp $        */
+/*      $NetBSD: pmap.h,v 1.11 2007/08/29 23:38:03 ad Exp $    */
 
 /*
  *
@@ -67,26 +67,25 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+/*
+ * pmap.h: see pmap.c for the history of this pmap module.
+ */
+
 #ifndef _AMD64_PMAP_H_
 #define _AMD64_PMAP_H_
 
+#ifndef _LOCORE
 #if defined(_KERNEL_OPT)
-#include "opt_xen.h"
+#include "opt_largepages.h"
 #endif
 
-#include 
-
+#include 
 #include 
 #include 
-#ifdef _KERNEL
-#include 
-#endif
+#include 
 #include 
 
-#ifdef XEN
-#include 
-#include 
-#endif /* XEN */
+#endif
 
 /*
  * The x86_64 pmap module closely resembles the i386 one. It uses
@@ -159,12 +158,7 @@
 #define VA_SIGN_POS(va)         ((va) & ~VA_SIGN_MASK)
 
 #define L4_SLOT_PTE             255
-#ifndef XEN
 #define L4_SLOT_KERN            256
-#else
-/* Xen use slots 256-272, let's move farther */
-#define L4_SLOT_KERN            320
-#endif
 #define L4_SLOT_KERNBASE        511
 #define L4_SLOT_APTE            510
 
@@ -221,8 +215,31 @@
 #define NTOPLEVEL_PDES          (PAGE_SIZE / (sizeof (pd_entry_t)))
 
+#define KERNSPACE               (NKL4_ENTRIES * NBPD_L4)
+
 #define NPDPG                   (PAGE_SIZE / sizeof (pd_entry_t))
 
+#define ptei(VA)        (((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
+
+/*
+ * pl*_pi: index in the ptp page for a pde mapping a VA.
+ * (pl*_i below is the index in the virtual array of all pdes per level)
+ */
+#define pl1_pi(VA)      (((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
+#define pl2_pi(VA)      (((VA_SIGN_POS(VA)) & L2_MASK) >> L2_SHIFT)
+#define pl3_pi(VA)      (((VA_SIGN_POS(VA)) & L3_MASK) >> L3_SHIFT)
+#define pl4_pi(VA)      (((VA_SIGN_POS(VA)) & L4_MASK) >> L4_SHIFT)
+
+/*
+ * pl*_i: generate index into pde/pte arrays in virtual space
+ */
+#define pl1_i(VA)       (((VA_SIGN_POS(VA)) & L1_FRAME) >> L1_SHIFT)
+#define pl2_i(VA)       (((VA_SIGN_POS(VA)) & L2_FRAME) >> L2_SHIFT)
+#define pl3_i(VA)       (((VA_SIGN_POS(VA)) & L3_FRAME) >> L3_SHIFT)
+#define pl4_i(VA)       (((VA_SIGN_POS(VA)) & L4_FRAME) >> L4_SHIFT)
+#define pl_i(va, lvl) \
+        (((VA_SIGN_POS(va)) & ptp_masks[(lvl)-1]) >> ptp_shifts[(lvl)-1])
+
 #define PTP_MASK_INITIALIZER    { L1_FRAME, L2_FRAME, L3_FRAME, L4_FRAME }
 #define PTP_SHIFT_INITIALIZER   { L1_SHIFT, L2_SHIFT, L3_SHIFT, L4_SHIFT }
 #define NKPTP_INITIALIZER       { NKL1_START_ENTRIES, NKL2_START_ENTRIES, \
@@ -233,6 +250,18 @@
 #define PDES_INITIALIZER        { L2_BASE, L3_BASE, L4_BASE }
 #define APDES_INITIALIZER       { AL2_BASE, AL3_BASE, AL4_BASE }
 
+/*
+ * PTP macros:
+ *   a PTP's index is the PD index of the PDE that points to it
+ *   a PTP's offset is the byte-offset in the PTE space that this PTP is at
+ *   a PTP's VA is the first VA mapped by that PTP
+ *
+ * note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 1024 entries)
+ *           NBPD == number of bytes a PTP can map (4MB)
+ */
+
+#define ptp_va2o(va, lvl)       (pl_i(va, (lvl)+1) * PAGE_SIZE)
+
 #define PTP_LEVELS      4
 
 /*
@@ -243,89 +272,338 @@
 #define PG_PVLIST       PG_AVAIL2       /* mapping has entry on pvlist */
                                         /* PG_AVAIL3 not used */
 
-#define PG_X            0               /* XXX dummy */
-
 /*
  * Number of PTE's per cache line.  8 byte pte, 64-byte cache line
  * Used to avoid false sharing of cache lines.
  */
 #define NPTECL          8
 
-#include 
-#ifndef XEN
-#define pmap_pa2pte(a)          (a)
-#define pmap_pte2pa(a)          ((a) & PG_FRAME)
-#define pmap_pte_set(p, n)      do { *(p) = (n); } while (0)
-#define pmap_pte_testset(p, n) \
-        atomic_swap_ulong((volatile unsigned long *)p, n)
-#define pmap_pte_setbits(p, b) \
-        atomic_or_ulong((volatile unsigned long *)p, b)
-#define pmap_pte_clearbits(p, b) \
-        atomic_and_ulong((volatile unsigned long *)p, ~(b))
-#define pmap_pte_flush()        /* nothing */
-#else
-static __inline pt_entry_t
-pmap_pa2pte(paddr_t pa)
+#if defined(_KERNEL) && !defined(_LOCORE)
+/*
+ * pmap data structures: see pmap.c for details of locking.
+ */
+
+struct pmap;
+typedef struct pmap *pmap_t;
+
+/*
+ * we maintain a list of all non-kernel pmaps
+ */
+
+LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
+
+/*
+ * the pmap structure
+ *
+ * note that the pm_obj contains the simple_lock, the reference count,
+ * page list, and number of PTPs within the pmap.
+ *
+ * pm_lock is the same as the spinlock for vm object 0. Changes to
+ * the other objects may only be made if that lock has been taken
+ * (the other object locks are only used when uvm_pagealloc is called)
+ */
+
+struct pmap {
+        struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */
+#define pm_lock pm_obj[0].vmobjlock
+#define pm_obj_l1 pm_obj[0]
+#define pm_obj_l2 pm_obj[1]
+#define pm_obj_l3 pm_obj[2]
+        LIST_ENTRY(pmap) pm_list;       /* list (lck by pm_list lock) */
+        pd_entry_t *pm_pdir;            /* VA of PD (lck by object lock) */
+        paddr_t pm_pdirpa;              /* PA of PD (read-only after create) */
+        struct vm_page *pm_ptphint[PTP_LEVELS-1];
+                                        /* pointer to a PTP in our pmap */
+        struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
+
+        int pm_flags;                   /* see below */
+
+        union descriptor *pm_ldt;       /* user-set LDT */
+        int pm_ldt_len;                 /* number of LDT entries */
+        int pm_ldt_sel;                 /* LDT selector */
+        u_int32_t pm_cpus;              /* mask of CPUs using pmap */
+};
+
+/* pm_flags */
+#define PMF_USER_LDT    0x01    /* pmap has user-set LDT */
+
+/*
+ * for each managed physical page we maintain a list of <pmap,va>'s
+ * which it is mapped at. the list is headed by a pv_head structure.
+ * there is one pv_head per managed phys page (allocated at boot time).
+ * the pv_head structure points to a list of pv_entry structures (each
+ * describes one mapping).
+ */
+
+struct pv_entry {                       /* locked by its list's pvh_lock */
+        SPLAY_ENTRY(pv_entry) pv_node;  /* splay-tree node */
+        struct pmap *pv_pmap;           /* the pmap */
+        vaddr_t pv_va;                  /* the virtual address */
+        struct vm_page *pv_ptp;         /* the vm_page of the PTP */
+        struct pmap_cpu *pv_alloc_cpu;  /* CPU allocated from */
+};
+
+/*
+ * pv_entrys are dynamically allocated in chunks from a single page.
+ * we keep track of how many pv_entrys are in use for each page and
+ * we can free pv_entry pages if needed. there is one lock for the
+ * entire allocation system.
+ */
+
+struct pv_page_info {
+        TAILQ_ENTRY(pv_page) pvpi_list;
+        struct pv_entry *pvpi_pvfree;
+        int pvpi_nfree;
+};
+
+/*
+ * number of pv_entry's in a pv_page
+ * (note: won't work on systems where NPBG isn't a constant)
+ */
+
+#define PVE_PER_PVPAGE ((PAGE_SIZE - sizeof(struct pv_page_info)) / \
+                        sizeof(struct pv_entry))
+
+/*
+ * a pv_page: where pv_entrys are allocated from
+ */
+
+struct pv_page {
+        struct pv_page_info pvinfo;
+        struct pv_entry pvents[PVE_PER_PVPAGE];
+};
+
+/*
+ * pmap_remove_record: a record of VAs that have been unmapped, used to
+ * flush TLB. if we have more than PMAP_RR_MAX then we stop recording.
+ */
+
+#define PMAP_RR_MAX     16      /* max of 16 pages (64K) */
+
+struct pmap_remove_record {
+        int prr_npages;
+        vaddr_t prr_vas[PMAP_RR_MAX];
+};
+
+/*
+ * global kernel variables
+ */
+
+/* PTDpaddr: is the physical address of the kernel's PDP */
+extern u_long PTDpaddr;
+
+extern struct pmap kernel_pmap_store;   /* kernel pmap */
+extern int pmap_pg_g;                   /* do we support PG_G? */
+
+extern paddr_t ptp_masks[];
+extern int ptp_shifts[];
+extern long nkptp[], nbpd[], nkptpmax[];
+
+/*
+ * macros
+ */
+
+#define pmap_kernel()                   (&kernel_pmap_store)
+#define pmap_resident_count(pmap)       ((pmap)->pm_stats.resident_count)
+#define pmap_wired_count(pmap)          ((pmap)->pm_stats.wired_count)
+
+#define pmap_clear_modify(pg)           pmap_clear_attrs(pg, PG_M)
+#define pmap_clear_reference(pg)        pmap_clear_attrs(pg, PG_U)
+#define pmap_copy(DP,SP,D,L,S)
+#define pmap_is_modified(pg)            pmap_test_attrs(pg, PG_M)
+#define pmap_is_referenced(pg)          pmap_test_attrs(pg, PG_U)
+#define pmap_move(DP,SP,D,L,S)
+#define pmap_phys_address(ppn)          ptob(ppn)
+#define pmap_valid_entry(E)             ((E) & PG_V) /* is PDE or PTE valid? */
+
+
+/*
+ * prototypes
+ */
+
+void            pmap_activate __P((struct lwp *));
+void            pmap_bootstrap __P((vaddr_t));
+bool            pmap_clear_attrs __P((struct vm_page *, unsigned));
+void            pmap_deactivate __P((struct lwp *));
+static void     pmap_page_protect __P((struct vm_page *, vm_prot_t));
+void            pmap_page_remove __P((struct vm_page *));
+static void     pmap_protect __P((struct pmap *, vaddr_t,
+                                vaddr_t, vm_prot_t));
+void            pmap_remove __P((struct pmap *, vaddr_t, vaddr_t));
+bool            pmap_test_attrs __P((struct vm_page *, unsigned));
+static void     pmap_update_pg __P((vaddr_t));
+static void     pmap_update_2pg __P((vaddr_t,vaddr_t));
+void            pmap_write_protect __P((struct pmap *, vaddr_t,
+                                vaddr_t, vm_prot_t));
+void            pmap_changeprot_local(vaddr_t, vm_prot_t);
+
+vaddr_t reserve_dumppages __P((vaddr_t)); /* XXX: not a pmap fn */
+
+void    pmap_tlb_shootdown __P((pmap_t, vaddr_t, vaddr_t, pt_entry_t));
+void    pmap_tlb_shootwait __P((void));
+void    pmap_prealloc_lowmem_ptps __P((void));
+
+#define PMAP_GROWKERNEL         /* turn on pmap_growkernel interface */
+
+/*
+ * Do idle page zero'ing uncached to avoid polluting the cache.
+ */
+bool    pmap_pageidlezero __P((paddr_t));
+#define PMAP_PAGEIDLEZERO(pa)   pmap_pageidlezero((pa))
+
+/*
+ * inline functions
+ */
+
+static __inline void
+pmap_remove_all(struct pmap *pmap)
 {
-        return (pt_entry_t)xpmap_ptom_masked(pa);
+        /* Nothing. */
 }
 
-static __inline paddr_t
-pmap_pte2pa(pt_entry_t pte)
+/*
+ * pmap_update_pg: flush one page from the TLB (or flush the whole thing
+ *      if hardware doesn't support one-page flushing)
+ */
+
+__inline static void
+pmap_update_pg(va)
+        vaddr_t va;
 {
-        return xpmap_mtop_masked(pte & PG_FRAME);
+        invlpg(va);
 }
 
-static __inline void
-pmap_pte_set(pt_entry_t *pte, pt_entry_t npte)
+
+/*
+ * pmap_update_2pg: flush two pages from the TLB
+ */
+
+__inline static void
+pmap_update_2pg(va, vb)
+        vaddr_t va, vb;
 {
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(pte), npte);
-        splx(s);
+        invlpg(va);
+        invlpg(vb);
 }
 
-static __inline pt_entry_t
-pmap_pte_testset(volatile pt_entry_t *pte, pt_entry_t npte)
+/*
+ * pmap_page_protect: change the protection of all recorded mappings
+ *      of a managed page
+ *
+ * => this function is a frontend for pmap_page_remove/pmap_clear_attrs
+ * => we only have to worry about making the page more protected.
+ *      unprotecting a page is done on-demand at fault time.
+ */
+
+__inline static void
+pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
 {
-        int s = splvm();
-        pt_entry_t opte = *pte;
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            npte);
-        xpq_flush_queue();
-        splx(s);
-        return opte;
+        if ((prot & VM_PROT_WRITE) == 0) {
+                if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
+                        (void) pmap_clear_attrs(pg, PG_RW);
+                } else {
+                        pmap_page_remove(pg);
+                }
+        }
 }
 
-static __inline void
-pmap_pte_setbits(volatile pt_entry_t *pte, pt_entry_t bits)
+/*
+ * pmap_protect: change the protection of pages in a pmap
+ *
+ * => this function is a frontend for pmap_remove/pmap_write_protect
+ * => we only have to worry about making the page more protected.
+ *      unprotecting a page is done on-demand at fault time.
+ */
+
+__inline static void
+pmap_protect(pmap, sva, eva, prot)
+        struct pmap *pmap;
+        vaddr_t sva, eva;
+        vm_prot_t prot;
 {
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            (*pte) | bits);
-        xpq_flush_queue();
-        splx(s);
+        if ((prot & VM_PROT_WRITE) == 0) {
+                if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
+                        pmap_write_protect(pmap, sva, eva, prot);
+                } else {
+                        pmap_remove(pmap, sva, eva);
+                }
+        }
 }
 
-static __inline void
-pmap_pte_clearbits(volatile pt_entry_t *pte, pt_entry_t bits)
-{
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            (*pte) & ~bits);
-        xpq_flush_queue();
-        splx(s);
-}
+/*
+ * various address inlines
+ *
+ *  vtopte: return a pointer to the PTE mapping a VA, works only for
+ *  user and PT addresses
+ *
+ *  kvtopte: return a pointer to the PTE mapping a kernel VA
+ */
 
-static __inline void
-pmap_pte_flush(void)
+#include 
+
+static __inline pt_entry_t *
+vtopte(vaddr_t va)
 {
-        int s = splvm();
-        xpq_flush_queue();
-        splx(s);
+
+        KASSERT(va < (L4_SLOT_KERN * NBPD_L4));
+
+        return (PTE_BASE + pl1_i(va));
 }
 
+static __inline pt_entry_t *
+kvtopte(vaddr_t va)
+{
+
+        KASSERT(va >= (L4_SLOT_KERN * NBPD_L4));
+
+#ifdef LARGEPAGES
+        {
+                pd_entry_t *pde;
+
+                pde = L2_BASE + pl2_i(va);
+                if (*pde & PG_PS)
+                        return ((pt_entry_t *)pde);
+        }
 #endif
 
-void pmap_prealloc_lowmem_ptps(void);
-void pmap_changeprot_local(vaddr_t, vm_prot_t);
+        return (PTE_BASE + pl1_i(va));
+}
+
+#define pmap_pte_set(p, n)              x86_atomic_testset_u64(p, n)
+#define pmap_pte_setbits(p, b)          x86_atomic_setbits_u64(p, b)
+#define pmap_pte_clearbits(p, b)        x86_atomic_clearbits_u64(p, b)
+#define pmap_cpu_has_pg_n()             (1)
+#define pmap_cpu_has_invlpg             (1)
+
+paddr_t vtophys __P((vaddr_t));
+vaddr_t pmap_map __P((vaddr_t, paddr_t, paddr_t, vm_prot_t));
+void    pmap_cpu_init_early(struct cpu_info *);
+void    pmap_cpu_init_late(struct cpu_info *);
+void    sse2_zero_page(void *);
+void    sse2_copy_page(void *, void *);
+
+#if 0   /* XXXfvdl was USER_LDT, need to check if that can be supported */
+void    pmap_ldt_cleanup __P((struct lwp *));
+#define PMAP_FORK
+#endif /* USER_LDT */
+
+/*
+ * Hooks for the pool allocator.
+ */
+#define POOL_VTOPHYS(va)        vtophys((vaddr_t) (va))
+
+/*
+ * TLB shootdown mailbox.
+ */
+
+struct pmap_mbox {
+        volatile void *mb_pointer;
+        volatile uintptr_t mb_addr1;
+        volatile uintptr_t mb_addr2;
+        volatile uintptr_t mb_head;
+        volatile uintptr_t mb_tail;
+        volatile uintptr_t mb_global;
+};
 #endif /* _KERNEL && !_LOCORE */
 #endif /* _AMD64_PMAP_H_ */
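
For readers unfamiliar with the pl*_pi()/pl*_i() macros that appear on the 1.11 side of
the diff, the small standalone C program below illustrates what they compute: how a
virtual address is split into per-level page-table indices. It is not part of the diff;
the shift and mask constants are restated here as assumptions, mirroring the standard
4-level x86_64 layout (4 KB pages, nine index bits per level), so the sketch compiles
on its own rather than being copied verbatim from pmap.h.

/*
 * Illustrative sketch only: decompose a virtual address the way the
 * pl*_pi()/pl*_i() macros do.  Constants below are assumptions that
 * mirror the usual x86_64 4-level layout, not text from the header.
 */
#include <stdio.h>
#include <stdint.h>

#define L1_SHIFT        12
#define L2_SHIFT        21
#define L3_SHIFT        30
#define L4_SHIFT        39

#define L1_MASK         0x00000000001ff000ULL   /* bits 12-20 */
#define L2_MASK         0x000000003fe00000ULL   /* bits 21-29 */
#define L3_MASK         0x0000007fc0000000ULL   /* bits 30-38 */
#define L4_MASK         0x0000ff8000000000ULL   /* bits 39-47 */

/* frames accumulate the masks of the level and everything above it */
#define L4_FRAME        L4_MASK
#define L3_FRAME        (L4_FRAME|L3_MASK)
#define L2_FRAME        (L3_FRAME|L2_MASK)
#define L1_FRAME        (L2_FRAME|L1_MASK)

/* strip the sign-extension bits, like VA_SIGN_POS() */
#define VA_SIGN_POS(va) ((va) & 0x0000ffffffffffffULL)

/* pl*_pi: index within one page-table page at each level (0..511) */
#define pl1_pi(va)      ((VA_SIGN_POS(va) & L1_MASK) >> L1_SHIFT)
#define pl2_pi(va)      ((VA_SIGN_POS(va) & L2_MASK) >> L2_SHIFT)
#define pl3_pi(va)      ((VA_SIGN_POS(va) & L3_MASK) >> L3_SHIFT)
#define pl4_pi(va)      ((VA_SIGN_POS(va) & L4_MASK) >> L4_SHIFT)

/* pl1_i: index into the linear (recursively mapped) PTE array */
#define pl1_i(va)       ((VA_SIGN_POS(va) & L1_FRAME) >> L1_SHIFT)

int
main(void)
{
        uint64_t va = 0xffff800000123000ULL;    /* an arbitrary kernel VA */

        printf("L4 slot %llu, L3 slot %llu, L2 slot %llu, L1 slot %llu\n",
            (unsigned long long)pl4_pi(va), (unsigned long long)pl3_pi(va),
            (unsigned long long)pl2_pi(va), (unsigned long long)pl1_pi(va));
        printf("linear PTE index (pl1_i): %llu\n",
            (unsigned long long)pl1_i(va));
        return 0;
}

Run on the sample address, this prints an L4 slot of 256, which lines up with the
non-Xen L4_SLOT_KERN value of 256 visible in the hunk above: the first L4 slot of
the kernel half of the address space.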