Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/arch/amd64/include/pmap.h,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/amd64/include/pmap.h,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.10.4.9
retrieving revision 1.11
diff -u -p -r1.10.4.9 -r1.11
--- src/sys/arch/amd64/include/pmap.h   2007/12/03 18:34:42     1.10.4.9
+++ src/sys/arch/amd64/include/pmap.h   2007/08/29 23:38:03     1.11
@@ -1,4 +1,4 @@
-/*      $NetBSD: pmap.h,v 1.10.4.9 2007/12/03 18:34:42 ad Exp $        */
+/*      $NetBSD: pmap.h,v 1.11 2007/08/29 23:38:03 ad Exp $    */
 
 /*
  *
@@ -67,26 +67,25 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+/*
+ * pmap.h: see pmap.c for the history of this pmap module.
+ */
+
 #ifndef _AMD64_PMAP_H_
 #define _AMD64_PMAP_H_
 
+#ifndef _LOCORE
 #if defined(_KERNEL_OPT)
-#include "opt_xen.h"
+#include "opt_largepages.h"
 #endif
 
-#include 
-
+#include 
 #include 
 #include 
-#ifdef _KERNEL
-#include 
-#endif
+#include 
 #include 
 
-#ifdef XEN
-#include 
-#include 
-#endif /* XEN */
+#endif
 
 /*
  * The x86_64 pmap module closely resembles the i386 one. It uses
@@ -159,12 +158,7 @@
 #define VA_SIGN_POS(va)         ((va) & ~VA_SIGN_MASK)
 
 #define L4_SLOT_PTE             255
-#ifndef XEN
 #define L4_SLOT_KERN            256
-#else
-/* Xen use slots 256-272, let's move farther */
-#define L4_SLOT_KERN            320
-#endif
 #define L4_SLOT_KERNBASE        511
 #define L4_SLOT_APTE            510
 
@@ -221,8 +215,31 @@
 #define NTOPLEVEL_PDES          (PAGE_SIZE / (sizeof (pd_entry_t)))
 
+#define KERNSPACE               (NKL4_ENTRIES * NBPD_L4)
+
 #define NPDPG                   (PAGE_SIZE / sizeof (pd_entry_t))
 
+#define ptei(VA)        (((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
+
+/*
+ * pl*_pi: index in the ptp page for a pde mapping a VA.
+ * (pl*_i below is the index in the virtual array of all pdes per level)
+ */
+#define pl1_pi(VA)      (((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
+#define pl2_pi(VA)      (((VA_SIGN_POS(VA)) & L2_MASK) >> L2_SHIFT)
+#define pl3_pi(VA)      (((VA_SIGN_POS(VA)) & L3_MASK) >> L3_SHIFT)
+#define pl4_pi(VA)      (((VA_SIGN_POS(VA)) & L4_MASK) >> L4_SHIFT)
+
+/*
+ * pl*_i: generate index into pde/pte arrays in virtual space
+ */
+#define pl1_i(VA)       (((VA_SIGN_POS(VA)) & L1_FRAME) >> L1_SHIFT)
+#define pl2_i(VA)       (((VA_SIGN_POS(VA)) & L2_FRAME) >> L2_SHIFT)
+#define pl3_i(VA)       (((VA_SIGN_POS(VA)) & L3_FRAME) >> L3_SHIFT)
+#define pl4_i(VA)       (((VA_SIGN_POS(VA)) & L4_FRAME) >> L4_SHIFT)
+#define pl_i(va, lvl) \
+        (((VA_SIGN_POS(va)) & ptp_masks[(lvl)-1]) >> ptp_shifts[(lvl)-1])
+
 #define PTP_MASK_INITIALIZER    { L1_FRAME, L2_FRAME, L3_FRAME, L4_FRAME }
 #define PTP_SHIFT_INITIALIZER   { L1_SHIFT, L2_SHIFT, L3_SHIFT, L4_SHIFT }
 #define NKPTP_INITIALIZER       { NKL1_START_ENTRIES, NKL2_START_ENTRIES, \
@@ -233,6 +250,18 @@
 #define PDES_INITIALIZER        { L2_BASE, L3_BASE, L4_BASE }
 #define APDES_INITIALIZER       { AL2_BASE, AL3_BASE, AL4_BASE }
 
+/*
+ * PTP macros:
+ *   a PTP's index is the PD index of the PDE that points to it
+ *   a PTP's offset is the byte-offset in the PTE space that this PTP is at
+ *   a PTP's VA is the first VA mapped by that PTP
+ *
+ * note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 1024 entries)
+ *           NBPD == number of bytes a PTP can map (4MB)
+ */
+
+#define ptp_va2o(va, lvl)       (pl_i(va, (lvl)+1) * PAGE_SIZE)
+
 #define PTP_LEVELS      4
 
 /*
@@ -243,89 +272,338 @@
 #define PG_PVLIST       PG_AVAIL2       /* mapping has entry on pvlist */
                                         /* PG_AVAIL3 not used */
 
-#define PG_X            0               /* XXX dummy */
-
 /*
  * Number of PTE's per cache line.  8 byte pte, 64-byte cache line
  * Used to avoid false sharing of cache lines.
  */
 #define NPTECL          8
 
-#include 
-#ifndef XEN
-#define pmap_pa2pte(a)          (a)
-#define pmap_pte2pa(a)          ((a) & PG_FRAME)
-#define pmap_pte_set(p, n)      do { *(p) = (n); } while (0)
-#define pmap_pte_testset(p, n) \
-        atomic_swap_ulong((volatile unsigned long *)p, n)
-#define pmap_pte_setbits(p, b) \
-        atomic_or_ulong((volatile unsigned long *)p, b)
-#define pmap_pte_clearbits(p, b) \
-        atomic_and_ulong((volatile unsigned long *)p, ~(b))
-#define pmap_pte_flush()        /* nothing */
-#else
-static __inline pt_entry_t
-pmap_pa2pte(paddr_t pa)
+#if defined(_KERNEL) && !defined(_LOCORE)
+/*
+ * pmap data structures: see pmap.c for details of locking.
+ */
+
+struct pmap;
+typedef struct pmap *pmap_t;
+
+/*
+ * we maintain a list of all non-kernel pmaps
+ */
+
+LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
+
+/*
+ * the pmap structure
+ *
+ * note that the pm_obj contains the simple_lock, the reference count,
+ * page list, and number of PTPs within the pmap.
+ *
+ * pm_lock is the same as the spinlock for vm object 0. Changes to
+ * the other objects may only be made if that lock has been taken
+ * (the other object locks are only used when uvm_pagealloc is called)
+ */
+
+struct pmap {
+        struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */
+#define pm_lock pm_obj[0].vmobjlock
+#define pm_obj_l1 pm_obj[0]
+#define pm_obj_l2 pm_obj[1]
+#define pm_obj_l3 pm_obj[2]
+        LIST_ENTRY(pmap) pm_list;       /* list (lck by pm_list lock) */
+        pd_entry_t *pm_pdir;            /* VA of PD (lck by object lock) */
+        paddr_t pm_pdirpa;              /* PA of PD (read-only after create) */
+        struct vm_page *pm_ptphint[PTP_LEVELS-1];
+                                        /* pointer to a PTP in our pmap */
+        struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
+
+        int pm_flags;                   /* see below */
+
+        union descriptor *pm_ldt;       /* user-set LDT */
+        int pm_ldt_len;                 /* number of LDT entries */
+        int pm_ldt_sel;                 /* LDT selector */
+        u_int32_t pm_cpus;              /* mask of CPUs using pmap */
+};
+
+/* pm_flags */
+#define PMF_USER_LDT    0x01    /* pmap has user-set LDT */
+
+/*
+ * for each managed physical page we maintain a list of <pmap,va>'s
+ * which it is mapped at. the list is headed by a pv_head structure.
+ * there is one pv_head per managed phys page (allocated at boot time).
+ * the pv_head structure points to a list of pv_entry structures (each
+ * describes one mapping).
+ */
+
+struct pv_entry {                       /* locked by its list's pvh_lock */
+        SPLAY_ENTRY(pv_entry) pv_node;  /* splay-tree node */
+        struct pmap *pv_pmap;           /* the pmap */
+        vaddr_t pv_va;                  /* the virtual address */
+        struct vm_page *pv_ptp;         /* the vm_page of the PTP */
+        struct pmap_cpu *pv_alloc_cpu;  /* CPU allocated from */
+};
+
+/*
+ * pv_entrys are dynamically allocated in chunks from a single page.
+ * we keep track of how many pv_entrys are in use for each page and
+ * we can free pv_entry pages if needed. there is one lock for the
+ * entire allocation system.
+ */
+
+struct pv_page_info {
+        TAILQ_ENTRY(pv_page) pvpi_list;
+        struct pv_entry *pvpi_pvfree;
+        int pvpi_nfree;
+};
+
+/*
+ * number of pv_entry's in a pv_page
+ * (note: won't work on systems where NPBG isn't a constant)
+ */
+
+#define PVE_PER_PVPAGE ((PAGE_SIZE - sizeof(struct pv_page_info)) / \
+                        sizeof(struct pv_entry))
+
+/*
+ * a pv_page: where pv_entrys are allocated from
+ */
+
+struct pv_page {
+        struct pv_page_info pvinfo;
+        struct pv_entry pvents[PVE_PER_PVPAGE];
+};
+
+/*
+ * pmap_remove_record: a record of VAs that have been unmapped, used to
+ * flush TLB. if we have more than PMAP_RR_MAX then we stop recording.
+ */
+
+#define PMAP_RR_MAX     16      /* max of 16 pages (64K) */
+
+struct pmap_remove_record {
+        int prr_npages;
+        vaddr_t prr_vas[PMAP_RR_MAX];
+};
+
+/*
+ * global kernel variables
+ */
+
+/* PTDpaddr: is the physical address of the kernel's PDP */
+extern u_long PTDpaddr;
+
+extern struct pmap kernel_pmap_store;   /* kernel pmap */
+extern int pmap_pg_g;                   /* do we support PG_G? */
+
+extern paddr_t ptp_masks[];
+extern int ptp_shifts[];
+extern long nkptp[], nbpd[], nkptpmax[];
+
+/*
+ * macros
+ */
+
+#define pmap_kernel()                   (&kernel_pmap_store)
+#define pmap_resident_count(pmap)       ((pmap)->pm_stats.resident_count)
+#define pmap_wired_count(pmap)          ((pmap)->pm_stats.wired_count)
+
+#define pmap_clear_modify(pg)           pmap_clear_attrs(pg, PG_M)
+#define pmap_clear_reference(pg)        pmap_clear_attrs(pg, PG_U)
+#define pmap_copy(DP,SP,D,L,S)
+#define pmap_is_modified(pg)            pmap_test_attrs(pg, PG_M)
+#define pmap_is_referenced(pg)          pmap_test_attrs(pg, PG_U)
+#define pmap_move(DP,SP,D,L,S)
+#define pmap_phys_address(ppn)          ptob(ppn)
+#define pmap_valid_entry(E)             ((E) & PG_V) /* is PDE or PTE valid? */
+
+
+/*
+ * prototypes
+ */
+
+void            pmap_activate __P((struct lwp *));
+void            pmap_bootstrap __P((vaddr_t));
+bool            pmap_clear_attrs __P((struct vm_page *, unsigned));
+void            pmap_deactivate __P((struct lwp *));
+static void     pmap_page_protect __P((struct vm_page *, vm_prot_t));
+void            pmap_page_remove __P((struct vm_page *));
+static void     pmap_protect __P((struct pmap *, vaddr_t,
+                                vaddr_t, vm_prot_t));
+void            pmap_remove __P((struct pmap *, vaddr_t, vaddr_t));
+bool            pmap_test_attrs __P((struct vm_page *, unsigned));
+static void     pmap_update_pg __P((vaddr_t));
+static void     pmap_update_2pg __P((vaddr_t,vaddr_t));
+void            pmap_write_protect __P((struct pmap *, vaddr_t,
+                                vaddr_t, vm_prot_t));
+void            pmap_changeprot_local(vaddr_t, vm_prot_t);
+
+vaddr_t reserve_dumppages __P((vaddr_t)); /* XXX: not a pmap fn */
+
+void    pmap_tlb_shootdown __P((pmap_t, vaddr_t, vaddr_t, pt_entry_t));
+void    pmap_tlb_shootwait __P((void));
+void    pmap_prealloc_lowmem_ptps __P((void));
+
+#define PMAP_GROWKERNEL         /* turn on pmap_growkernel interface */
+
+/*
+ * Do idle page zero'ing uncached to avoid polluting the cache.
+ */
+bool    pmap_pageidlezero __P((paddr_t));
+#define PMAP_PAGEIDLEZERO(pa)   pmap_pageidlezero((pa))
+
+/*
+ * inline functions
+ */
+
+static __inline void
+pmap_remove_all(struct pmap *pmap)
 {
-        return (pt_entry_t)xpmap_ptom_masked(pa);
+        /* Nothing. */
 }
 
-static __inline paddr_t
-pmap_pte2pa(pt_entry_t pte)
+/*
+ * pmap_update_pg: flush one page from the TLB (or flush the whole thing
+ *      if hardware doesn't support one-page flushing)
+ */
+
+__inline static void
+pmap_update_pg(va)
+        vaddr_t va;
 {
-        return xpmap_mtop_masked(pte & PG_FRAME);
+        invlpg(va);
 }
 
-static __inline void
-pmap_pte_set(pt_entry_t *pte, pt_entry_t npte)
+
+/*
+ * pmap_update_2pg: flush two pages from the TLB
+ */
+
+__inline static void
+pmap_update_2pg(va, vb)
+        vaddr_t va, vb;
 {
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(pte), npte);
-        splx(s);
+        invlpg(va);
+        invlpg(vb);
 }
 
-static __inline pt_entry_t
-pmap_pte_testset(volatile pt_entry_t *pte, pt_entry_t npte)
+/*
+ * pmap_page_protect: change the protection of all recorded mappings
+ *      of a managed page
+ *
+ * => this function is a frontend for pmap_page_remove/pmap_clear_attrs
+ * => we only have to worry about making the page more protected.
+ *      unprotecting a page is done on-demand at fault time.
+ */
+
+__inline static void
+pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
 {
-        int s = splvm();
-        pt_entry_t opte = *pte;
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            npte);
-        xpq_flush_queue();
-        splx(s);
-        return opte;
+        if ((prot & VM_PROT_WRITE) == 0) {
+                if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
+                        (void) pmap_clear_attrs(pg, PG_RW);
+                } else {
+                        pmap_page_remove(pg);
+                }
+        }
 }
 
-static __inline void
-pmap_pte_setbits(volatile pt_entry_t *pte, pt_entry_t bits)
+/*
+ * pmap_protect: change the protection of pages in a pmap
+ *
+ * => this function is a frontend for pmap_remove/pmap_write_protect
+ * => we only have to worry about making the page more protected.
+ *      unprotecting a page is done on-demand at fault time.
+ */
+
+__inline static void
+pmap_protect(pmap, sva, eva, prot)
+        struct pmap *pmap;
+        vaddr_t sva, eva;
+        vm_prot_t prot;
 {
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            (*pte) | bits);
-        xpq_flush_queue();
-        splx(s);
+        if ((prot & VM_PROT_WRITE) == 0) {
+                if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
+                        pmap_write_protect(pmap, sva, eva, prot);
+                } else {
+                        pmap_remove(pmap, sva, eva);
+                }
+        }
 }
 
-static __inline void
-pmap_pte_clearbits(volatile pt_entry_t *pte, pt_entry_t bits)
-{
-        int s = splvm();
-        xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(__UNVOLATILE(pte)),
-            (*pte) & ~bits);
-        xpq_flush_queue();
-        splx(s);
-}
+/*
+ * various address inlines
+ *
+ *  vtopte: return a pointer to the PTE mapping a VA, works only for
+ *  user and PT addresses
+ *
+ *  kvtopte: return a pointer to the PTE mapping a kernel VA
+ */
 
-static __inline void
-pmap_pte_flush(void)
+#include 
+
+static __inline pt_entry_t *
+vtopte(vaddr_t va)
 {
-        int s = splvm();
-        xpq_flush_queue();
-        splx(s);
+
+        KASSERT(va < (L4_SLOT_KERN * NBPD_L4));
+
+        return (PTE_BASE + pl1_i(va));
 }
 
+static __inline pt_entry_t *
+kvtopte(vaddr_t va)
+{
+
+        KASSERT(va >= (L4_SLOT_KERN * NBPD_L4));
+
+#ifdef LARGEPAGES
+        {
+                pd_entry_t *pde;
+
+                pde = L2_BASE + pl2_i(va);
+                if (*pde & PG_PS)
+                        return ((pt_entry_t *)pde);
+        }
 #endif
 
-void pmap_prealloc_lowmem_ptps(void);
-void pmap_changeprot_local(vaddr_t, vm_prot_t);
+        return (PTE_BASE + pl1_i(va));
+}
+
+#define pmap_pte_set(p, n)              x86_atomic_testset_u64(p, n)
+#define pmap_pte_setbits(p, b)          x86_atomic_setbits_u64(p, b)
+#define pmap_pte_clearbits(p, b)        x86_atomic_clearbits_u64(p, b)
+#define pmap_cpu_has_pg_n()             (1)
+#define pmap_cpu_has_invlpg             (1)
+
+paddr_t vtophys __P((vaddr_t));
+vaddr_t pmap_map __P((vaddr_t, paddr_t, paddr_t, vm_prot_t));
+void    pmap_cpu_init_early(struct cpu_info *);
+void    pmap_cpu_init_late(struct cpu_info *);
+void    sse2_zero_page(void *);
+void    sse2_copy_page(void *, void *);
+
+#if 0   /* XXXfvdl was USER_LDT, need to check if that can be supported */
+void    pmap_ldt_cleanup __P((struct lwp *));
+#define PMAP_FORK
+#endif /* USER_LDT */
+
+/*
+ * Hooks for the pool allocator.
+ */
+#define POOL_VTOPHYS(va)        vtophys((vaddr_t) (va))
+
+/*
+ * TLB shootdown mailbox.
+ */
+
+struct pmap_mbox {
+        volatile void *mb_pointer;
+        volatile uintptr_t mb_addr1;
+        volatile uintptr_t mb_addr2;
+        volatile uintptr_t mb_head;
+        volatile uintptr_t mb_tail;
+        volatile uintptr_t mb_global;
+};
 #endif /* _KERNEL && !_LOCORE */
 #endif /* _AMD64_PMAP_H_ */
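
For readers unfamiliar with the pl*_pi()/pl*_i() macros that appear on the 1.11 side of
the diff, the small standalone C program below illustrates what they compute: how a
virtual address is split into per-level page-table indices. It is not part of the diff;
the shift and mask constants are restated here as assumptions, mirroring the standard
4-level x86_64 layout (4 KB pages, nine index bits per level), so the sketch compiles
on its own rather than being copied verbatim from pmap.h.

/*
 * Illustrative sketch only: decompose a virtual address the way the
 * pl*_pi()/pl*_i() macros do.  Constants below are assumptions that
 * mirror the usual x86_64 4-level layout, not text from the header.
 */
#include <stdio.h>
#include <stdint.h>

#define L1_SHIFT        12
#define L2_SHIFT        21
#define L3_SHIFT        30
#define L4_SHIFT        39

#define L1_MASK         0x00000000001ff000ULL   /* bits 12-20 */
#define L2_MASK         0x000000003fe00000ULL   /* bits 21-29 */
#define L3_MASK         0x0000007fc0000000ULL   /* bits 30-38 */
#define L4_MASK         0x0000ff8000000000ULL   /* bits 39-47 */

/* frames accumulate the masks of the level and everything above it */
#define L4_FRAME        L4_MASK
#define L3_FRAME        (L4_FRAME|L3_MASK)
#define L2_FRAME        (L3_FRAME|L2_MASK)
#define L1_FRAME        (L2_FRAME|L1_MASK)

/* strip the sign-extension bits, like VA_SIGN_POS() */
#define VA_SIGN_POS(va) ((va) & 0x0000ffffffffffffULL)

/* pl*_pi: index within one page-table page at each level (0..511) */
#define pl1_pi(va)      ((VA_SIGN_POS(va) & L1_MASK) >> L1_SHIFT)
#define pl2_pi(va)      ((VA_SIGN_POS(va) & L2_MASK) >> L2_SHIFT)
#define pl3_pi(va)      ((VA_SIGN_POS(va) & L3_MASK) >> L3_SHIFT)
#define pl4_pi(va)      ((VA_SIGN_POS(va) & L4_MASK) >> L4_SHIFT)

/* pl1_i: index into the linear (recursively mapped) PTE array */
#define pl1_i(va)       ((VA_SIGN_POS(va) & L1_FRAME) >> L1_SHIFT)

int
main(void)
{
        uint64_t va = 0xffff800000123000ULL;    /* an arbitrary kernel VA */

        printf("L4 slot %llu, L3 slot %llu, L2 slot %llu, L1 slot %llu\n",
            (unsigned long long)pl4_pi(va), (unsigned long long)pl3_pi(va),
            (unsigned long long)pl2_pi(va), (unsigned long long)pl1_pi(va));
        printf("linear PTE index (pl1_i): %llu\n",
            (unsigned long long)pl1_i(va));
        return 0;
}

Run on the sample address, this prints an L4 slot of 256, which lines up with the
non-Xen L4_SLOT_KERN value of 256 visible in the hunk above: the first L4 slot of
the kernel half of the address space.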