===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v
retrieving revision 1.50.2.2
retrieving revision 1.122
diff -u -p -r1.50.2.2 -r1.122
--- src/sys/arch/i386/i386/locore.S	2007/09/24 10:56:48	1.50.2.2
+++ src/sys/arch/i386/i386/locore.S	2016/05/14 09:51:56	1.122
@@ -1,10 +1,35 @@
-/*	$NetBSD: locore.S,v 1.50.2.2 2007/09/24 10:56:48 yamt Exp $	*/
+/*	$NetBSD: locore.S,v 1.122 2016/05/14 09:51:56 maxv Exp $	*/
 
 /*
  * Copyright-o-rama!
  */
 
 /*
+ * Copyright (c) 2006 Manuel Bouyer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
  * Copyright (c) 2001 Wasabi Systems, Inc.
  * All rights reserved.
  *
@@ -39,13 +64,12 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-
 /*-
- * Copyright (c) 1998, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 1998, 2000, 2004, 2006, 2007, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum.
+ * by Charles M. Hannum, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -55,13 +79,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *        This product includes software developed by the NetBSD
- *        Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -110,13 +127,18 @@
  *	@(#)locore.s	7.3 (Berkeley) 5/13/91
  */
 
+#include 
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.122 2016/05/14 09:51:56 maxv Exp $");
+
 #include "opt_compat_oldboot.h"
-#include "opt_cputype.h"
+#include "opt_copy_symtab.h"
 #include "opt_ddb.h"
+#include "opt_modular.h"
+#include "opt_multiboot.h"
 #include "opt_realmem.h"
 #include "opt_vm86.h"
+#include "opt_xen.h"
 
-#include "npx.h"
 #include "assym.h"
 #include "lapic.h"
 #include "ioapic.h"
@@ -130,24 +152,112 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
+#ifndef XEN
+#include 
+#endif
 
-/* XXX temporary kluge; these should not be here */
 /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
 #include 
 
+#ifndef XEN
+#define	_RELOC(x)	((x) - KERNBASE)
+#else
+#define	_RELOC(x)	((x))
+#endif /* XEN */
+#define	RELOC(x)	_RELOC(_C_LABEL(x))
+
+/* 32bit version of PG_NX */
+#define PG_NX32	0x80000000
+
+#ifndef PAE
+#define	PROC0_PDIR_OFF	0
+#else
+#define	PROC0_L3_OFF	0
+#define	PROC0_PDIR_OFF	1 * PAGE_SIZE
+#endif
+
+#define	PROC0_STK_OFF	(PROC0_PDIR_OFF + PDP_SIZE * PAGE_SIZE)
+#define	PROC0_PTP1_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
+
+/*
+ * fillkpt - Fill in a kernel page table
+ *	eax = pte (page frame | control | status)
+ *	ebx = page table address
+ *	ecx = number of pages to map
+ *
+ * For PAE, each entry is 8 bytes long: we must set the 4 upper bytes to 0.
+ * This is done by the first instruction of fillkpt. In the non-PAE case, this
+ * instruction just clears the page table entry.
+ */
+
+#define fillkpt	\
+1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */ \
+	movl	%eax,(%ebx)		;	/* store phys addr */ \
+	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */ \
+	addl	$PAGE_SIZE,%eax		;	/* next phys page */ \
+	loop	1b			;
+
+/*
+ * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
+ */
+#define fillkpt_nox	\
+	pushl	%ebp			;	\
+	movl	RELOC(nox_flag),%ebp	;	\
+1:	movl	%ebp,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: NX */ \
+	movl	%eax,(%ebx)		;	/* store phys addr */ \
+	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */ \
+	addl	$PAGE_SIZE,%eax		;	/* next phys page */ \
+	loop	1b			;	\
+	popl	%ebp			;
+
+/*
+ * killkpt - Destroy a kernel page table
+ *	ebx = page table address
+ *	ecx = number of pages to destroy
+ */
+#define killkpt	\
+1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper bits (for PAE) */ \
+	movl	$0,(%ebx)		;	\
+	addl	$PDE_SIZE,%ebx		;	\
+	loop	1b			;
+
+
+#ifdef XEN
+/*
+ * Xen guest identifier and loader selection
+ */
+.section __xen_guest
+	.ascii	"GUEST_OS=netbsd,GUEST_VER=3.0,XEN_VER=xen-3.0"
+	.ascii	",VIRT_BASE=0xc0000000"		/* KERNBASE */
+	.ascii	",ELF_PADDR_OFFSET=0xc0000000"	/* KERNBASE */
+	.ascii	",VIRT_ENTRY=0xc0100000"	/* KERNTEXTOFF */
+	.ascii	",HYPERCALL_PAGE=0x00000101"
+		/* (???+HYPERCALL_PAGE_OFFSET)/PAGE_SIZE) */
+#ifdef PAE
+	.ascii	",PAE=yes[extended-cr3]"
+#endif
+	.ascii	",LOADER=generic"
+#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
+	.ascii	",BSD_SYMTAB=yes"
+#endif
+	.byte	0
+#endif /* XEN */
+
 /*
  * Initialization
  */
 .data
 
-	.globl	_C_LABEL(cpu)
+	.globl	_C_LABEL(nox_flag)
+	.globl	_C_LABEL(cputype)
+	.globl	_C_LABEL(cpuid_level)
 	.globl	_C_LABEL(esym)
+	.globl	_C_LABEL(eblob)
 	.globl	_C_LABEL(atdevbase)
-	.globl	_C_LABEL(proc0uarea),_C_LABEL(PDPpaddr)
+	.globl	_C_LABEL(lwp0uarea)
+	.globl	_C_LABEL(PDPpaddr)
 	.globl	_C_LABEL(gdt)
 	.globl	_C_LABEL(idt)
 	.globl	_C_LABEL(lapic_tpr)
@@ -159,41 +269,76 @@
 	.align 12
 #endif
 	.globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
-_C_LABEL(local_apic):
+	.type _C_LABEL(local_apic), @object
+LABEL(local_apic)
 	.space	LAPIC_ID
-_C_LABEL(lapic_id):
+END(local_apic)
+	.type _C_LABEL(lapic_id), @object
+LABEL(lapic_id)
 	.long	0x00000000
-	.space	LAPIC_TPRI-(LAPIC_ID+4)
-_C_LABEL(lapic_tpr):
-	.space	LAPIC_PPRI-LAPIC_TPRI
+	.space	LAPIC_TPRI-(LAPIC_ID+4)
+END(lapic_id)
+	.type _C_LABEL(lapic_tpr), @object
+LABEL(lapic_tpr)
+	.space	LAPIC_PPRI-LAPIC_TPRI
+END(lapic_tpr)
+	.type _C_LABEL(lapic_ppr), @object
 _C_LABEL(lapic_ppr):
 	.space	LAPIC_ISR-LAPIC_PPRI
+END(lapic_ppr)
+	.type _C_LABEL(lapic_isr), @object
 _C_LABEL(lapic_isr):
 	.space	PAGE_SIZE-LAPIC_ISR
+END(lapic_isr)
 #else
-_C_LABEL(lapic_tpr):
+	.type _C_LABEL(lapic_tpr), @object
+LABEL(lapic_tpr)
 	.long 0
+END(lapic_tpr)
 #endif
 
-_C_LABEL(cpu):		.long	0	# are we 80486, Pentium, or..
-_C_LABEL(atdevbase):	.long	0	# location of start of iomem in virtual
-_C_LABEL(proc0uarea):	.long	0
-_C_LABEL(PDPpaddr):	.long	0	# paddr of PDP, for libkvm
+	.type _C_LABEL(nox_flag), @object
+LABEL(nox_flag)		.long	0	/* 32bit NOX flag, set if supported */
+END(nox_flag)
+	.type _C_LABEL(cputype), @object
+LABEL(cputype)		.long	0	/* are we 80486, Pentium, or.. */
+END(cputype)
+	.type _C_LABEL(cpuid_level), @object
+LABEL(cpuid_level)	.long	0
+END(cpuid_level)
+	.type _C_LABEL(atdevbase), @object
+LABEL(atdevbase)	.long	0	/* location of start of iomem in virt */
+END(atdevbase)
+	.type _C_LABEL(lwp0uarea), @object
+LABEL(lwp0uarea)	.long	0
+END(lwp0uarea)
+	.type _C_LABEL(PDPpaddr), @object
+LABEL(PDPpaddr)		.long	0	/* paddr of PDP, for libkvm */
+END(PDPpaddr)
+	.type _C_LABEL(tablesize), @object
 _C_LABEL(tablesize):	.long	0
+END(tablesize)
 
-	.space 512
+	/* Space for the temporary stack */
+	.size	tmpstk, tmpstk - .
+	.space 512
 tmpstk:
 
-
-#define _RELOC(x)	((x) - KERNBASE)
-#define RELOC(x)	_RELOC(_C_LABEL(x))
+#ifdef XEN
+	.align PAGE_SIZE, 0x0	/* Align on page boundary */
+LABEL(tmpgdt)
+	.space PAGE_SIZE	/* Xen expects a page */
+END(tmpgdt)
+#endif /* XEN */
 
 .text
 .globl	_C_LABEL(kernel_text)
 .set	_C_LABEL(kernel_text),KERNTEXTOFF
 
-	.globl	start
-start:	movw	$0x1234,0x472	# warm boot
+ENTRY(start)
+#ifndef XEN
+
+	/* Warm boot */
+	movw	$0x1234,0x472
 
 #if defined(MULTIBOOT)
 	jmp	1f
@@ -218,7 +363,7 @@ _C_LABEL(Multiboot_Header):
 	 * space to process it later -- after we are relocated.  It will
 	 * be safer to run complex C code than doing it at this point.
 	 */
-	pushl	%ebx		# Address of Multiboot information
+	pushl	%ebx		/* Address of Multiboot information */
 	call	_C_LABEL(multiboot_pre_reloc)
 	addl	$4,%esp
 	jmp	2f
@@ -227,11 +372,14 @@ _C_LABEL(Multiboot_Header):
 
 1:
 	/*
 	 * At this point, we know that a NetBSD-specific boot loader
-	 * booted this kernel.  The stack carries the following parameters:
-	 * (boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem),
-	 * 4 bytes each.
+	 * booted this kernel.
+	 *
+	 * Load parameters from the stack (32 bits):
+	 *     boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
+	 * We are not interested in 'bootdev'.
 	 */
-	addl	$4,%esp		# Discard return address to boot loader
+
+	addl	$4,%esp		/* Discard return address to boot loader */
+
 	call	_C_LABEL(native_loader)
 	addl	$24,%esp
 
@@ -245,7 +393,7 @@ _C_LABEL(Multiboot_Header):
 	movw	%ax,%fs
 	movw	%ax,%gs
 	decl	%eax
-	movl	%eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
+	movl	%eax,RELOC(cpuid_level)
 
 	/* Find out our CPU type. */
 
@@ -282,11 +430,11 @@ isnx586:
 	 * Don't try cpuid, as Nx586s reportedly don't support the
 	 * PSL_ID bit.
 	 */
-	movl	$CPU_NX586,RELOC(cpu)
+	movl	$CPU_NX586,RELOC(cputype)
 	jmp	2f
 
 is386:
-	movl	$CPU_386,RELOC(cpu)
+	movl	$CPU_386,RELOC(cputype)
 	jmp	2f
 
 try486:	/* Try to toggle identification flag; does not exist on early 486s. */
@@ -305,7 +453,7 @@ try486:	/* Try to toggle identification
 	testl	%eax,%eax
 	jnz	try586
 
-is486:	movl	$CPU_486,RELOC(cpu)
+is486:	movl	$CPU_486,RELOC(cputype)
 	/*
 	 * Check Cyrix CPU
 	 * Cyrix CPUs do not change the undefined flags following
@@ -323,30 +471,30 @@ is486:	movl	$CPU_486,RELOC(cpu)
 	popfl
 	jmp	2f
 trycyrix486:
-	movl	$CPU_6x86,RELOC(cpu)	# set CPU type
+	movl	$CPU_6x86,RELOC(cputype)	/* set CPU type */
 	/*
 	 * Check for Cyrix 486 CPU by seeing if the flags change during a
 	 * divide. This is documented in the Cx486SLC/e SMM Programmer's
 	 * Guide.
 	 */
 	xorl	%edx,%edx
-	cmpl	%edx,%edx		# set flags to known state
+	cmpl	%edx,%edx		/* set flags to known state */
 	pushfl
-	popl	%ecx			# store flags in ecx
+	popl	%ecx			/* store flags in ecx */
 	movl	$-1,%eax
 	movl	$4,%ebx
-	divl	%ebx			# do a long division
+	divl	%ebx			/* do a long division */
 	pushfl
 	popl	%eax
-	xorl	%ecx,%eax		# are the flags different?
-	testl	$0x8d5,%eax		# only check C|PF|AF|Z|N|V
-	jne	2f			# yes; must be Cyrix 6x86 CPU
-	movl	$CPU_486DLC,RELOC(cpu)	# set CPU type
+	xorl	%ecx,%eax		/* are the flags different? */
+	testl	$0x8d5,%eax		/* only check C|PF|AF|Z|N|V */
+	jne	2f			/* yes; must be Cyrix 6x86 CPU */
+	movl	$CPU_486DLC,RELOC(cputype)	/* set CPU type */
 
 #ifndef CYRIX_CACHE_WORKS
 	/* Disable caching of the ISA hole only. */
 	invd
-	movb	$CCR0,%al		# Configuration Register index (CCR0)
+	movb	$CCR0,%al		/* Configuration Register index (CCR0) */
 	outb	%al,$0x22
 	inb	$0x23,%al
 	orb	$(CCR0_NC1|CCR0_BARB),%al
@@ -358,8 +506,8 @@ trycyrix486:
 	invd
 #else /* CYRIX_CACHE_WORKS */
 	/* Set cache parameters */
-	invd				# Start with guaranteed clean cache
-	movb	$CCR0,%al		# Configuration Register index (CCR0)
+	invd				/* Start with guaranteed clean cache */
+	movb	$CCR0,%al		/* Configuration Register index (CCR0) */
 	outb	%al,$0x22
 	inb	$0x23,%al
 	andb	$~CCR0_NC0,%al
@@ -405,7 +553,17 @@ trycyrix486:
 try586:	/* Use the `cpuid' instruction. */
 	xorl	%eax,%eax
 	cpuid
-	movl	%eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
+	movl	%eax,RELOC(cpuid_level)
+
+	/*
+	 * Retrieve the NX/XD flag. We use the 32bit version of PG_NX.
+	 */
+	movl	$0x80000001,%eax
+	cpuid
+	andl	$CPUID_NOX,%edx
+	jz	no_NOX
+	movl	$PG_NX32,RELOC(nox_flag)
+no_NOX:
 
 2:
 	/*
@@ -417,40 +575,50 @@ try586:	/* Use the `cpuid' instruction.
 	 * The boot program should check:
 	 *	text+data <= &stack_variable - more_space_for_stack
 	 *	text+data+bss+pad+space_for_page_tables <= end_of_memory
-	 * Oops, the gdt is in the carcass of the boot program so clearing
+	 *
+	 * XXX: the gdt is in the carcass of the boot program so clearing
 	 * the rest of memory is still not possible.
 	 */
-	movl	$_RELOC(tmpstk),%esp	# bootstrap stack end location
+	movl	$_RELOC(tmpstk),%esp
 
 /*
- * Virtual address space of kernel:
+ * There are two different layouts possible, depending on whether PAE is
+ * enabled or not.
  *
- * text | data | bss | [syms] | page dir | proc0 kstack | L1 ptp
- *			      0          1       2      3
- */
-
-#define	PROC0_PDIR_OFF	0
-#define	PROC0_STK_OFF	(PROC0_PDIR_OFF + PAGE_SIZE)
-#define	PROC0_PTP1_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
-
-/*
- * fillkpt
- *	eax = pte (page frame | control | status)
- *	ebx = page table address
- *	ecx = number of pages to map
+ * If PAE is not enabled, there are two levels of pages: PD -> PT. They will
+ * be referred to as: L2 -> L1. L2 is 1 page long. The BOOTSTRAP TABLES have
+ * the following layout:
+ *	+-----+------------+----+
+ *	| L2 -> PROC0 STK -> L1 |
+ *	+-----+------------+----+
+ *
+ * If PAE is enabled, there are three levels of pages: PDP -> PD -> PT. They
+ * will be referred to as: L3 -> L2 -> L1. L3 is 1 page long, L2 is 4 page
+ * long. The BOOTSTRAP TABLES have the following layout:
+ *	+-----+-----+------------+----+
+ *	| L3 -> L2 -> PROC0 STK -> L1 |
+ *	+-----+-----+------------+----+
+ *
+ * Virtual address space of the kernel in both cases:
+ * +------+--------+------+-----+--------+---------------------+-----------
+ * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | BOOTSTRAP
+ * +------+--------+------+-----+--------+---------------------+-----------
+ *                                       (1)                   (2)      (3)
+ *
+ * -------+------------+
+ * TABLES | ISA IO MEM |
+ * -------+------------+
+ *        (4)
+ *
+ * PROC0 STK is obviously not linked as a page level. It just happens to be
+ * caught between L2 and L1.
  */
-#define fillkpt \
-1:	movl	%eax,(%ebx)	;	/* store phys addr */ \
-	addl	$4,%ebx		;	/* next pte/pde */ \
-	addl	$PAGE_SIZE,%eax	;	/* next phys page */ \
-	loop	1b		;	\
-
 
-	/* Find end of kernel image. */
+	/* Find end of kernel image; brings us on (1). */
 	movl	$RELOC(end),%edi
-#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
-	/* Save the symbols (if loaded). */
+
+#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
+	/* Save the symbols (if loaded); brings us on (2). */
 	movl	RELOC(esym),%eax
 	testl	%eax,%eax
 	jz	1f
@@ -459,128 +627,198 @@ try586:	/* Use the `cpuid' instruction.
 1:
 #endif
 
-	/* Compute sizes */
-	movl	%edi,%esi		# edi = esym ? esym : end
-	addl	$PGOFSET,%esi		# page align up
+	/* Skip over any modules/blobs; brings us on (3). */
+	movl	RELOC(eblob),%eax
+	testl	%eax,%eax
+	jz	1f
+	subl	$KERNBASE,%eax
+	movl	%eax,%edi
+1:
+
+	/* We are on (3). Align up for BOOTSTRAP TABLES. */
+	movl	%edi,%esi
+	addl	$PGOFSET,%esi
 	andl	$~PGOFSET,%esi
 
+	/* nkptp[1] = (esi + ~L2_FRAME) >> L2_SHIFT + 1; */
 	movl	%esi,%eax
-	addl	$-L2_FRAME,%eax
+	addl	$~L2_FRAME,%eax
 	shrl	$L2_SHIFT,%eax
-	movl	%eax,RELOC(nkptp)+1*4
+	incl	%eax		/* one more PTP for VAs stolen by bootstrap */
+1:	movl	%eax,RELOC(nkptp)+1*4
 
-	addl	$(1+UPAGES),%eax
+	/* tablesize = (PDP_SIZE + UPAGES + nkptp) << PGSHIFT; */
+	addl	$(PDP_SIZE+UPAGES),%eax
+#ifdef PAE
+	incl	%eax		/* one more page for L3 */
+	shll	$PGSHIFT+1,%eax	/* PTP tables are twice larger with PAE */
+#else
 	shll	$PGSHIFT,%eax
+#endif
 	movl	%eax,RELOC(tablesize)
 
-	/* Clear tables */
-	movl	%esi,%edi
+	/* Ensure that nkptp covers BOOTSTRAP TABLES. */
+	addl	%esi,%eax
+	addl	$~L2_FRAME,%eax
+	shrl	$L2_SHIFT,%eax
+	incl	%eax
+	cmpl	%eax,RELOC(nkptp)+1*4
+	jnz	1b
+
+	/* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
+	movl	%esi,%edi
 	xorl	%eax,%eax
 	cld
-	movl	RELOC(tablesize),%ecx
-	shrl	$2,%ecx
+	movl	RELOC(tablesize),%ecx
+	shrl	$2,%ecx
 	rep
-	stosl
-
-	leal	(PROC0_PTP1_OFF)(%esi), %ebx
+	stosl				/* copy eax -> edi */
 
 /*
- * Build initial page tables.
+ * Build the page tables and levels. We go from L1 to L2/L3, and link the levels
+ * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
+ * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
  */
-	/*
-	 * Compute etext - KERNBASE. This can't be > 4G, or we can't deal
-	 * with it anyway, since we can't load it in 32 bit mode. So use
-	 * the bottom 32 bits.
-	 */
-	movl	$RELOC(etext),%edx
-	addl	$PGOFSET,%edx
-	andl	$~PGOFSET,%edx
-
-	/*
-	 * Skip the first MB.
-	 */
-	movl	$_RELOC(KERNTEXTOFF),%eax
+	/*
+	 * Build L1.
+	 */
+	leal	(PROC0_PTP1_OFF)(%esi),%ebx
+
+	/* Skip the first MB. */
+	movl	$(KERNTEXTOFF - KERNBASE),%eax
 	movl	%eax,%ecx
-	shrl	$(PGSHIFT-2),%ecx	/* ((n >> PGSHIFT) << 2) for # pdes */
-	addl	%ecx,%ebx
+	shrl	$(PGSHIFT-2),%ecx	/* ((n >> PGSHIFT) << 2) for # PDEs */
+#ifdef PAE
+	shll	$1,%ecx			/* PDEs are twice larger with PAE */
+#endif
+	addl	%ecx,%ebx
 
-	/* Map the kernel text read-only. */
-	movl	%edx,%ecx
+	/* Map the kernel text RX. */
+	movl	$RELOC(__rodata_start),%ecx
 	subl	%eax,%ecx
 	shrl	$PGSHIFT,%ecx
 	orl	$(PG_V|PG_KR),%eax
 	fillkpt
 
-	/* Map the data, BSS, and bootstrap tables read-write. */
-	leal	(PG_V|PG_KW)(%edx),%eax
-	movl	RELOC(tablesize),%ecx
-	addl	%esi,%ecx		# end of tables
-	subl	%edx,%ecx		# subtract end of text
+	/* Map the kernel rodata R. */
+	movl	$RELOC(__rodata_start),%eax
+	movl	$RELOC(__data_start),%ecx
+	subl	%eax,%ecx
 	shrl	$PGSHIFT,%ecx
+	orl	$(PG_V|PG_KR),%eax
+	fillkpt_nox
+
+	/* Map the kernel data+bss RW. */
+	movl	$RELOC(__data_start),%eax
+	movl	$RELOC(__kernel_end),%ecx
+	subl	%eax,%ecx
+	shrl	$PGSHIFT,%ecx
+	orl	$(PG_V|PG_KW),%eax
+	fillkpt_nox
+
+	/*
+	 * We actually have to be careful here. The memory layout is as
+	 * follows:
+	 * +----------+---------------------+------------------+
+	 * | DATA+BSS < [PRELOADED MODULES] | BOOTSTRAP TABLES >
+	 * +----------+---------------------+------------------+
+	 * We just map everything from < to > with RWX rights.
+	 */
+	movl	$RELOC(__kernel_end),%eax
+	movl	%esi,%ecx		/* start of BOOTSTRAP TABLES */
+	addl	RELOC(tablesize),%ecx	/* end of BOOTSTRAP TABLES */
+	subl	%eax,%ecx		/* subtract end of kernel image */
+	shrl	$PGSHIFT,%ecx
+	orl	$(PG_V|PG_KW),%eax
 	fillkpt
 
 	/* Map ISA I/O mem (later atdevbase) */
-	movl	$(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax	# having these bits set
-	movl	$(IOM_SIZE>>PGSHIFT),%ecx		# for this many pte s,
+	movl	$(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax
+	movl	$(IOM_SIZE>>PGSHIFT),%ecx
 	fillkpt
 
-/*
- * Construct a page table directory.
- */
-	/* Set up top level entries for identity mapping */
-	leal	(PROC0_PDIR_OFF)(%esi),%ebx
-	leal	(PROC0_PTP1_OFF)(%esi),%eax
-	orl	$(PG_V|PG_KW), %eax
-	movl	RELOC(nkptp)+1*4,%ecx
+	/*
+	 * Build L2 for identity mapping. Linked to L1.
+	 */
+	leal	(PROC0_PDIR_OFF)(%esi),%ebx
+	leal	(PROC0_PTP1_OFF)(%esi),%eax
+	orl	$(PG_V|PG_KW),%eax
+	movl	RELOC(nkptp)+1*4,%ecx
 	fillkpt
 
-	/* Set up top level entries for actual kernel mapping */
-	leal	(PROC0_PDIR_OFF + L2_SLOT_KERNBASE*4)(%esi),%ebx
-	leal	(PROC0_PTP1_OFF)(%esi),%eax
-	orl	$(PG_V|PG_KW), %eax
-	movl	RELOC(nkptp)+1*4,%ecx
+	/* Set up L2 entries for actual kernel mapping */
+	leal	(PROC0_PDIR_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
+	leal	(PROC0_PTP1_OFF)(%esi),%eax
+	orl	$(PG_V|PG_KW),%eax
+	movl	RELOC(nkptp)+1*4,%ecx
 	fillkpt
 
 	/* Install a PDE recursively mapping page directory as a page table! */
-	leal	(PROC0_PDIR_OFF + PDIR_SLOT_PTE*4)(%esi),%ebx
-	leal	(PROC0_PDIR_OFF)(%esi),%eax
-	orl	$(PG_V|PG_KW),%eax
-	movl	%eax,(%ebx)
-
-
-	/* Save phys. addr of PDP, for libkvm. */
-	movl	%esi,RELOC(PDPpaddr)
-
-	/*
-	 * Startup checklist:
-	 * 1. Load %cr3 with pointer to PDIR.
-	 */
-	movl	%esi,%eax		# phys address of ptd in proc 0
-	movl	%eax,%cr3		# load ptd addr into mmu
-
-	/*
-	 * 2. Enable paging and the rest of it.
-	 */
-	movl	%cr0,%eax		# get control word
-					# enable paging & NPX emulation
-	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP),%eax
-	movl	%eax,%cr0		# and let's page NOW!
+	leal	(PROC0_PDIR_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx
+	leal	(PROC0_PDIR_OFF)(%esi),%eax
+	orl	$(PG_V|PG_KW),%eax
+	movl	$PDP_SIZE,%ecx
+	fillkpt
+
+#ifdef PAE
+	/*
+	 * Build L3. Linked to L2.
+	 */
+	leal	(PROC0_L3_OFF)(%esi),%ebx
+	leal	(PROC0_PDIR_OFF)(%esi),%eax
+	orl	$(PG_V),%eax
+	movl	$PDP_SIZE,%ecx
+	fillkpt
+
+	/* Enable PAE mode */
+	movl	%cr4,%eax
+	orl	$CR4_PAE,%eax
+	movl	%eax,%cr4
+#endif
+
+	/* Save physical address of L2. */
+	leal	(PROC0_PDIR_OFF)(%esi),%eax
+	movl	%eax,RELOC(PDPpaddr)
+
+	/*
+	 * Startup checklist:
+	 * 1. Load %cr3 with pointer to L2 (or L3 for PAE).
+	 */
+	movl	%esi,%eax
+	movl	%eax,%cr3
 
-	pushl	$begin			# jump to high mem
+	/*
+	 * 2. Set NOX in EFER, if available.
+	 */
+	movl	RELOC(nox_flag),%ebx
+	cmpl	$0,%ebx
+	je	skip_NOX
+	movl	$MSR_EFER,%ecx
+	rdmsr
+	xorl	%eax,%eax
+	orl	$(EFER_NXE),%eax
+	wrmsr
+skip_NOX:
+
+	/*
+	 * 3. Enable paging and the rest of it.
+	 */
+	movl	%cr0,%eax
+	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
+	movl	%eax,%cr0
+
+	pushl	$begin			/* jump to high mem */
 	ret
 
 begin:
-	/*
-	 * We have arrived.
-	 * There's no need anymore for the identity mapping in low
-	 * memory, remove it.
-	 */
-	movl	_C_LABEL(nkptp)+1*4,%ecx
-	leal	(PROC0_PDIR_OFF)(%esi),%ebx	# old, phys address of PDIR
-	addl	$(KERNBASE), %ebx		# new, virtual address of PDIR
-1:	movl	$0,(%ebx)
-	addl	$4,%ebx
-	loop	1b
+	/*
+	 * We have arrived. There's no need anymore for the identity mapping in
+	 * low memory, remove it.
+	 */
+	movl	_C_LABEL(nkptp)+1*4,%ecx
+	leal	(PROC0_PDIR_OFF)(%esi),%ebx	/* old, phys address of PDIR */
+	addl	$(KERNBASE), %ebx		/* new, virt address of PDIR */
+	killkpt
 
 	/* Relocate atdevbase. */
 	movl	$KERNBASE,%edx
@@ -589,11 +827,11 @@ begin:
 	movl	%edx,_C_LABEL(atdevbase)
 
 	/* Set up bootstrap stack. */
-	leal	(PROC0_STK_OFF+KERNBASE)(%esi),%eax
-	movl	%eax,_C_LABEL(proc0uarea)
-	leal	(KSTACK_SIZE-FRAMESIZE)(%eax),%esp
-	movl	%esi,(KSTACK_SIZE+PCB_CR3)(%eax)	# pcb->pcb_cr3
-	xorl	%ebp,%ebp		# mark end of frames
+	leal	(PROC0_STK_OFF+KERNBASE)(%esi),%eax
+	movl	%eax,_C_LABEL(lwp0uarea)
+	leal	(USPACE-FRAMESIZE)(%eax),%esp
+	movl	%esi,PCB_CR3(%eax)	/* pcb->pcb_cr3 */
+	xorl	%ebp,%ebp		/* mark end of frames */
 
 #if defined(MULTIBOOT)
 	/* It is now safe to parse the Multiboot information structure
@@ -603,17 +841,21 @@ begin:
 	call	_C_LABEL(multiboot_post_reloc)
 #endif
 
-	subl	$NGDT*8, %esp		# space for temporary gdt
+	subl	$NGDT*8, %esp		/* space for temporary gdt */
 	pushl	%esp
 	call	_C_LABEL(initgdt)
 	addl	$4,%esp
 
-	movl	_C_LABEL(tablesize),%eax
-	addl	%esi,%eax		# skip past stack and page tables
+	movl	_C_LABEL(tablesize),%eax
+	addl	%esi,%eax		/* skip past stack and page tables */
+
+#ifdef PAE
+	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
+#endif
 	pushl	%eax
-	call	_C_LABEL(init386)	# wire 386 chip for unix operation
-	addl	$4+NGDT*8,%esp		# pop temporary gdt
+	call	_C_LABEL(init386)	/* wire 386 chip for unix operation */
+	addl	$PDE_SIZE,%esp		/* pop paddr_t */
+	addl	$NGDT*8,%esp		/* pop temporary gdt */
 
 #ifdef SAFARI_FIFO_HACK
 	movb	$5,%al
@@ -634,28 +876,130 @@ begin:
 #endif /* SAFARI_FIFO_HACK */
 
 	call	_C_LABEL(main)
+#else /* XEN */
+	/* First, reset the PSL. */
+	pushl	$PSL_MBO
+	popfl
+
+	cld
+	movl	%esp, %ebx		/* save start of available space */
+	movl	$_RELOC(tmpstk),%esp	/* bootstrap stack end location */
+
+	/* Clear BSS. */
+	xorl	%eax,%eax
+	movl	$RELOC(__bss_start),%edi
+	movl	$RELOC(_end),%ecx
+	subl	%edi,%ecx
+	rep stosb
+
+	/* Copy the necessary stuff from start_info structure. */
+	/* We need to copy shared_info early, so that sti/cli work */
+	movl	$RELOC(start_info_union),%edi
+	movl	$128,%ecx
+	rep movsl
+
+	/* Clear segment registers; always null in proc0. */
+	xorl	%eax,%eax
+	movw	%ax,%fs
+	movw	%ax,%gs
+	decl	%eax
+	movl	%eax,RELOC(cpuid_level)
+
+	xorl	%eax,%eax
+	cpuid
+	movl	%eax,RELOC(cpuid_level)
+
+	/*
+	 * Use a temp page. We'll re-add it to uvm(9) once we're
+	 * done using it.
+	 */
+	movl	$RELOC(tmpgdt), %eax
+	pushl	%eax		/* start of temporary gdt */
+	call	_C_LABEL(initgdt)
+	addl	$4,%esp
+
+	call	xen_pmap_bootstrap
+
+	/*
+	 * First avail returned by xen_pmap_bootstrap in %eax
+	 */
+	movl	%eax, %esi
+	movl	%esi, _C_LABEL(lwp0uarea)
+
+	/* Set up bootstrap stack. */
+	leal	(USPACE-FRAMESIZE)(%eax),%esp
+	xorl	%ebp,%ebp	/* mark end of frames */
+
+	addl	$USPACE, %esi
+	subl	$KERNBASE, %esi	/* init386 wants a physical address */
+
+#ifdef PAE
+	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
+#endif
+	pushl	%esi
+	call	_C_LABEL(init386)	/* wire 386 chip for unix operation */
+	addl	$PDE_SIZE,%esp		/* pop paddr_t */
+	call	_C_LABEL(main)
+#endif /* XEN */
+END(start)
+
+#if defined(XEN)
+/* space for the hypercall call page */
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+.skip 0x1000
+END(hypercall_page)
+
+/*
+ * void lgdt_finish(void);
+ * Finish loading a new GDT pointer (do any necessary cleanup).
+ * XXX It's somewhat questionable whether reloading all the segment registers
+ * is necessary, since the actual descriptor data is not changed except by
+ * process creation and exit, both of which clean up via task switches. OTOH,
+ * this only happens at run time when the GDT is resized.
+ */
+/* LINTSTUB: Func: void lgdt_finish(void) */
+NENTRY(lgdt_finish)
+	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
+	movw	%ax,%ds
+	movw	%ax,%es
+	movw	%ax,%gs
+	movw	%ax,%ss
+	movl	$GSEL(GCPU_SEL, SEL_KPL),%eax
+	movw	%ax,%fs
+	/* Reload code selector by doing intersegment return. */
+	popl	%eax
+	pushl	$GSEL(GCODE_SEL, SEL_KPL)
+	pushl	%eax
+	lret
+END(lgdt_finish)
+
+#endif /* XEN */
 
 /*
  * void lwp_trampoline(void);
  *
  * This is a trampoline function pushed onto the stack of a newly created
  * process in order to do some additional setup.  The trampoline is entered by
- * cpu_switch()ing to the process, so we abuse the callee-saved registers used
- * by cpu_switch() to store the information about the stub to call.
+ * cpu_switchto()ing to the process, so we abuse the callee-saved
+ * registers used by cpu_switchto() to store the information about the
+ * stub to call.
  * NOTE: This function does not have a normal calling sequence!
  */
 NENTRY(lwp_trampoline)
-	pushl	%ebp
+	movl	%ebp,%edi	/* for .Lsyscall_checkast */
 	xorl	%ebp,%ebp
+	pushl	%edi
 	pushl	%eax
 	call	_C_LABEL(lwp_startup)
 	addl	$8,%esp
 	pushl	%ebx
 	call	*%esi
 	addl	$4,%esp
-	DO_DEFERRED_SWITCH(%eax)
-	INTRFASTEXIT
+	jmp	.Lsyscall_checkast
 	/* NOTREACHED */
+END(lwp_trampoline)
 
 /*
  * sigcode()
@@ -668,56 +1012,16 @@ NENTRY(sigcode)
 	 * Handler has returned here as if we called it.  The sigcontext
 	 * is on the stack after the 3 args "we" pushed.
 	 */
-	leal	12(%esp),%eax		# get pointer to sigcontext
-	movl	%eax,4(%esp)		# put it in the argument slot
-					# fake return address already there
+	leal	12(%esp),%eax		/* get pointer to sigcontext */
+	movl	%eax,4(%esp)		/* put it in the argument slot */
+					/* fake return address already there */
 	movl	$SYS_compat_16___sigreturn14,%eax
-	int	$0x80			# enter kernel with args on stack
+	int	$0x80			/* enter kernel with args on stack */
 	movl	$SYS_exit,%eax
-	int	$0x80			# exit if sigreturn fails
+	int	$0x80			/* exit if sigreturn fails */
 	.globl	_C_LABEL(esigcode)
 _C_LABEL(esigcode):
-
-/*
- * void lgdt(struct region_descriptor *rdp);
- *
- * Load a new GDT pointer (and do any necessary cleanup).
- * XXX It's somewhat questionable whether reloading all the segment registers
- * is necessary, since the actual descriptor data is not changed except by
- * process creation and exit, both of which clean up via task switches.  OTOH,
- * this only happens at run time when the GDT is resized.
- */
-NENTRY(lgdt)
-	/* Reload the descriptor table. */
-	movl	4(%esp),%eax
-	lgdt	(%eax)
-	/* Flush the prefetch queue. */
-	jmp	1f
-	nop
-1:	/* Reload "stale" selectors. */
-	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
-	movw	%ax,%ds
-	movw	%ax,%es
-	movw	%ax,%gs
-	movw	%ax,%ss
-	movl	$GSEL(GCPU_SEL, SEL_KPL),%eax
-	movw	%ax,%fs
-	/* Reload code selector by doing intersegment return. */
-	popl	%eax
-	pushl	$GSEL(GCODE_SEL, SEL_KPL)
-	pushl	%eax
-	lret
-
-/*
- * void x86_flush()
- *
- * Flush instruction pipelines by doing an intersegment (far) return.
- */
-NENTRY(x86_flush)
-	popl	%eax
-	pushl	$GSEL(GCODE_SEL, SEL_KPL)
-	pushl	%eax
-	lret
+END(sigcode)
 
 /*
  * int setjmp(label_t *)
@@ -726,15 +1030,16 @@ NENTRY(x86_flush)
  */
 ENTRY(setjmp)
 	movl	4(%esp),%eax
-	movl	%ebx,(%eax)		# save ebx
-	movl	%esp,4(%eax)		# save esp
-	movl	%ebp,8(%eax)		# save ebp
-	movl	%esi,12(%eax)		# save esi
-	movl	%edi,16(%eax)		# save edi
-	movl	(%esp),%edx		# get rta
-	movl	%edx,20(%eax)		# save eip
-	xorl	%eax,%eax		# return 0
+	movl	%ebx,(%eax)		/* save ebx */
+	movl	%esp,4(%eax)		/* save esp */
+	movl	%ebp,8(%eax)		/* save ebp */
+	movl	%esi,12(%eax)		/* save esi */
+	movl	%edi,16(%eax)		/* save edi */
+	movl	(%esp),%edx		/* get rta */
+	movl	%edx,20(%eax)		/* save eip */
+	xorl	%eax,%eax		/* return 0 */
 	ret
+END(setjmp)
 
 /*
  * int longjmp(label_t *)
@@ -743,18 +1048,36 @@ ENTRY(setjmp)
  */
 ENTRY(longjmp)
 	movl	4(%esp),%eax
-	movl	(%eax),%ebx		# restore ebx
-	movl	4(%eax),%esp		# restore esp
-	movl	8(%eax),%ebp		# restore ebp
-	movl	12(%eax),%esi		# restore esi
-	movl	16(%eax),%edi		# restore edi
-	movl	20(%eax),%edx		# get rta
-	movl	%edx,(%esp)		# put in return frame
-	movl	$1,%eax			# return 1
+	movl	(%eax),%ebx		/* restore ebx */
+	movl	4(%eax),%esp		/* restore esp */
+	movl	8(%eax),%ebp		/* restore ebp */
+	movl	12(%eax),%esi		/* restore esi */
+	movl	16(%eax),%edi		/* restore edi */
+	movl	20(%eax),%edx		/* get rta */
+	movl	%edx,(%esp)		/* put in return frame */
+	movl	$1,%eax			/* return 1 */
 	ret
+END(longjmp)
 
 /*
- * struct lwp *cpu_switchto(struct lwp *oldlwp, struct newlwp)
+ * void dumpsys(void)
+ *
+ * Mimic cpu_switchto() for postmortem debugging.
+ */
+ENTRY(dumpsys)
+	pushl	%ebx			/* set up fake switchframe */
+	pushl	%esi			/* and save context */
+	pushl	%edi
+	movl	%esp,_C_LABEL(dumppcb)+PCB_ESP
+	movl	%ebp,_C_LABEL(dumppcb)+PCB_EBP
+	call	_C_LABEL(dodumpsys)	/* dump! */
+	addl	$(3*4), %esp		/* unwind switchframe */
+	ret
+END(dumpsys)
+
+/*
+ * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
+ *			    bool returning)
  *
  * 1. if (oldlwp != NULL), save its context.
  * 2. then, restore context of newlwp.
@@ -768,56 +1091,103 @@ ENTRY(cpu_switchto)
 	pushl	%esi
 	pushl	%edi
 
-	movl	16(%esp),%esi		# oldlwp
-	movl	20(%esp),%edi		# newlwp
+#if defined(DIAGNOSTIC) && !defined(XEN)
+	cmpl	$IPL_SCHED,CPUVAR(ILEVEL)
+	jbe	0f
+	pushl	CPUVAR(ILEVEL)
+	pushl	$.Lstr
+	call	_C_LABEL(panic)
+	addl	$8,%esp
+.Lstr:	.string	"cpu_switchto: switching above IPL_SCHED (%d)\0"
+0:
+#endif
+
+	movl	16(%esp),%esi		/* oldlwp */
+	movl	20(%esp),%edi		/* newlwp */
+	movl	24(%esp),%edx		/* returning */
 	testl	%esi,%esi
 	jz	1f
 
 	/* Save old context. */
-	movl	L_ADDR(%esi),%eax
+	movl	L_PCB(%esi),%eax
 	movl	%esp,PCB_ESP(%eax)
 	movl	%ebp,PCB_EBP(%eax)
 
 	/* Switch to newlwp's stack. */
-1:	movl	L_ADDR(%edi),%ebx
+1:	movl	L_PCB(%edi),%ebx
 	movl	PCB_EBP(%ebx),%ebp
 	movl	PCB_ESP(%ebx),%esp
 
-	/* Switch TSS.  Reset "task busy" flag before loading. */
-	movl	%cr3,%eax
-	movl	%eax,PCB_CR3(%ebx)	# for TSS gates
-	movl	CPUVAR(GDT),%eax
-	movl	L_MD_TSS_SEL(%edi),%edx
-	andl	$~0x0200,4(%eax,%edx, 1)
-	ltr	%dx
+	/*
+	 * Set curlwp.  This must be globally visible in order to permit
+	 * non-interlocked mutex release.
+	 */
+	movl	%edi,%ecx
+	xchgl	%ecx,CPUVAR(CURLWP)
+
+	/* Skip the rest if returning to a pinned LWP. */
+	testl	%edx,%edx
+	jnz	4f
 
-	/* Set curlwp. */
-	movl	%edi,CPUVAR(CURLWP)
+#ifdef XEN
+	pushl	%edi
+	call	_C_LABEL(i386_switch_context)
+	addl	$4,%esp
+#else /* !XEN */
+	/* Switch ring0 esp */
+	movl	PCB_ESP0(%ebx),%eax
+	movl	%eax,CPUVAR(ESP0)
+#endif /* !XEN */
 
 	/* Don't bother with the rest if switching to a system process. */
 	testl	$LW_SYSTEM,L_FLAG(%edi)
 	jnz	4f
 
+#ifndef XEN
+	/* Restore thread-private %fs/%gs descriptors. */
+	movl	CPUVAR(GDT),%ecx
+	movl	PCB_FSD(%ebx), %eax
+	movl	PCB_FSD+4(%ebx), %edx
+	movl	%eax, (GUFS_SEL*8)(%ecx)
+	movl	%edx, (GUFS_SEL*8+4)(%ecx)
+	movl	PCB_GSD(%ebx), %eax
+	movl	PCB_GSD+4(%ebx), %edx
+	movl	%eax, (GUGS_SEL*8)(%ecx)
+	movl	%edx, (GUGS_SEL*8+4)(%ecx)
+#endif /* !XEN */
+
+	/* Switch I/O bitmap */
+	movl	PCB_IOMAP(%ebx),%eax
+	orl	%eax,%eax
+	jnz	.Lcopy_iobitmap
+	movl	$(IOMAP_INVALOFF << 16),CPUVAR(IOBASE)
+.Liobitmap_done:
+
 	/* Is this process using RAS (restartable atomic sequences)? */
 	movl	L_PROC(%edi),%eax
 	cmpl	$0,P_RASLIST(%eax)
 	jne	5f
 
 	/*
-	 * Restore cr0 (including FPU state).  Raise the IPL to IPL_IPI.
+	 * Restore cr0 (including FPU state).  Raise the IPL to IPL_HIGH.
 	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
 	 * is deferred until mi_switch(), when cpu_switchto() returns.
 	 */
-2:	movl	$IPL_IPI,CPUVAR(ILEVEL)
-	movl	PCB_CR0(%ebx),%ecx
+2:
+#ifdef XEN
+	pushl	%edi
+	call	_C_LABEL(i386_tls_switch)
+	addl	$4,%esp
+#else /* !XEN */
+	movl	$IPL_HIGH,CPUVAR(ILEVEL)
+	movl	PCB_CR0(%ebx),%ecx	/* has CR0_TS clear */
 	movl	%cr0,%edx
 
 	/*
 	 * If our floating point registers are on a different CPU,
 	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
-	movl	PCB_FPCPU(%ebx),%eax
-	cmpl	CPUVAR(SELF),%eax
+	cmpl	CPUVAR(FPCURLWP),%edi
 	je	3f
 	orl	$CR0_TS,%ecx
 
@@ -825,6 +1195,7 @@ ENTRY(cpu_switchto)
 3:	cmpl	%edx,%ecx
 	je	4f
 	movl	%ecx,%cr0
+#endif /* !XEN */
 
 	/* Return to the new LWP, returning 'oldlwp' in %eax. */
 4:	movl	%esi,%eax
@@ -845,16 +1216,34 @@ ENTRY(cpu_switchto)
 	movl	%eax,TF_EIP(%ecx)
 	jmp	2b
 
+.Lcopy_iobitmap:
+	/* Copy I/O bitmap. */
+	incl	_C_LABEL(pmap_iobmp_evcnt)+EV_COUNT
+	movl	$(IOMAPSIZE/4),%ecx
+	pushl	%esi
+	pushl	%edi
+	movl	%eax,%esi		/* pcb_iomap */
+	movl	CPUVAR(SELF),%edi
+	leal	CPU_INFO_IOMAP(%edi),%edi
+	rep
+	movsl
+	popl	%edi
+	popl	%esi
+	movl	$((CPU_INFO_IOMAP - CPU_INFO_TSS) << 16),CPUVAR(IOBASE)
+	jmp	.Liobitmap_done
+END(cpu_switchto)
+
 /*
  * void savectx(struct pcb *pcb);
  *
  * Update pcb, saving current processor state.
  */
 ENTRY(savectx)
-	movl	4(%esp),%edx		# edx = pcb
+	movl	4(%esp),%edx		/* edx = pcb */
 	movl	%esp,PCB_ESP(%edx)
 	movl	%ebp,PCB_EBP(%edx)
 	ret
+END(savectx)
 
 /*
  * osyscall()
@@ -862,10 +1251,16 @@ ENTRY(savectx)
  * Old call gate entry for syscall
  */
 IDTVEC(osyscall)
-	pushfl			# set eflags in trap frame
+#ifndef XEN
+	/* XXX we are in trouble! interrupts must be off here. */
+	cli			/* must be first instruction */
+#endif
+	pushfl			/* set eflags in trap frame */
 	popl	8(%esp)
-	pushl	$7		# size of instruction for restart
+	orl	$PSL_I,8(%esp)	/* re-enable ints on return to user */
+	pushl	$7		/* size of instruction for restart */
 	jmp	syscall1
+IDTVEC_END(osyscall)
 
 /*
  * syscall()
@@ -873,37 +1268,108 @@ IDTVEC(osyscall)
  * Trap gate entry for syscall
  */
 IDTVEC(syscall)
-	pushl	$2		# size of instruction for restart
+	pushl	$2		/* size of instruction for restart */
 syscall1:
-	pushl	$T_ASTFLT	# trap # for doing ASTs
+	pushl	$T_ASTFLT	/* trap # for doing ASTs */
 	INTRENTRY
-
+	STI(%eax)
 #ifdef DIAGNOSTIC
-	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
-	jz	1f
-	pushl	$6f
-	call	_C_LABEL(printf)
-	addl	$4, %esp
-1:
 	movl	CPUVAR(ILEVEL),%ebx
 	testl	%ebx,%ebx
 	jz	1f
 	pushl	$5f
-	call	_C_LABEL(printf)
+	call	_C_LABEL(panic)
 	addl	$4,%esp
 #ifdef DDB
 	int	$3
 #endif
 1:
 #endif /* DIAGNOSTIC */
-	movl	CPUVAR(CURLWP),%edx
-	movl	%esp,L_MD_REGS(%edx)	# save pointer to frame
-	movl	L_PROC(%edx),%edx
+	addl	$1,CPUVAR(NSYSCALL)	/* count it atomically */
+	adcl	$0,CPUVAR(NSYSCALL)+4	/* count it atomically */
+	movl	CPUVAR(CURLWP),%edi
+	movl	L_PROC(%edi),%edx
+	movl	%esp,L_MD_REGS(%edi)	/* save pointer to frame */
 	pushl	%esp
-	call	*P_MD_SYSCALL(%edx)	# get pointer to syscall() function
+	call	*P_MD_SYSCALL(%edx)	/* get pointer to syscall() function */
 	addl	$4,%esp
 .Lsyscall_checkast:
 	/* Check for ASTs on exit to user mode. */
+	CLI(%eax)
+	movl	L_MD_ASTPENDING(%edi), %eax
+	orl	CPUVAR(WANT_PMAPLOAD), %eax
+	jnz	9f
+#ifdef XEN
+	STIC(%eax)
+	jz	14f
+	call	_C_LABEL(stipending)
+	testl	%eax,%eax
+	jz	14f
+	/* process pending interrupts */
+	CLI(%eax)
+	movl	CPUVAR(ILEVEL), %ebx
+	movl	$.Lsyscall_resume, %esi	/* address to resume loop at */
+.Lsyscall_resume:
+	movl	%ebx,%eax		/* get cpl */
+	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
+	andl	CPUVAR(IPENDING),%eax	/* any non-masked bits left? */
+	jz	17f
+	bsrl	%eax,%eax
+	btrl	%eax,CPUVAR(IPENDING)
+	movl	CPUVAR(ISOURCES)(,%eax,4),%eax
+	jmp	*IS_RESUME(%eax)
+17:	movl	%ebx, CPUVAR(ILEVEL)	/* restore cpl */
+	jmp	.Lsyscall_checkast
+14:
+#endif /* XEN */
+#ifndef DIAGNOSTIC
+	INTRFASTEXIT
+#else /* DIAGNOSTIC */
+	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
+	jne	3f
+	INTRFASTEXIT
+3:	STI(%eax)
+	pushl	$4f
+	call	_C_LABEL(panic)
+	addl	$4,%esp
+	pushl	$IPL_NONE
+	call	_C_LABEL(spllower)
+	addl	$4,%esp
+	jmp	.Lsyscall_checkast
+4:	.asciz	"SPL NOT LOWERED ON SYSCALL EXIT\n"
+5:	.asciz	"SPL NOT ZERO ON SYSCALL ENTRY\n"
+#endif /* DIAGNOSTIC */
+9:
+	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
+	jz	10f
+	STI(%eax)
+	call	_C_LABEL(pmap_load)
+	jmp	.Lsyscall_checkast	/* re-check ASTs */
+10:
+	/* Always returning to user mode here. */
+	movl	$0, L_MD_ASTPENDING(%edi)
+	STI(%eax)
+	/* Pushed T_ASTFLT into tf_trapno on entry. */
+	pushl	%esp
+	call	_C_LABEL(trap)
+	addl	$4,%esp
+	jmp	.Lsyscall_checkast	/* re-check ASTs */
+IDTVEC_END(syscall)
+
+IDTVEC(svr4_fasttrap)
+	pushl	$2		/* size of instruction for restart */
+	pushl	$T_ASTFLT	/* trap # for doing ASTs */
+	INTRENTRY
+	STI(%eax)
+	pushl	$RW_READER
+	pushl	$_C_LABEL(svr4_fasttrap_lock)
+	call	_C_LABEL(rw_enter)
+	addl	$8,%esp
+	call	*_C_LABEL(svr4_fasttrap_vec)
+	pushl	$_C_LABEL(svr4_fasttrap_lock)
+	call	_C_LABEL(rw_exit)
+	addl	$4,%esp
+2:	/* Check for ASTs on exit to user mode. */
 	cli
 	CHECK_ASTPENDING(%eax)
 	je	1f
@@ -914,92 +1380,49 @@ syscall1:
 	pushl	%esp
 	call	_C_LABEL(trap)
 	addl	$4,%esp
-	jmp	.Lsyscall_checkast	/* re-check ASTs */
-1:	CHECK_DEFERRED_SWITCH(%eax)
+	jmp	2b
+1:	CHECK_DEFERRED_SWITCH
 	jnz	9f
-#ifndef DIAGNOSTIC
 	INTRFASTEXIT
-#else /* DIAGNOSTIC */
-	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
-	jne	3f
-	INTRFASTEXIT
-3:	sti
-	pushl	$4f
-	call	_C_LABEL(printf)
-	addl	$4,%esp
-#ifdef DDB
-	int	$3
-#endif /* DDB */
-	movl	$IPL_NONE,CPUVAR(ILEVEL)
-	jmp	2b
-4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n"
-5:	.asciz	"WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n"
-6:	.asciz	"WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n"
-#endif /* DIAGNOSTIC */
 9:	sti
 	call	_C_LABEL(pmap_load)
-	jmp	.Lsyscall_checkast	/* re-check ASTs */
-
-#if NNPX > 0
-/*
- * Special interrupt handlers.  Someday intr0-intr15 will be used to count
- * interrupts.  We'll still need a special exception 16 handler.  The busy
- * latch stuff in probintr() can be moved to npxprobe().
- */
-
-/*
- * void probeintr(void)
- */
-NENTRY(probeintr)
-	ss
-	incl	_C_LABEL(npx_intrs_while_probing)
-	pushl	%eax
-	movb	$0x20,%al	# EOI (asm in strings loses cpp features)
-	outb	%al,$0xa0	# IO_ICU2
-	outb	%al,$0x20	# IO_ICU1
-	movb	$0,%al
-	outb	%al,$0xf0	# clear BUSY# latch
-	popl	%eax
-	iret
-
-/*
- * void probetrap(void)
- */
-NENTRY(probetrap)
-	ss
-	incl	_C_LABEL(npx_traps_while_probing)
-	fnclex
-	iret
+	cli
+	jmp	2b
 
 /*
  * int npx586bug1(int a, int b)
+ * Used when checking for the FDIV bug on first generation Pentiums.
+ * Anything 120MHz or above is fine.
  */
 NENTRY(npx586bug1)
-	fildl	4(%esp)		# x
-	fildl	8(%esp)		# y
+	fildl	4(%esp)		/* x */
+	fildl	8(%esp)		/* y */
 	fld	%st(1)
-	fdiv	%st(1),%st	# x/y
-	fmulp	%st,%st(1)	# (x/y)*y
-	fsubrp	%st,%st(1)	# x-(x/y)*y
+	fdiv	%st(1),%st	/* x/y */
+	fmulp	%st,%st(1)	/* (x/y)*y */
+	fsubrp	%st,%st(1)	/* x-(x/y)*y */
 	pushl	$0
 	fistpl	(%esp)
 	popl	%eax
 	ret
-#endif /* NNPX > 0 */
+END(npx586bug1)
 
 /*
- * void sse2_zero_page(void *pg)
+ * void sse2_idlezero_page(void *pg)
  *
- * Zero a page without polluting the cache.
+ * Zero a page without polluting the cache.  Preemption must be
+ * disabled by the caller.  Abort if a preemption is pending.
  */
-ENTRY(sse2_zero_page)
+ENTRY(sse2_idlezero_page)
 	pushl	%ebp
 	movl	%esp,%ebp
 	movl	8(%esp), %edx
-	movl	$PAGE_SIZE, %ecx
+	movl	$(PAGE_SIZE/32), %ecx
 	xorl	%eax, %eax
 	.align	16
 1:
+	testl	$RESCHED_KPREEMPT, CPUVAR(RESCHED)
+	jnz	2f
 	movnti	%eax, 0(%edx)
 	movnti	%eax, 4(%edx)
 	movnti	%eax, 8(%edx)
@@ -1008,43 +1431,15 @@ ENTRY(sse2_zero_page)
 	movnti	%eax, 20(%edx)
 	movnti	%eax, 24(%edx)
 	movnti	%eax, 28(%edx)
-	subl	$32, %ecx
-	leal	32(%edx), %edx
+	addl	$32, %edx
+	decl	%ecx
 	jnz	1b
 	sfence
+	incl	%eax
 	pop	%ebp
 	ret
-
-/*
- * void sse2_copy_page(void *src, void *dst)
- *
- * Copy a page without polluting the cache.
- */
-ENTRY(sse2_copy_page)
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp), %esi
-	movl	24(%esp), %edi
-	movl	$PAGE_SIZE, %ebp
-	.align	16
-1:
-	movl	0(%esi), %eax
-	movl	4(%esi), %ebx
-	movl	8(%esi), %ecx
-	movl	12(%esi), %edx
-	movnti	%eax, 0(%edi)
-	movnti	%ebx, 4(%edi)
-	movnti	%ecx, 8(%edi)
-	movnti	%edx, 12(%edi)
-	subl	$16, %ebp
-	leal	16(%esi), %esi
-	leal	16(%edi), %edi
-	jnz	1b
+2:
 	sfence
-	popl	%edi
-	popl	%esi
-	popl	%ebx
 	popl	%ebp
 	ret
+END(sse2_idlezero_page)
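
Note on fillkpt/fillkpt_nox (added near the top of the diff): the following C sketch models what one expansion does. It is not NetBSD code; PG_V/PG_KW here are placeholder bit values rather than the real <x86/pte.h> constants, pde_size plays the role of PDE_SIZE (8 with PAE, 4 without), and nox stands in for RELOC(nox_flag). It mirrors the trick where the first store writes the upper half of a PAE entry (0 or the 32-bit NX bit), while in the non-PAE case that same store lands on the 4-byte entry itself and merely pre-clears it.

	#include <stdint.h>

	#define PAGE_SIZE 4096u
	#define PG_V    0x01u        /* placeholder "valid" bit */
	#define PG_KW   0x02u        /* placeholder "kernel r/w" bits */
	#define PG_NX32 0x80000000u  /* 32bit view of PG_NX, as in the diff */

	/*
	 * Model of fillkpt (nox == 0) and fillkpt_nox (nox == PG_NX32 when
	 * the CPU supports NX): fill 'count' consecutive entries of
	 * 'pde_size' bytes, mapping consecutive physical pages.
	 */
	void
	fillkpt_model(uint8_t *entry, uint32_t pa_and_flags, uint32_t count,
	    uint32_t pde_size, uint32_t nox)
	{
		while (count--) {
			/*
			 * Upper 32 bits: 0 or NX. With pde_size == 4 this
			 * store hits the entry itself and just pre-clears
			 * it; the next store overwrites it, exactly as the
			 * first movl of the assembly macro does.
			 */
			*(uint32_t *)(entry + pde_size - 4) = nox;
			/* Low 32 bits: phys addr | control | status. */
			*(uint32_t *)entry = pa_and_flags;
			entry += pde_size;		/* next PTE/PDE */
			pa_and_flags += PAGE_SIZE;	/* next phys page */
		}
	}

Because PAGE_SIZE-aligned addresses leave the low flag bits untouched, incrementing the combined pa|flags word by PAGE_SIZE each iteration is safe, which is why the assembly keeps both in a single register (%eax).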
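Similarly, the "Ensure that nkptp covers BOOTSTRAP TABLES" loop added in the diff is a small fixpoint: the bootstrap tables are appended after the kernel image, and growing them can itself require one more L1 page. A hedged C model, assuming non-PAE constants (4 KB pages, one PDE maps 4 MB) and illustrative UPAGES/PDP_SIZE values; with PAE the table pages double in size (the "shll $PGSHIFT+1" branch), which this sketch ignores:

	#include <stdint.h>

	#define PGSHIFT  12                       /* 4 KB pages */
	#define L2_SHIFT 22                       /* one non-PAE PDE = 4 MB */
	#define L2_MASK  ((1u << L2_SHIFT) - 1)   /* ~L2_FRAME in the asm */
	#define UPAGES   2                        /* illustrative */
	#define PDP_SIZE 1                        /* non-PAE */

	/*
	 * Bootstrap-table size for a kernel image ending at physical
	 * address 'end'. nkptp[1] must cover the image *and* the tables
	 * appended after it, so iterate until stable.
	 */
	uint32_t
	bootstrap_tablesize(uint32_t end)
	{
		/* ceil(end / 4MB) + 1 extra PTP for VAs stolen by bootstrap */
		uint32_t nkptp = ((end + L2_MASK) >> L2_SHIFT) + 1;

		for (;;) {
			uint32_t tablesize =
			    (PDP_SIZE + UPAGES + nkptp) << PGSHIFT;
			uint32_t need =
			    ((end + tablesize + L2_MASK) >> L2_SHIFT) + 1;
			if (need == nkptp)
				return tablesize;
			nkptp = need;	/* corresponds to "jnz 1b" */
		}
	}

The loop converges almost immediately in practice: one extra iteration only happens when adding the tables pushes the end of the image across a 4 MB boundary.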