Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.19 retrieving revision 1.156 diff -u -p -r1.19 -r1.156 --- src/sys/arch/i386/i386/locore.S 2003/09/12 16:51:04 1.19 +++ src/sys/arch/i386/i386/locore.S 2018/01/04 14:02:23 1.156 @@ -1,11 +1,15 @@ -/* $NetBSD: locore.S,v 1.19 2003/09/12 16:51:04 christos Exp $ */ +/* $NetBSD: locore.S,v 1.156 2018/01/04 14:02:23 maxv Exp $ */ -/*- - * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. - * All rights reserved. +/* + * Copyright-o-rama! + */ + +/* + * Copyright (c) 1998, 2000, 2004, 2006, 2007, 2009, 2016 + * The NetBSD Foundation, Inc., All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum. + * by Charles M. Hannum, by Andrew Doran and by Maxime Villard. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -15,13 +19,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED @@ -36,6 +33,66 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2006 Manuel Bouyer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Copyright (c) 2001 Wasabi Systems, Inc. + * All rights reserved. + * + * Written by Frank van der Linden for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the NetBSD Project by + * Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + * or promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. 
@@ -70,21 +127,17 @@ * @(#)locore.s 7.3 (Berkeley) 5/13/91 */ -#include "opt_compat_netbsd.h" -#include "opt_compat_oldboot.h" -#include "opt_cputype.h" +#include +__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.156 2018/01/04 14:02:23 maxv Exp $"); + +#include "opt_copy_symtab.h" #include "opt_ddb.h" -#include "opt_dummy_nops.h" -#include "opt_ipkdb.h" -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" +#include "opt_modular.h" +#include "opt_multiboot.h" #include "opt_realmem.h" -#include "opt_user_ldt.h" -#include "opt_vm86.h" +#include "opt_xen.h" -#include "npx.h" #include "assym.h" -#include "apm.h" #include "lapic.h" #include "ioapic.h" #include "ksyms.h" @@ -92,223 +145,230 @@ #include #include -#include -#include -#include #include #include #include -#include - -#if NLAPIC > 0 #include -#endif - -/* LINTSTUB: include */ -/* LINTSTUB: include */ -/* LINTSTUB: include */ - -#include - -#if defined(MULTIPROCESSOR) - -#define SET_CURLWP(lwp,cpu) \ - movl CPUVAR(SELF),cpu ; \ - movl lwp,CPUVAR(CURLWP) ; \ - movl cpu,L_CPU(lwp) - -#else - -#define SET_CURLWP(lwp,tcpu) movl lwp,CPUVAR(CURLWP) -#define GET_CURLWP(reg) movl CPUVAR(CURLWP),reg +#include +#include +#include +#ifndef XEN +#include #endif -#define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg -#define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB) - -#define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED) - -/* XXX temporary kluge; these should not be here */ /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */ #include +#ifndef XEN +#define _RELOC(x) ((x) - KERNBASE) +#else +#define _RELOC(x) ((x)) +#endif /* XEN */ +#define RELOC(x) _RELOC(_C_LABEL(x)) -/* Disallow old names for REALBASEMEM */ -#ifdef BIOSBASEMEM -#error BIOSBASEMEM option deprecated; use REALBASEMEM only if memory size reported by latest boot block is incorrect -#endif +/* 32bit version of PG_NX */ +#define PG_NX32 0x80000000 -/* Disallow old names for REALEXTMEM */ -#ifdef EXTMEM_SIZE -#error EXTMEM_SIZE option deprecated; use REALEXTMEM 
only if memory size reported by latest boot block is incorrect -#endif -#ifdef BIOSEXTMEM -#error BIOSEXTMEM option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect +#ifndef PAE +#define PROC0_PDIR_OFF 0 +#else +#define PROC0_L3_OFF 0 +#define PROC0_PDIR_OFF 1 * PAGE_SIZE #endif -#include - +#define PROC0_STK_OFF (PROC0_PDIR_OFF + PDP_SIZE * PAGE_SIZE) +#define PROC0_PTP1_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE) -#ifdef MULTIPROCESSOR -#include -#endif - /* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). + * fillkpt - Fill in a kernel page table + * eax = pte (page frame | control | status) + * ebx = page table address + * ecx = number of pages to map * - * XXX 4 == sizeof pde - */ - .set _C_LABEL(PTmap),(PDSLOT_PTE << PDSHIFT) - .set _C_LABEL(PTD),(_C_LABEL(PTmap) + PDSLOT_PTE * PAGE_SIZE) - .set _C_LABEL(PTDpde),(_C_LABEL(PTD) + PDSLOT_PTE * 4) + * For PAE, each entry is 8 bytes long: we must set the 4 upper bytes to 0. + * This is done by the first instruction of fillkpt. In the non-PAE case, this + * instruction just clears the page table entry. + */ +#define fillkpt \ + cmpl $0,%ecx ; /* zero-sized? */ \ + je 2f ; \ +1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \ + movl %eax,(%ebx) ; /* store phys addr */ \ + addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ + addl $PAGE_SIZE,%eax ; /* next phys page */ \ + loop 1b ; \ +2: ; + +/* + * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit. + */ +#define fillkpt_nox \ + cmpl $0,%ecx ; /* zero-sized? */ \ + je 2f ; \ + pushl %ebp ; \ + movl RELOC(nox_flag),%ebp ; \ +1: movl %ebp,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: NX */ \ + movl %eax,(%ebx) ; /* store phys addr */ \ + addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ + addl $PAGE_SIZE,%eax ; /* next phys page */ \ + loop 1b ; \ + popl %ebp ; \ +2: ; /* - * APTmap, APTD is the alternate recursive pagemap. 
- * It's used when modifying another process's page tables. - * - * XXX 4 == sizeof pde + * fillkpt_blank - Fill in a kernel page table with blank entries + * ebx = page table address + * ecx = number of pages to map */ - .set _C_LABEL(APTmap),(PDSLOT_APTE << PDSHIFT) - .set _C_LABEL(APTD),(_C_LABEL(APTmap) + PDSLOT_APTE * PAGE_SIZE) - .set _C_LABEL(APTDpde),(_C_LABEL(PTD) + PDSLOT_APTE * 4) +#define fillkpt_blank \ + cmpl $0,%ecx ; /* zero-sized? */ \ + je 2f ; \ +1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \ + movl $0,(%ebx) ; /* lower 32 bits: 0 */ \ + addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ + loop 1b ; \ +2: ; +/* + * killkpt - Destroy a kernel page table + * ebx = page table address + * ecx = number of pages to destroy + */ +#define killkpt \ +1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper bits (for PAE) */ \ + movl $0,(%ebx) ; \ + addl $PDE_SIZE,%ebx ; \ + loop 1b ; + + +#ifdef XEN +/* + * Xen guest identifier and loader selection + */ +.section __xen_guest + .ascii "GUEST_OS=netbsd,GUEST_VER=3.0,XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0xc0000000" /* KERNBASE */ + .ascii ",ELF_PADDR_OFFSET=0xc0000000" /* KERNBASE */ + .ascii ",VIRT_ENTRY=0xc0100000" /* KERNTEXTOFF */ + .ascii ",HYPERCALL_PAGE=0x00000101" + /* (???+HYPERCALL_PAGE_OFFSET)/PAGE_SIZE) */ +#ifdef PAE + .ascii ",PAE=yes[extended-cr3]" +#endif + .ascii ",LOADER=generic" +#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB) + .ascii ",BSD_SYMTAB=yes" +#endif + .byte 0 +#endif /* XEN */ /* * Initialization */ .data - .globl _C_LABEL(cpu) - .globl _C_LABEL(esym),_C_LABEL(boothowto) - .globl _C_LABEL(bootinfo),_C_LABEL(atdevbase) -#ifdef COMPAT_OLDBOOT - .globl _C_LABEL(bootdev) -#endif - .globl _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr) - .globl _C_LABEL(biosbasemem),_C_LABEL(biosextmem) + .globl _C_LABEL(tablesize) + .globl _C_LABEL(nox_flag) + .globl _C_LABEL(cputype) + .globl _C_LABEL(cpuid_level) + .globl _C_LABEL(esym) + .globl _C_LABEL(eblob) + .globl 
_C_LABEL(atdevbase) + .globl _C_LABEL(PDPpaddr) + .globl _C_LABEL(lwp0uarea) .globl _C_LABEL(gdt) -#ifdef I586_CPU .globl _C_LABEL(idt) -#endif - .globl _C_LABEL(lapic_tpr) - -#if NLAPIC > 0 -#ifdef __ELF__ - .align PAGE_SIZE -#else - .align 12 -#endif - .globl _C_LABEL(local_apic), _C_LABEL(lapic_id) -_C_LABEL(local_apic): - .space LAPIC_ID -_C_LABEL(lapic_id): - .long 0x00000000 - .space LAPIC_TPRI-(LAPIC_ID+4) -_C_LABEL(lapic_tpr): - .space LAPIC_PPRI-LAPIC_TPRI -_C_LABEL(lapic_ppr): - .space LAPIC_ISR-LAPIC_PPRI -_C_LABEL(lapic_isr): - .space PAGE_SIZE-LAPIC_ISR -#else -_C_LABEL(lapic_tpr): - .long 0 -#endif - -_C_LABEL(cpu): .long 0 # are we 386, 386sx, or 486, - # or Pentium, or.. -_C_LABEL(esym): .long 0 # ptr to end of syms -_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual -_C_LABEL(proc0paddr): .long 0 -_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm -#ifndef REALBASEMEM -_C_LABEL(biosbasemem): .long 0 # base memory reported by BIOS -#else -_C_LABEL(biosbasemem): .long REALBASEMEM -#endif -#ifndef REALEXTMEM -_C_LABEL(biosextmem): .long 0 # extended memory reported by BIOS -#else -_C_LABEL(biosextmem): .long REALEXTMEM -#endif - - .space 512 + .type _C_LABEL(tablesize), @object +_C_LABEL(tablesize): .long 0 +END(tablesize) + .type _C_LABEL(nox_flag), @object +LABEL(nox_flag) .long 0 /* 32bit NOX flag, set if supported */ +END(nox_flag) + .type _C_LABEL(cputype), @object +LABEL(cputype) .long 0 /* are we 80486, Pentium, or.. */ +END(cputype) + .type _C_LABEL(cpuid_level), @object +LABEL(cpuid_level) .long -1 /* max. 
level accepted by cpuid instr */ +END(cpuid_level) + .type _C_LABEL(atdevbase), @object +LABEL(atdevbase) .long 0 /* location of start of iomem in virt */ +END(atdevbase) + .type _C_LABEL(lwp0uarea), @object +LABEL(lwp0uarea) .long 0 +END(lwp0uarea) + .type _C_LABEL(PDPpaddr), @object +LABEL(PDPpaddr) .long 0 /* paddr of PDP, for libkvm */ +END(PDPpaddr) + + /* Space for the temporary stack */ + .size tmpstk, tmpstk - . + .space 512 tmpstk: - - -#define _RELOC(x) ((x) - KERNBASE_LOCORE) -#define RELOC(x) _RELOC(_C_LABEL(x)) +#ifdef XEN + .align PAGE_SIZE, 0x0 /* Align on page boundary */ +LABEL(tmpgdt) + .space PAGE_SIZE /* Xen expects a page */ +END(tmpgdt) +#endif /* XEN */ .text .globl _C_LABEL(kernel_text) .set _C_LABEL(kernel_text),KERNTEXTOFF - .globl start -start: movw $0x1234,0x472 # warm boot +ENTRY(start) +#ifndef XEN - /* - * Load parameters from stack - * (howto, [bootdev], bootinfo, esym, basemem, extmem). - */ - movl 4(%esp),%eax - movl %eax,RELOC(boothowto) -#ifdef COMPAT_OLDBOOT - movl 8(%esp),%eax - movl %eax,RELOC(bootdev) -#endif - movl 12(%esp),%eax + /* Warm boot */ + movw $0x1234,0x472 - testl %eax, %eax - jz 1f - movl (%eax), %ebx /* number of entries */ - movl $RELOC(bootinfo), %edi - movl %ebx, (%edi) - addl $4, %edi -2: - testl %ebx, %ebx - jz 1f - addl $4, %eax - movl (%eax), %ecx /* address of entry */ - pushl %eax - pushl (%ecx) /* len */ - pushl %ecx - pushl %edi - addl (%ecx), %edi /* update dest pointer */ - cmpl $_RELOC(_C_LABEL(bootinfo) + BOOTINFO_MAXSIZE), %edi - jg 2f - call _C_LABEL(memcpy) - addl $12, %esp - popl %eax - subl $1, %ebx - jmp 2b -2: /* cleanup for overflow case */ - addl $16, %esp - movl $RELOC(bootinfo), %edi - subl %ebx, (%edi) /* correct number of entries */ -1: +#if defined(MULTIBOOT) + jmp 1f - movl 16(%esp),%eax - testl %eax,%eax - jz 1f - addl $KERNBASE_LOCORE,%eax -1: movl %eax,RELOC(esym) + .align 4 + .globl Multiboot_Header +_C_LABEL(Multiboot_Header): +#define MULTIBOOT_HEADER_FLAGS 
(MULTIBOOT_HEADER_WANT_MEMORY) + .long MULTIBOOT_HEADER_MAGIC + .long MULTIBOOT_HEADER_FLAGS + .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) - movl RELOC(biosextmem),%eax - testl %eax,%eax - jnz 1f - movl 20(%esp),%eax - movl %eax,RELOC(biosextmem) 1: - movl RELOC(biosbasemem),%eax - testl %eax,%eax - jnz 1f - movl 24(%esp),%eax - movl %eax,RELOC(biosbasemem) + /* Check if we are being executed by a Multiboot-compliant boot + * loader. */ + cmpl $MULTIBOOT_INFO_MAGIC,%eax + jne 1f + + /* + * Indeed, a multiboot-compliant boot loader executed us. We switch + * to the temporary stack, and copy the received Multiboot information + * structure into kernel's data space to process it later -- after we + * are relocated. It will be safer to run complex C code than doing it + * at this point. + */ + movl $_RELOC(tmpstk),%esp + pushl %ebx /* Address of Multiboot information */ + call _C_LABEL(multiboot_pre_reloc) + addl $4,%esp + jmp 2f +#endif + 1: + /* + * At this point, we know that a NetBSD-specific boot loader + * booted this kernel. + * + * Load parameters from the stack (32 bits): + * boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem + * We are not interested in 'bootdev'. + */ + + addl $4,%esp /* Discard return address to boot loader */ + call _C_LABEL(native_loader) + addl $24,%esp +2: /* First, reset the PSL. */ pushl $PSL_MBO popfl @@ -317,8 +377,6 @@ start: movw $0x1234,0x472 # warm boot xorl %eax,%eax movw %ax,%fs movw %ax,%gs - decl %eax - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL /* Find out our CPU type. */ @@ -355,11 +413,11 @@ isnx586: * Don't try cpuid, as Nx586s reportedly don't support the * PSL_ID bit. */ - movl $CPU_NX586,RELOC(cpu) + movl $CPU_NX586,RELOC(cputype) jmp 2f is386: - movl $CPU_386,RELOC(cpu) + movl $CPU_386,RELOC(cputype) jmp 2f try486: /* Try to toggle identification flag; does not exist on early 486s. 
*/ @@ -378,7 +436,7 @@ try486: /* Try to toggle identification testl %eax,%eax jnz try586 -is486: movl $CPU_486,RELOC(cpu) +is486: movl $CPU_486,RELOC(cputype) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following @@ -396,30 +454,30 @@ is486: movl $CPU_486,RELOC(cpu) popfl jmp 2f trycyrix486: - movl $CPU_6x86,RELOC(cpu) # set CPU type + movl $CPU_6x86,RELOC(cputype) /* set CPU type */ /* * Check for Cyrix 486 CPU by seeing if the flags change during a * divide. This is documented in the Cx486SLC/e SMM Programmer's * Guide. */ xorl %edx,%edx - cmpl %edx,%edx # set flags to known state + cmpl %edx,%edx /* set flags to known state */ pushfl - popl %ecx # store flags in ecx + popl %ecx /* store flags in ecx */ movl $-1,%eax movl $4,%ebx - divl %ebx # do a long division + divl %ebx /* do a long division */ pushfl popl %eax - xorl %ecx,%eax # are the flags different? - testl $0x8d5,%eax # only check C|PF|AF|Z|N|V - jne 2f # yes; must be Cyrix 6x86 CPU - movl $CPU_486DLC,RELOC(cpu) # set CPU type + xorl %ecx,%eax /* are the flags different? */ + testl $0x8d5,%eax /* only check C|PF|AF|Z|N|V */ + jne 2f /* yes; must be Cyrix 6x86 CPU */ + movl $CPU_486DLC,RELOC(cputype) /* set CPU type */ #ifndef CYRIX_CACHE_WORKS /* Disable caching of the ISA hole only. */ invd - movb $CCR0,%al # Configuration Register index (CCR0) + movb $CCR0,%al /* Configuration Register index (CCR0) */ outb %al,$0x22 inb $0x23,%al orb $(CCR0_NC1|CCR0_BARB),%al @@ -431,8 +489,8 @@ trycyrix486: invd #else /* CYRIX_CACHE_WORKS */ /* Set cache parameters */ - invd # Start with guaranteed clean cache - movb $CCR0,%al # Configuration Register index (CCR0) + invd /* Start with guaranteed clean cache */ + movb $CCR0,%al /* Configuration Register index (CCR0) */ outb %al,$0x22 inb $0x23,%al andb $~CCR0_NC0,%al @@ -478,7 +536,17 @@ trycyrix486: try586: /* Use the `cpuid' instruction. 
*/ xorl %eax,%eax cpuid - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL + movl %eax,RELOC(cpuid_level) + + /* + * Retrieve the NX/XD flag. We use the 32bit version of PG_NX. + */ + movl $0x80000001,%eax + cpuid + andl $CPUID_NOX,%edx + jz no_NOX + movl $PG_NX32,RELOC(nox_flag) +no_NOX: 2: /* @@ -490,1117 +558,383 @@ try586: /* Use the `cpuid' instruction. * The boot program should check: * text+data <= &stack_variable - more_space_for_stack * text+data+bss+pad+space_for_page_tables <= end_of_memory - * Oops, the gdt is in the carcass of the boot program so clearing + * + * XXX: the gdt is in the carcass of the boot program so clearing * the rest of memory is still not possible. */ - movl $_RELOC(tmpstk),%esp # bootstrap stack end location + movl $_RELOC(tmpstk),%esp /* - * Virtual address space of kernel: + * There are two different layouts possible, depending on whether PAE is + * enabled or not. * - * text | data | bss | [syms] | page dir | proc0 kstack - * 0 1 2 3 + * If PAE is not enabled, there are two levels of pages: PD -> PT. They will + * be referred to as: L2 -> L1. L2 is 1 page long. The BOOTSTRAP TABLES have + * the following layout: + * +-----+------------+----+ + * | L2 -> PROC0 STK -> L1 | + * +-----+------------+----+ + * + * If PAE is enabled, there are three levels of pages: PDP -> PD -> PT. They + * will be referred to as: L3 -> L2 -> L1. L3 is 1 page long, L2 is 4 pages + * long. 
The BOOTSTRAP TABLES have the following layout: + * +-----+-----+------------+----+ + * | L3 -> L2 -> PROC0 STK -> L1 | + * +-----+-----+------------+----+ + * + * Virtual address space of the kernel in both cases: + * +------+--------+------+-----+--------+---------------------+----------- + * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | BOOTSTRAP + * +------+--------+------+-----+--------+---------------------+----------- + * (1) (2) (3) + * + * -------+-------------+ + * TABLES | ISA I/O MEM | + * -------+-------------+ + * (4) + * + * PROC0 STK is obviously not linked as a page level. It just happens to be + * caught between L2 and L1. + * + * Important note: the kernel segments are properly 4k-aligned + * (see kern.ldscript), so there's no need to enforce alignment. */ -#define PROC0PDIR ((0) * PAGE_SIZE) -#define PROC0STACK ((1) * PAGE_SIZE) -#define SYSMAP ((1+UPAGES) * PAGE_SIZE) -#define TABLESIZE ((1+UPAGES) * PAGE_SIZE) /* + nkpde * PAGE_SIZE */ - /* Find end of kernel image. */ - movl $RELOC(end),%edi -#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE) - /* Save the symbols (if loaded). */ + /* Find end of kernel image; brings us on (1). */ + movl $RELOC(__kernel_end),%edi + +#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB) + /* Save the symbols (if loaded); brings us on (2). */ movl RELOC(esym),%eax testl %eax,%eax jz 1f - subl $KERNBASE_LOCORE,%eax + subl $KERNBASE,%eax movl %eax,%edi 1: #endif - /* Calculate where to start the bootstrap tables. */ - movl %edi,%esi # edi = esym ? esym : end - addl $PGOFSET,%esi # page align up + /* Skip over any modules/blobs; brings us on (3). */ + movl RELOC(eblob),%eax + testl %eax,%eax + jz 1f + subl $KERNBASE,%eax + movl %eax,%edi +1: + + /* We are on (3). Align up for BOOTSTRAP TABLES. 
*/ + movl %edi,%esi + addl $PGOFSET,%esi andl $~PGOFSET,%esi - /* - * Calculate the size of the kernel page table directory, and - * how many entries it will have. - */ - movl RELOC(nkpde),%ecx # get nkpde - cmpl $NKPTP_MIN,%ecx # larger than min? - jge 1f - movl $NKPTP_MIN,%ecx # set at min - jmp 2f -1: cmpl $NKPTP_MAX,%ecx # larger than max? - jle 2f - movl $NKPTP_MAX,%ecx -2: + /* nkptp[1] = (esi + ~L2_FRAME) >> L2_SHIFT + 1; */ + movl %esi,%eax + addl $~L2_FRAME,%eax + shrl $L2_SHIFT,%eax + incl %eax /* one more PTP for VAs stolen by bootstrap */ +1: movl %eax,RELOC(nkptp)+1*4 + + /* tablesize = (PDP_SIZE + UPAGES + nkptp[1]) << PGSHIFT; */ + addl $(PDP_SIZE+UPAGES),%eax +#ifdef PAE + incl %eax /* one more page for L3 */ + shll $PGSHIFT+1,%eax /* PTP tables are twice larger with PAE */ +#else + shll $PGSHIFT,%eax +#endif + movl %eax,RELOC(tablesize) - /* Clear memory for bootstrap tables. */ - shll $PGSHIFT,%ecx - addl $TABLESIZE,%ecx - addl %esi,%ecx # end of tables - subl %edi,%ecx # size of tables - shrl $2,%ecx + /* Ensure that nkptp[1] covers BOOTSTRAP TABLES, ie: + * (esi + tablesize) >> L2_SHIFT + 1 < nkptp[1] */ + addl %esi,%eax + addl $~L2_FRAME,%eax + shrl $L2_SHIFT,%eax + incl %eax + cmpl %eax,RELOC(nkptp)+1*4 + jnz 1b + + /* Now, zero out the BOOTSTRAP TABLES (before filling them in). */ + movl %esi,%edi xorl %eax,%eax cld + movl RELOC(tablesize),%ecx + shrl $2,%ecx rep - stosl + stosl /* copy eax -> edi */ /* - * fillkpt - * eax = pte (page frame | control | status) - * ebx = page table address - * ecx = number of pages to map + * Build the page tables and levels. We go from L1 to L2/L3, and link the levels + * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't + * be > 4G, or we can't deal with it anyway, since we are in 32bit mode. */ -#define fillkpt \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $4,%ebx ; /* next pte */ \ - loop 1b ; + /* + * Build L1. 
+ */ + leal (PROC0_PTP1_OFF)(%esi),%ebx -/* - * Build initial page tables. - */ - /* Calculate end of text segment, rounded to a page. */ - leal (RELOC(etext)+PGOFSET),%edx - andl $~PGOFSET,%edx - - /* Skip over the first 1MB. */ - movl $_RELOC(KERNTEXTOFF),%eax - movl %eax,%ecx + /* Skip the area below the kernel text. */ + movl $(KERNTEXTOFF - KERNBASE),%ecx shrl $PGSHIFT,%ecx - leal (SYSMAP)(%esi,%ecx,4),%ebx + fillkpt_blank - /* Map the kernel text read-only. */ - movl %edx,%ecx + /* Map the kernel text RX. */ + movl $(KERNTEXTOFF - KERNBASE),%eax /* start of TEXT */ + movl $RELOC(__rodata_start),%ecx subl %eax,%ecx shrl $PGSHIFT,%ecx orl $(PG_V|PG_KR),%eax fillkpt - /* Map the data, BSS, and bootstrap tables read-write. */ - leal (PG_V|PG_KW)(%edx),%eax - movl RELOC(nkpde),%ecx - shll $PGSHIFT,%ecx - addl $TABLESIZE,%ecx - addl %esi,%ecx # end of tables - subl %edx,%ecx # subtract end of text + /* Map the kernel rodata R. */ + movl $RELOC(__rodata_start),%eax + movl $RELOC(__data_start),%ecx + subl %eax,%ecx shrl $PGSHIFT,%ecx - fillkpt + orl $(PG_V|PG_KR),%eax + fillkpt_nox + + /* Map the kernel data+bss RW. */ + movl $RELOC(__data_start),%eax + movl $RELOC(__kernel_end),%ecx + subl %eax,%ecx + shrl $PGSHIFT,%ecx + orl $(PG_V|PG_KW),%eax + fillkpt_nox + + /* Map [SYMS]+[PRELOADED MODULES] RW. */ + movl $RELOC(__kernel_end),%eax + movl %esi,%ecx /* start of BOOTSTRAP TABLES */ + subl %eax,%ecx + shrl $PGSHIFT,%ecx + orl $(PG_V|PG_KW),%eax + fillkpt_nox + + /* Map the BOOTSTRAP TABLES RW. */ + movl %esi,%eax /* start of BOOTSTRAP TABLES */ + movl RELOC(tablesize),%ecx /* length of BOOTSTRAP TABLES */ + shrl $PGSHIFT,%ecx + orl $(PG_V|PG_KW),%eax + fillkpt_nox + + /* We are on (4). Map ISA I/O MEM RW. */ + movl $IOM_BEGIN,%eax + movl $IOM_SIZE,%ecx /* size of ISA I/O MEM */ + shrl $PGSHIFT,%ecx + orl $(PG_V|PG_KW/*|PG_N*/),%eax + fillkpt_nox - /* Map ISA I/O memory. 
*/ - movl $(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax # having these bits set - movl $(IOM_SIZE>>PGSHIFT),%ecx # for this many pte s, + /* + * Build L2 for identity mapping. Linked to L1. + */ + leal (PROC0_PDIR_OFF)(%esi),%ebx + leal (PROC0_PTP1_OFF)(%esi),%eax + orl $(PG_V|PG_KW),%eax + movl RELOC(nkptp)+1*4,%ecx fillkpt -/* - * Construct a page table directory. - */ - /* Install PDEs for temporary double map of kernel. */ - movl RELOC(nkpde),%ecx # for this many pde s, - leal (PROC0PDIR+0*4)(%esi),%ebx # which is where temp maps! - leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0, + /* Set up L2 entries for actual kernel mapping */ + leal (PROC0_PDIR_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx + leal (PROC0_PTP1_OFF)(%esi),%eax + orl $(PG_V|PG_KW),%eax + movl RELOC(nkptp)+1*4,%ecx fillkpt - /* Map kernel PDEs. */ - movl RELOC(nkpde),%ecx # for this many pde s, - leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # kernel pde offset - leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0, + /* Install recursive top level PDE */ + leal (PROC0_PDIR_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx + leal (PROC0_PDIR_OFF)(%esi),%eax + orl $(PG_V|PG_KW),%eax + movl $PDP_SIZE,%ecx + fillkpt_nox + +#ifdef PAE + /* + * Build L3. Linked to L2. + */ + leal (PROC0_L3_OFF)(%esi),%ebx + leal (PROC0_PDIR_OFF)(%esi),%eax + orl $(PG_V),%eax + movl $PDP_SIZE,%ecx fillkpt - /* Install a PDE recursively mapping page directory as a page table! */ - leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd - movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot - - /* Save phys. addr of PTD, for libkvm. */ - movl %esi,RELOC(PTDpaddr) - - /* Load base of page directory and enable mapping. */ - movl %esi,%eax # phys address of ptd in proc 0 - movl %eax,%cr3 # load ptd addr into mmu - movl %cr0,%eax # get control word - # enable paging & NPX emulation - orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP),%eax - movl %eax,%cr0 # and let's page NOW! 
+ /* Enable PAE mode */ + movl %cr4,%eax + orl $CR4_PAE,%eax + movl %eax,%cr4 +#endif + + /* Save physical address of L2. */ + leal (PROC0_PDIR_OFF)(%esi),%eax + movl %eax,RELOC(PDPpaddr) + + /* + * Startup checklist: + * 1. Load %cr3 with pointer to L2 (or L3 for PAE). + */ + movl %esi,%eax + movl %eax,%cr3 + + /* + * 2. Set NOX in EFER, if available. + */ + movl RELOC(nox_flag),%ebx + cmpl $0,%ebx + je skip_NOX + movl $MSR_EFER,%ecx + rdmsr + xorl %eax,%eax + orl $(EFER_NXE),%eax + wrmsr +skip_NOX: + + /* + * 3. Enable paging and the rest of it. + */ + movl %cr0,%eax + orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax + movl %eax,%cr0 - pushl $begin # jump to high mem + pushl $begin /* jump to high mem */ ret begin: - /* Now running relocated at KERNBASE_LOCORE. Remove double mapping. */ - movl _C_LABEL(nkpde),%ecx # for this many pde s, - leal (PROC0PDIR+0*4)(%esi),%ebx # which is where temp maps! - addl $(KERNBASE_LOCORE), %ebx # now use relocated address -1: movl $0,(%ebx) - addl $4,%ebx # next pde - loop 1b + /* + * We have arrived. There's no need anymore for the identity mapping in + * low memory, remove it. + */ + movl _C_LABEL(nkptp)+1*4,%ecx + leal (PROC0_PDIR_OFF)(%esi),%ebx /* old, phys address of PDIR */ + addl $(KERNBASE), %ebx /* new, virt address of PDIR */ + killkpt /* Relocate atdevbase. */ - movl _C_LABEL(nkpde),%edx - shll $PGSHIFT,%edx - addl $(TABLESIZE+KERNBASE_LOCORE),%edx + movl $KERNBASE,%edx + addl _C_LABEL(tablesize),%edx addl %esi,%edx movl %edx,_C_LABEL(atdevbase) /* Set up bootstrap stack. 
*/ - leal (PROC0STACK+KERNBASE_LOCORE)(%esi),%eax - movl %eax,_C_LABEL(proc0paddr) + leal (PROC0_STK_OFF+KERNBASE)(%esi),%eax + movl %eax,_C_LABEL(lwp0uarea) leal (USPACE-FRAMESIZE)(%eax),%esp - movl %esi,PCB_CR3(%eax) # pcb->pcb_cr3 - xorl %ebp,%ebp # mark end of frames + movl %esi,PCB_CR3(%eax) /* pcb->pcb_cr3 */ + xorl %ebp,%ebp /* mark end of frames */ + +#if defined(MULTIBOOT) + /* It is now safe to parse the Multiboot information structure + * we saved before from C code. Note that we cannot delay its + * parsing any more because initgdt (called below) needs to make + * use of this information. */ + call _C_LABEL(multiboot_post_reloc) +#endif - subl $NGDT*8, %esp # space for temporary gdt + subl $NGDT*8, %esp /* space for temporary gdt */ pushl %esp call _C_LABEL(initgdt) addl $4,%esp - - movl _C_LABEL(nkpde),%eax - shll $PGSHIFT,%eax - addl $TABLESIZE,%eax - addl %esi,%eax # skip past stack and page tables - pushl %eax - call _C_LABEL(init386) # wire 386 chip for unix operation - addl $4+NGDT*8,%esp # pop temporary gdt + movl _C_LABEL(tablesize),%eax + addl %esi,%eax /* skip past stack and page tables */ -#ifdef SAFARI_FIFO_HACK - movb $5,%al - movw $0x37b,%dx - outb %al,%dx - movw $0x37f,%dx - inb %dx,%al - movb %al,%cl - - orb $1,%cl - - movb $5,%al - movw $0x37b,%dx - outb %al,%dx - movw $0x37f,%dx - movb %cl,%al - outb %al,%dx -#endif /* SAFARI_FIFO_HACK */ +#ifdef PAE + pushl $0 /* init386() expects a 64 bits paddr_t with PAE */ +#endif + pushl %eax + call _C_LABEL(init_bootspace) + call _C_LABEL(init386) + addl $PDE_SIZE,%esp /* pop paddr_t */ + addl $NGDT*8,%esp /* pop temporary gdt */ call _C_LABEL(main) +#else /* XEN */ + /* First, reset the PSL. */ + pushl $PSL_MBO + popfl -/* - * void proc_trampoline(void); - * This is a trampoline function pushed onto the stack of a newly created - * process in order to do some additional setup. 
The trampoline is entered by - * cpu_switch()ing to the process, so we abuse the callee-saved registers used - * by cpu_switch() to store the information about the stub to call. - * NOTE: This function does not have a normal calling sequence! - */ -/* LINTSTUB: Func: void proc_trampoline(void) */ -NENTRY(proc_trampoline) -#ifdef MULTIPROCESSOR - call _C_LABEL(proc_trampoline_mp) -#endif - movl $IPL_NONE,CPUVAR(ILEVEL) - pushl %ebx - call *%esi - addl $4,%esp - INTRFASTEXIT - /* NOTREACHED */ + cld -/*****************************************************************************/ -#ifdef COMPAT_16 -/* - * Signal trampoline; copied to top of user stack. - */ -/* LINTSTUB: Var: char sigcode[1], esigcode[1]; */ -NENTRY(sigcode) /* - * Handler has returned here as if we called it. The sigcontext - * is on the stack after the 3 args "we" pushed. + * Xen info: + * - %esp -> stack, *theoretically* the last used page by Xen bootstrap */ - leal 12(%esp),%eax # get pointer to sigcontext - movl %eax,4(%esp) # put it in the argument slot - # fake return address already there -#if defined(SYS_compat_16___sigreturn14) - movl $SYS_compat_16___sigreturn14,%eax -#elif defined(SYS___sigreturn14) - movl $SYS___sigreturn14,%eax -#else - #error "no sigreturn14 syscall" -#endif - int $0x80 # enter kernel with args on stack - movl $SYS_exit,%eax - int $0x80 # exit if sigreturn fails - .globl _C_LABEL(esigcode) -_C_LABEL(esigcode): -#endif + movl %esp,%ebx + movl $_RELOC(tmpstk),%esp -/*****************************************************************************/ - -/* - * The following primitives are used to fill and copy regions of memory. - */ - -/* - * XXX No section 9 man page for fillw. - * fillw seems to be very sparsely used (only in pccons it seems.) - * One wonders if it couldn't be done without. - * -- Perry Metzger, May 7, 2001 - */ -/* - * void fillw(short pattern, void *addr, size_t len); - * Write len copies of pattern at addr. 
- */ -/* LINTSTUB: Func: void fillw(short pattern, void *addr, size_t len) */ -ENTRY(fillw) - pushl %edi - movl 8(%esp),%eax - movl 12(%esp),%edi - movw %ax,%cx - rorl $16,%eax - movw %cx,%ax - cld - movl 16(%esp),%ecx - shrl %ecx # do longwords - rep - stosl - movl 16(%esp),%ecx - andl $1,%ecx # do remainder + /* Clear BSS. */ + xorl %eax,%eax + movl $RELOC(__bss_start),%edi + movl $RELOC(_end),%ecx + subl %edi,%ecx rep - stosw - popl %edi - ret + stosb -/* - * int kcopy(const void *from, void *to, size_t len); - * Copy len bytes, abort on fault. - */ -/* LINTSTUB: Func: int kcopy(const void *from, void *to, size_t len) */ -ENTRY(kcopy) - pushl %esi - pushl %edi - GET_CURPCB(%eax) # load curpcb into eax and set on-fault - pushl PCB_ONFAULT(%eax) - movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax # overlapping? - jb 1f - cld # nope, copy forward - shrl $2,%ecx # copy by 32-bit words + /* Copy the necessary stuff from start_info structure. */ + /* We need to copy shared_info early, so that sti/cli work */ + movl $RELOC(start_info_union),%edi + movl $128,%ecx rep movsl - movl 24(%esp),%ecx - andl $3,%ecx # any bytes left? - rep - movsb - GET_CURPCB(%edx) # XXX save curpcb? - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi + /* Clear segment registers. */ xorl %eax,%eax - ret + movw %ax,%fs + movw %ax,%gs - ALIGN_TEXT -1: addl %ecx,%edi # copy backward - addl %ecx,%esi - std - andl $3,%ecx # any fractional bytes? - decl %edi - decl %esi - rep - movsb - movl 24(%esp),%ecx # copy remainder by 32-bit words - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - cld - - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret - -/*****************************************************************************/ - -/* - * The following primitives are used to copy data in and out of the user's - * address space. 
- */ - -/* - * Default to the lowest-common-denominator. We will improve it - * later. - */ -#if defined(I386_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i386_copyout) -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) -#elif defined(I486_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) -#elif defined(I586_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */ -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */ -#elif defined(I686_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */ -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */ -#endif - - .data - - .globl _C_LABEL(copyout_func) -_C_LABEL(copyout_func): - .long DEFAULT_COPYOUT - - .globl _C_LABEL(copyin_func) -_C_LABEL(copyin_func): - .long DEFAULT_COPYIN - - .text - -/* - * int copyout(const void *from, void *to, size_t len); - * Copy len bytes into the user's address space. - * see copyout(9) - */ -/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(copyout) - jmp *_C_LABEL(copyout_func) - -#if defined(I386_CPU) -/* LINTSTUB: Func: int i386_copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(i386_copyout) - pushl %esi - pushl %edi - pushl $0 - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is writable. The 486 will do this for us; the - * 386 will not. (We assume that pages in user space that are not - * writable by the user are not writable by the kernel either.) - */ - movl %edi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - testl %eax,%eax # anything to do? - jz 3f - - /* - * We have to check each PTE for (write) permission, since the CPU - * doesn't do it for us. - */ - - /* Compute number of pages. 
*/ - movl %edi,%ecx - andl $PGOFSET,%ecx - addl %eax,%ecx - decl %ecx - shrl $PGSHIFT,%ecx - - /* Compute PTE offset for start address. */ - shrl $PGSHIFT,%edi - - GET_CURPCB(%edx) - movl $2f,PCB_ONFAULT(%edx) - -1: /* Check PTE for each page. */ - testb $PG_RW,_C_LABEL(PTmap)(,%edi,4) - jz 2f - -4: incl %edi - decl %ecx - jns 1b - - movl 20(%esp),%edi - movl 24(%esp),%eax - jmp 3f - -2: /* Simulate a trap. */ - pushl %ecx - movl %edi,%eax - shll $PGSHIFT,%eax - pushl %eax - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # pop argument - popl %ecx - testl %eax,%eax # if not ok, return EFAULT - jz 4b - jmp _C_LABEL(copy_efault) - -3: GET_CURPCB(%edx) - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I386_CPU */ - -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) -/* LINTSTUB: Func: int i486_copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(i486_copyout) - pushl %esi - pushl %edi - pushl $0 - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax + xorl %eax,%eax + cpuid + movl %eax,RELOC(cpuid_level) /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. + * Use a temporary GDT page. We'll re-add it to uvm(9) once we're done + * using it. 
*/ - movl %edi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - GET_CURPCB(%edx) - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I486_CPU || I586_CPU || I686_CPU */ - -/* - * int copyin(const void *from, void *to, size_t len); - * Copy len bytes from the user's address space. - * see copyin(9) - */ -/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */ -ENTRY(copyin) - jmp *_C_LABEL(copyin_func) - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \ - defined(I686_CPU) -/* LINTSTUB: Func: int i386_copyin(const void *uaddr, void *kaddr, size_t len) */ -ENTRY(i386_copyin) - pushl %esi - pushl %edi - GET_CURPCB(%eax) - pushl $0 - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is readable, and the CPU will do that for us. 
- */ - movl %esi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I386_CPU || I486_CPU || I586_CPU || I686_CPU */ - -/* LINTSTUB: Ignore */ -NENTRY(copy_efault) - movl $EFAULT,%eax - -/* LINTSTUB: Ignore */ -NENTRY(copy_fault) - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - ret - -/* - * int copyoutstr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, into the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. - * see copyoutstr(9) - */ -/* LINTSTUB: Func: int copyoutstr(const void *kaddr, void *uaddr, size_t len, size_t *done) */ -ENTRY(copyoutstr) - pushl %esi - pushl %edi - - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 5f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - /* Compute number of bytes in first page. */ - movl %edi,%eax - andl $PGOFSET,%eax - movl $PAGE_SIZE,%ecx - subl %eax,%ecx # ecx = PAGE_SIZE - (src % PAGE_SIZE) - - GET_CURPCB(%eax) - movl $6f,PCB_ONFAULT(%eax) - -1: /* - * Once per page, check that we are still within the bounds of user - * space, and check for a write fault. - */ - cmpl $VM_MAXUSER_ADDRESS,%edi - jae _C_LABEL(copystr_efault) - - /* Compute PTE offset. */ - movl %edi,%eax - shrl $PGSHIFT,%eax # calculate pte address - - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 2f - -6: /* Simulate a trap. 
*/ - pushl %edx - pushl %edi - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear argument from stack - popl %edx - testl %eax,%eax - jnz _C_LABEL(copystr_efault) - -2: /* Copy up to end of this page. */ - subl %ecx,%edx # predecrement total count - jnc 3f - addl %edx,%ecx # ecx += (edx - ecx) = edx - xorl %edx,%edx - -3: decl %ecx - js 4f - lodsb - stosb - testb %al,%al - jnz 3b - - /* Success -- 0 byte reached. */ - addl %ecx,%edx # add back residual for this page - xorl %eax,%eax - jmp copystr_return - -4: /* Go to next page, if any. */ - movl $PAGE_SIZE,%ecx - testl %edx,%edx - jnz 1b - - /* edx is zero -- return ENAMETOOLONG. */ - movl $ENAMETOOLONG,%eax - jmp copystr_return -#endif /* I386_CPU */ - -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) -5: GET_CURPCB(%eax) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) - /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). - */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %edi,%eax - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20(%esp) - -1: incl %edx - cld - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%edi - jae _C_LABEL(copystr_efault) - movl $ENAMETOOLONG,%eax - jmp copystr_return -#endif /* I486_CPU || I586_CPU || I686_CPU */ - -/* - * int copyinstr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, from the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. 
- * see copyinstr(9) - */ -/* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */ -ENTRY(copyinstr) - pushl %esi - pushl %edi - GET_CURPCB(%ecx) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) + movl $RELOC(tmpgdt),%eax + pushl %eax /* start of temporary gdt */ + call _C_LABEL(initgdt) + addl $4,%esp - movl 12(%esp),%esi # %esi = from - movl 16(%esp),%edi # %edi = to - movl 20(%esp),%edx # %edx = maxlen + call xen_locore /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%esi). + * The first VA available is returned by xen_locore in %eax. We + * use it as the UAREA, and set up the stack here. */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %esi,%eax - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20(%esp) - -1: incl %edx - cld - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%esi - jae _C_LABEL(copystr_efault) - movl $ENAMETOOLONG,%eax - jmp copystr_return - -/* LINTSTUB: Ignore */ -NENTRY(copystr_efault) - movl $EFAULT,%eax - -/* LINTSTUB: Ignore */ -NENTRY(copystr_fault) -copystr_return: - /* Set *lencopied and return %eax. */ - GET_CURPCB(%ecx) - movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 8f - movl %ecx,(%edx) - -8: popl %edi - popl %esi - ret - -/* - * int copystr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long. Return the - * number of characters copied (including the NUL) in *lencopied. If the - * string is too long, return ENAMETOOLONG; else return 0. 
- * see copystr(9) - */ -/* LINTSTUB: Func: int copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done) */ -ENTRY(copystr) - pushl %esi - pushl %edi - - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen - incl %edx - cld - -1: decl %edx - jz 4f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp 6f - -4: /* edx is zero -- return ENAMETOOLONG. */ - movl $ENAMETOOLONG,%eax - -6: /* Set *lencopied and return %eax. */ - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 7f - movl %ecx,(%edx) - -7: popl %edi - popl %esi - ret - -/* - * long fuword(const void *uaddr); - * Fetch an int from the user's address space. - * see fuword(9) - */ -/* LINTSTUB: Func: long fuword(const void *base) */ -ENTRY(fuword) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-4,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fusword(const void *uaddr); - * Fetch a short from the user's address space. - * see fusword(9) - */ -/* LINTSTUB: Func: int fusword(const void *base) */ -ENTRY(fusword) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fuswintr(const void *uaddr); - * Fetch a short from the user's address space. Can be called during an - * interrupt. - * see fuswintr(9) - */ -/* LINTSTUB: Func: int fuswintr(const void *base) */ -ENTRY(fuswintr) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - movl CPUVAR(CURLWP),%ecx - movl L_ADDR(%ecx),%ecx - movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx) - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fubyte(const void *uaddr); - * Fetch a byte from the user's address space. 
- * see fubyte(9) - */ -/* LINTSTUB: Func: int fubyte(const void *base) */ -ENTRY(fubyte) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-1,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movzbl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * Handle faults from [fs]u*(). Clean up and return -1. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusufault) - movl $0,PCB_ONFAULT(%ecx) - movl $-1,%eax - ret - -/* - * Handle faults from [fs]u*(). Clean up and return -1. This differs from - * fusufault() in that trap() will recognize it and return immediately rather - * than trying to page fault. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusubail) - movl $0,PCB_ONFAULT(%ecx) - movl $-1,%eax - ret - -/* - * Handle earlier faults from [fs]u*(), due to our of range addresses. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusuaddrfault) - movl $-1,%eax - ret - -/* - * int suword(void *uaddr, long x); - * Store an int in the user's address space. - * see suword(9) - */ -/* LINTSTUB: Func: int suword(void *base, long c) */ -ENTRY(suword) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-4,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. */ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) - -1: /* XXX also need to check the following 3 bytes for validity! 
*/ -#endif - -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movl 8(%esp),%eax - movl %eax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * int susword(void *uaddr, short x); - * Store a short in the user's address space. - * see susword(9) - */ -/* LINTSTUB: Func: int susword(void *base, short c) */ -ENTRY(susword) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. */ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) - -1: /* XXX also need to check the following byte for validity! */ -#endif - -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movl 8(%esp),%eax - movw %ax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * int suswintr(void *uaddr, short x); - * Store a short in the user's address space. Can be called during an - * interrupt. - * see suswintr(9) - */ -/* LINTSTUB: Func: int suswintr(void *base, short c) */ -ENTRY(suswintr) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - movl CPUVAR(CURLWP),%ecx - movl L_ADDR(%ecx),%ecx - movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - - /* Simulate a trap. 
*/ - jmp _C_LABEL(fusubail) - -1: /* XXX also need to check the following byte for validity! */ -#endif - -2: movl 8(%esp),%eax - movw %ax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret + movl %eax,%esi + movl %esi,_C_LABEL(lwp0uarea) + leal (USPACE-FRAMESIZE)(%eax),%esp + xorl %ebp,%ebp /* mark end of frames */ -/* - * int subyte(void *uaddr, char x); - * Store a byte in the user's address space. - * see subyte(9) - */ -/* LINTSTUB: Func: int subyte(void *base, int c) */ -ENTRY(subyte) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-1,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. */ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) + /* Set first_avail after the DUMMY PAGE (see xen_locore). */ + addl $(USPACE+PAGE_SIZE),%esi + subl $KERNBASE,%esi /* init386 wants a physical address */ -1: +#ifdef PAE + pushl $0 /* init386() expects a 64 bits paddr_t with PAE */ #endif + pushl %esi + call _C_LABEL(init_bootspace) + call _C_LABEL(init386) + addl $PDE_SIZE,%esp /* pop paddr_t */ + call _C_LABEL(main) +#endif /* XEN */ +END(start) -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movb 8(%esp),%al - movb %al,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/*****************************************************************************/ - -/* - * The following is i386-specific nonsense. 
- */ +#if defined(XEN) +/* space for the hypercall call page */ +#define HYPERCALL_PAGE_OFFSET 0x1000 +.org HYPERCALL_PAGE_OFFSET +ENTRY(hypercall_page) +.skip 0x1000 +END(hypercall_page) /* - * void lgdt(struct region_descriptor *rdp); - * Load a new GDT pointer (and do any necessary cleanup). + * void lgdt_finish(void); + * Finish load a new GDT pointer (do any necessary cleanup). * XXX It's somewhat questionable whether reloading all the segment registers * is necessary, since the actual descriptor data is not changed except by * process creation and exit, both of which clean up via task switches. OTOH, * this only happens at run time when the GDT is resized. */ -/* LINTSTUB: Func: void lgdt(struct region_descriptor *rdp) */ -NENTRY(lgdt) - /* Reload the descriptor table. */ - movl 4(%esp),%eax - lgdt (%eax) - /* Flush the prefetch queue. */ - jmp 1f - nop -1: /* Reload "stale" selectors. */ +/* LINTSTUB: Func: void lgdt_finish(void) */ +NENTRY(lgdt_finish) movl $GSEL(GDATA_SEL, SEL_KPL),%eax movw %ax,%ds movw %ax,%es @@ -1613,615 +947,445 @@ NENTRY(lgdt) pushl $GSEL(GCODE_SEL, SEL_KPL) pushl %eax lret +END(lgdt_finish) -/*****************************************************************************/ +#endif /* XEN */ + +/* + * void lwp_trampoline(void); + * + * This is a trampoline function pushed onto the stack of a newly created + * process in order to do some additional setup. The trampoline is entered by + * cpu_switchto()ing to the process, so we abuse the callee-saved + * registers used by cpu_switchto() to store the information about the + * stub to call. + * NOTE: This function does not have a normal calling sequence! + */ +NENTRY(lwp_trampoline) + movl %ebp,%edi /* for .Lsyscall_checkast */ + xorl %ebp,%ebp + pushl %edi + pushl %eax + call _C_LABEL(lwp_startup) + addl $8,%esp + pushl %ebx + call *%esi + addl $4,%esp + jmp .Lsyscall_checkast + /* NOTREACHED */ +END(lwp_trampoline) /* - * These functions are primarily used by DDB. 
+ * sigcode() + * + * Signal trampoline; copied to top of user stack. Used only for + * compatibility with old releases of NetBSD. */ +NENTRY(sigcode) + /* + * Handler has returned here as if we called it. The sigcontext + * is on the stack after the 3 args "we" pushed. + */ + leal 12(%esp),%eax /* get pointer to sigcontext */ + movl %eax,4(%esp) /* put it in the argument slot */ + /* fake return address already there */ + movl $SYS_compat_16___sigreturn14,%eax + int $0x80 /* enter kernel with args on stack */ + movl $SYS_exit,%eax + int $0x80 /* exit if sigreturn fails */ + .globl _C_LABEL(esigcode) +_C_LABEL(esigcode): +END(sigcode) -/* LINTSTUB: Func: int setjmp (label_t *l) */ +/* + * int setjmp(label_t *) + * + * Used primarily by DDB. + */ ENTRY(setjmp) movl 4(%esp),%eax - movl %ebx,(%eax) # save ebx - movl %esp,4(%eax) # save esp - movl %ebp,8(%eax) # save ebp - movl %esi,12(%eax) # save esi - movl %edi,16(%eax) # save edi - movl (%esp),%edx # get rta - movl %edx,20(%eax) # save eip - xorl %eax,%eax # return (0); + movl %ebx,(%eax) /* save ebx */ + movl %esp,4(%eax) /* save esp */ + movl %ebp,8(%eax) /* save ebp */ + movl %esi,12(%eax) /* save esi */ + movl %edi,16(%eax) /* save edi */ + movl (%esp),%edx /* get rta */ + movl %edx,20(%eax) /* save eip */ + xorl %eax,%eax /* return 0 */ ret +END(setjmp) -/* LINTSTUB: Func: void longjmp (label_t *l) */ +/* + * int longjmp(label_t *) + * + * Used primarily by DDB. 
+ */ ENTRY(longjmp) movl 4(%esp),%eax - movl (%eax),%ebx # restore ebx - movl 4(%eax),%esp # restore esp - movl 8(%eax),%ebp # restore ebp - movl 12(%eax),%esi # restore esi - movl 16(%eax),%edi # restore edi - movl 20(%eax),%edx # get rta - movl %edx,(%esp) # put in return frame - xorl %eax,%eax # return (1); - incl %eax + movl (%eax),%ebx /* restore ebx */ + movl 4(%eax),%esp /* restore esp */ + movl 8(%eax),%ebp /* restore ebp */ + movl 12(%eax),%esi /* restore esi */ + movl 16(%eax),%edi /* restore edi */ + movl 20(%eax),%edx /* get rta */ + movl %edx,(%esp) /* put in return frame */ + movl $1,%eax /* return 1 */ ret +END(longjmp) -/*****************************************************************************/ - - .globl _C_LABEL(sched_whichqs),_C_LABEL(sched_qs) - .globl _C_LABEL(uvmexp),_C_LABEL(panic) - -#ifdef DIAGNOSTIC -NENTRY(switch_error) - pushl $1f - call _C_LABEL(panic) - /* NOTREACHED */ -1: .asciz "cpu_switch" -#endif /* DIAGNOSTIC */ +/* + * void dumpsys(void) + * + * Mimic cpu_switchto() for postmortem debugging. + */ +ENTRY(dumpsys) + pushl %ebx /* set up fake switchframe */ + pushl %esi /* and save context */ + pushl %edi + movl %esp,_C_LABEL(dumppcb)+PCB_ESP + movl %ebp,_C_LABEL(dumppcb)+PCB_EBP + call _C_LABEL(dodumpsys) /* dump! */ + addl $(3*4), %esp /* unwind switchframe */ + ret +END(dumpsys) /* - * void cpu_switch(struct lwp *) - * Find a runnable process and switch to it. Wait if necessary. If the new - * process is the same as the old one, we short-circuit the context save and - * restore. - * - * Note that the stack frame layout is known to "struct switchframe" - * in and to the code in cpu_fork() which initializes + * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp, + * bool returning) + * + * 1. if (oldlwp != NULL), save its context. + * 2. then, restore context of newlwp. 
+ * + * Note that the stack frame layout is known to "struct switchframe" in + * and to the code in cpu_lwp_fork() which initializes * it for a new lwp. */ -ENTRY(cpu_switch) +ENTRY(cpu_switchto) pushl %ebx pushl %esi pushl %edi -#ifdef DEBUG +#if defined(DIAGNOSTIC) && !defined(XEN) cmpl $IPL_SCHED,CPUVAR(ILEVEL) - jae 1f - pushl $2f + jbe 0f + pushl CPUVAR(ILEVEL) + pushl $.Lstr call _C_LABEL(panic) - /* NOTREACHED */ -2: .asciz "not splsched() in cpu_switch!" -1: -#endif /* DEBUG */ - - movl 16(%esp),%esi # current - - /* - * Clear curlwp so that we don't accumulate system time while idle. - * This also insures that schedcpu() will move the old lwp to - * the correct queue if it happens to get called from the spllower() - * below and changes the priority. (See corresponding comment in - * userret()). - */ - movl $0,CPUVAR(CURLWP) - /* - * First phase: find new lwp. - * - * Registers: - * %eax - queue head, scratch, then zero - * %ebx - queue number - * %ecx - cached value of whichqs - * %edx - next lwp in queue - * %esi - old lwp - * %edi - new lwp - */ - - /* Look for new lwp. */ - cli # splhigh doesn't do a cli - movl _C_LABEL(sched_whichqs),%ecx - bsfl %ecx,%ebx # find a full q - jnz switch_dequeue - - /* - * idling: save old context. - * - * Registers: - * %eax, %ecx - scratch - * %esi - old lwp, then old pcb - * %edi - idle pcb - */ - - pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) - addl $4,%esp - - movl L_ADDR(%esi),%esi - - /* Save stack pointers. */ - movl %esp,PCB_ESP(%esi) - movl %ebp,PCB_EBP(%esi) - - /* Find idle PCB for this CPU */ -#ifndef MULTIPROCESSOR - movl $_C_LABEL(lwp0),%ebx - movl L_ADDR(%ebx),%edi - movl L_MD_TSS_SEL(%ebx),%edx -#else - movl CPUVAR(IDLE_PCB),%edi - movl CPUVAR(IDLE_TSS_SEL),%edx -#endif - movl $0,CPUVAR(CURLWP) /* In case we fault... */ - - /* Restore the idle context (avoid interrupts) */ - cli - - /* Restore stack pointers. 
*/ - movl PCB_ESP(%edi),%esp - movl PCB_EBP(%edi),%ebp - - - /* Switch address space. */ - movl PCB_CR3(%edi),%ecx - movl %ecx,%cr3 - - /* Switch TSS. Reset "task busy" flag before loading. */ -#ifdef MULTIPROCESSOR - movl CPUVAR(GDT),%eax -#else - movl _C_LABEL(gdt),%eax -#endif - andl $~0x0200,4-SEL_KPL(%eax,%edx,1) - ltr %dx - - /* We're always in the kernel, so we don't need the LDT. */ - - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%edi),%ecx - movl %ecx,%cr0 - - /* Record new pcb. */ - SET_CURPCB(%edi) - - xorl %esi,%esi - sti -idle_unlock: -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_unlock_idle) -#endif - /* Interrupts are okay again. */ - pushl $IPL_NONE # spl0() - call _C_LABEL(Xspllower) # process pending interrupts - addl $4,%esp - jmp idle_start -idle_zero: - sti - call _C_LABEL(uvm_pageidlezero) - cli - cmpl $0,_C_LABEL(sched_whichqs) - jnz idle_exit -idle_loop: - /* Try to zero some pages. */ - movl _C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx - testl %ecx,%ecx - jnz idle_zero - sti - hlt -NENTRY(mpidle) -idle_start: - cli - cmpl $0,_C_LABEL(sched_whichqs) - jz idle_loop -idle_exit: - movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh - sti -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_lock_idle) + addl $8,%esp +.Lstr: .string "cpu_switchto: switching above IPL_SCHED (%d)\0" +0: #endif - movl _C_LABEL(sched_whichqs),%ecx - bsfl %ecx,%ebx - jz idle_unlock - -switch_dequeue: - /* - * we're running at splhigh(), but it's otherwise okay to take - * interrupts here. - */ - sti - leal _C_LABEL(sched_qs)(,%ebx,8),%eax # select q - - movl L_FORW(%eax),%edi # unlink from front of process q -#ifdef DIAGNOSTIC - cmpl %edi,%eax # linked to self (i.e. nothing queued)? - je _C_LABEL(switch_error) # not possible -#endif /* DIAGNOSTIC */ - movl L_FORW(%edi),%edx - movl %edx,L_FORW(%eax) - movl %eax,L_BACK(%edx) - - cmpl %edx,%eax # q empty? 
- jne 3f - - btrl %ebx,%ecx # yes, clear to indicate empty - movl %ecx,_C_LABEL(sched_whichqs) # update q status -3: /* We just did it. */ - xorl %eax,%eax - CLEAR_RESCHED(%eax) - -switch_resume: -#ifdef DIAGNOSTIC - cmpl %eax,L_WCHAN(%edi) # Waiting for something? - jne _C_LABEL(switch_error) # Yes; shouldn't be queued. - cmpb $LSRUN,L_STAT(%edi) # In run state? - jne _C_LABEL(switch_error) # No; shouldn't be queued. -#endif /* DIAGNOSTIC */ + movl 16(%esp),%esi /* oldlwp */ + movl 20(%esp),%edi /* newlwp */ + movl 24(%esp),%edx /* returning */ - /* Isolate lwp. XXX Is this necessary? */ - movl %eax,L_BACK(%edi) + testl %esi,%esi /* oldlwp = NULL ? */ + jz skip_save - /* Record new lwp. */ - movb $LSONPROC,L_STAT(%edi) # l->l_stat = LSONPROC - SET_CURLWP(%edi,%ecx) - - /* Skip context switch if same lwp. */ - xorl %ebx,%ebx - cmpl %edi,%esi - je switch_return + /* Save old context. */ + movl L_PCB(%esi),%eax + movl %esp,PCB_ESP(%eax) + movl %ebp,PCB_EBP(%eax) +skip_save: - /* If old lwp exited, don't bother. */ - testl %esi,%esi - jz switch_exited + /* Switch to newlwp's stack. */ + movl L_PCB(%edi),%ebx + movl PCB_EBP(%ebx),%ebp + movl PCB_ESP(%ebx),%esp /* - * Second phase: save old context. - * - * Registers: - * %eax, %ecx - scratch - * %esi - old lwp, then old pcb - * %edi - new lwp + * Set curlwp. This must be globally visible in order to permit + * non-interlocked mutex release. */ + movl %edi,%ecx + xchgl %ecx,CPUVAR(CURLWP) - pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + /* Skip the rest if returning to a pinned LWP. */ + testl %edx,%edx + jnz switch_return + + /* Switch ring0 stack */ +#ifdef XEN + pushl %edi + call _C_LABEL(i386_switch_context) addl $4,%esp +#else + movl PCB_ESP0(%ebx),%eax + movl CPUVAR(TSS),%ecx + movl %eax,TSS_ESP0(%ecx) +#endif - movl L_ADDR(%esi),%esi + /* Don't bother with the rest if switching to a system process. */ + testl $LW_SYSTEM,L_FLAG(%edi) + jnz switch_return - /* Save stack pointers. 
*/ - movl %esp,PCB_ESP(%esi) - movl %ebp,PCB_EBP(%esi) +#ifndef XEN + /* Restore thread-private %fs/%gs descriptors. */ + movl CPUVAR(GDT),%ecx + movl PCB_FSD(%ebx),%eax + movl PCB_FSD+4(%ebx),%edx + movl %eax,(GUFS_SEL*8)(%ecx) + movl %edx,(GUFS_SEL*8+4)(%ecx) + movl PCB_GSD(%ebx),%eax + movl PCB_GSD+4(%ebx),%edx + movl %eax,(GUGS_SEL*8)(%ecx) + movl %edx,(GUGS_SEL*8+4)(%ecx) +#endif /* !XEN */ + + /* Switch I/O bitmap */ + movl PCB_IOMAP(%ebx),%eax + orl %eax,%eax + jnz .Lcopy_iobitmap + movl CPUVAR(TSS),%eax + movl $(IOMAP_INVALOFF << 16),TSS_IOBASE(%eax) +.Liobitmap_done: + + /* Is this process using RAS (restartable atomic sequences)? */ + movl L_PROC(%edi),%eax + cmpl $0,P_RASLIST(%eax) + je no_RAS + + /* Handle restartable atomic sequences (RAS). */ + movl L_MD_REGS(%edi),%ecx + pushl TF_EIP(%ecx) + pushl %eax + call _C_LABEL(ras_lookup) + addl $8,%esp + cmpl $-1,%eax + je no_RAS + movl L_MD_REGS(%edi),%ecx + movl %eax,TF_EIP(%ecx) +no_RAS: -switch_exited: /* - * Third phase: restore saved context. - * - * Registers: - * %eax, %ebx, %ecx, %edx - scratch - * %esi - new pcb - * %edi - new lwp + * Restore cr0 (including FPU state). Raise the IPL to IPL_HIGH. + * FPU IPIs can alter the LWP's saved cr0. Dropping the priority + * is deferred until mi_switch(), when cpu_switchto() returns. */ - - /* No interrupts while loading new state. */ - cli - movl L_ADDR(%edi),%esi - - /* Restore stack pointers. */ - movl PCB_ESP(%esi),%esp - movl PCB_EBP(%esi),%ebp - -#if 0 - /* Don't bother with the rest if switching to a system process. */ - testl $P_SYSTEM,L_FLAG(%edi); XXX NJWLWP lwp's don't have P_SYSTEM! - jnz switch_restored -#endif - -#ifdef MULTIPROCESSOR - movl CPUVAR(GDT),%eax -#else - /* Load TSS info. */ - movl _C_LABEL(gdt),%eax -#endif - movl L_MD_TSS_SEL(%edi),%edx - - /* Switch TSS. Reset "task busy" flag before loading. 
*/ - andl $~0x0200,4(%eax,%edx, 1) - ltr %dx - +#ifdef XEN pushl %edi - call _C_LABEL(pmap_activate) # pmap_activate(p) + call _C_LABEL(i386_tls_switch) addl $4,%esp +#else /* !XEN */ + movl $IPL_HIGH,CPUVAR(ILEVEL) + movl PCB_CR0(%ebx),%ecx /* has CR0_TS clear */ + movl %cr0,%edx -#if 0 -switch_restored: -#endif - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%esi),%ecx -#ifdef MULTIPROCESSOR - /* - * If our floating point registers are on a different cpu, - * clear CR0_TS so we'll trap rather than reuse bogus state. + /* + * If our floating point registers are on a different CPU, + * set CR0_TS so we'll trap rather than reuse bogus state. */ - movl PCB_FPCPU(%esi),%ebx - cmpl CPUVAR(SELF),%ebx - jz 1f + cmpl CPUVAR(FPCURLWP),%edi + je skip_TS orl $CR0_TS,%ecx -1: -#endif - movl %ecx,%cr0 +skip_TS: - /* Record new pcb. */ - SET_CURPCB(%esi) - - /* Interrupts are okay again. */ - sti - -/* - * Check for restartable atomic sequences (RAS) - */ - movl CPUVAR(CURLWP),%edi - movl L_PROC(%edi),%esi - cmpl $0,P_NRAS(%esi) - je 1f - movl L_MD_REGS(%edi),%ebx - movl TF_EIP(%ebx),%eax - pushl %eax - pushl %esi - call _C_LABEL(ras_lookup) - addl $8,%esp - cmpl $-1,%eax - je 1f - movl %eax,TF_EIP(%ebx) -1: - movl $1,%ebx + /* Reloading CR0 is very expensive - avoid if possible. */ + cmpl %edx,%ecx + je switch_return + movl %ecx,%cr0 +#endif /* !XEN */ switch_return: -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_unlock_idle) -#endif - pushl $IPL_NONE # spl0() - call _C_LABEL(Xspllower) # process pending interrupts - addl $4,%esp - movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh() - - movl %ebx,%eax - + /* Return to the new LWP, returning 'oldlwp' in %eax. */ + movl %esi,%eax popl %edi popl %esi popl %ebx ret -/* - * void cpu_switchto(struct lwp *current, struct lwp *next) - * Switch to the specified next LWP. - */ -ENTRY(cpu_switchto) - pushl %ebx +.Lcopy_iobitmap: + /* Copy I/O bitmap. 
*/ + incl _C_LABEL(pmap_iobmp_evcnt)+EV_COUNT + movl $(IOMAPSIZE/4),%ecx pushl %esi pushl %edi - -#ifdef DEBUG - cmpl $IPL_SCHED,CPUVAR(ILEVEL) - jae 1f - pushl $2f - call _C_LABEL(panic) - /* NOTREACHED */ -2: .asciz "not splsched() in cpu_switchto!" -1: -#endif /* DEBUG */ - - movl 16(%esp),%esi # current - movl 20(%esp),%edi # next - - /* - * Clear curlwp so that we don't accumulate system time while idle. - * This also insures that schedcpu() will move the old process to - * the correct queue if it happens to get called from the spllower() - * below and changes the priority. (See corresponding comment in - * usrret()). - * - * XXX Is this necessary? We know we won't go idle. - */ - movl $0,CPUVAR(CURLWP) - - /* - * We're running at splhigh(), but it's otherwise okay to take - * interrupts here. - */ - sti - - /* Jump into the middle of cpu_switch */ - xorl %eax,%eax - jmp switch_resume - -/* - * void switch_exit(struct lwp *l, void (*exit)(struct lwp *)); - * Switch to the appropriate idle context (lwp0's if uniprocessor; the cpu's - * if multiprocessor) and deallocate the address space and kernel stack for p. - * Then jump into cpu_switch(), as if we were in the idle proc all along. - */ -#ifndef MULTIPROCESSOR - .globl _C_LABEL(lwp0) -#endif - .globl _C_LABEL(uvmspace_free),_C_LABEL(kernel_map) - .globl _C_LABEL(uvm_km_free),_C_LABEL(tss_free) -/* LINTSTUB: Func: void switch_exit(struct lwp *l, void (*exit)(struct lwp *)) */ -ENTRY(switch_exit) - movl 4(%esp),%edi # old process - movl 8(%esp),%eax # exit func -#ifndef MULTIPROCESSOR - movl $_C_LABEL(lwp0),%ebx - movl L_ADDR(%ebx),%esi - movl L_MD_TSS_SEL(%ebx),%edx -#else - movl CPUVAR(IDLE_PCB),%esi - movl CPUVAR(IDLE_TSS_SEL),%edx -#endif - /* In case we fault... */ - movl $0,CPUVAR(CURLWP) - - /* Restore the idle context. */ - cli - - /* Restore stack pointers. */ - movl PCB_ESP(%esi),%esp - movl PCB_EBP(%esi),%ebp - - /* Save exit func. */ - pushl %eax - - /* Load TSS info. 
*/ -#ifdef MULTIPROCESSOR - movl CPUVAR(GDT),%eax -#else - /* Load TSS info. */ - movl _C_LABEL(gdt),%eax -#endif - - /* Switch address space. */ - movl PCB_CR3(%esi),%ecx - movl %ecx,%cr3 - - /* Switch TSS. */ - andl $~0x0200,4-SEL_KPL(%eax,%edx,1) - ltr %dx - - /* We're always in the kernel, so we don't need the LDT. */ - - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%esi),%ecx - movl %ecx,%cr0 - - /* Record new pcb. */ - SET_CURPCB(%esi) - - /* Interrupts are okay again. */ - sti - - /* - * Schedule the dead process's vmspace and stack to be freed. - */ - movl 0(%esp),%eax /* %eax = exit func */ - movl %edi,0(%esp) /* {lwp_}exit2(l) */ - call *%eax - addl $4,%esp - - /* Jump into cpu_switch() with the right state. */ - xorl %esi,%esi - movl %esi,CPUVAR(CURLWP) - jmp idle_start + movl %eax,%esi /* pcb_iomap */ + movl CPUVAR(TSS),%edi + leal TSS_IOMAP(%edi),%edi + rep + movsl + popl %edi + popl %esi + movl CPUVAR(TSS),%eax + movl $(IOMAP_VALIDOFF << 16),TSS_IOBASE(%eax) + jmp .Liobitmap_done +END(cpu_switchto) /* * void savectx(struct pcb *pcb); + * * Update pcb, saving current processor state. */ -/* LINTSTUB: Func: void savectx(struct pcb *pcb) */ ENTRY(savectx) - movl 4(%esp),%edx # edx = p->p_addr - - /* Save stack pointers. */ + movl 4(%esp),%edx /* edx = pcb */ movl %esp,PCB_ESP(%edx) movl %ebp,PCB_EBP(%edx) - ret +END(savectx) /* - * Old call gate entry for syscall - */ -/* LINTSTUB: Var: char Xosyscall[1]; */ -IDTVEC(osyscall) - /* Set eflags in trap frame. 
*/ - pushfl - popl 8(%esp) - pushl $7 # size of instruction for restart - jmp syscall1 - -/* + * syscall() + * * Trap gate entry for syscall */ -/* LINTSTUB: Var: char Xsyscall[1]; */ IDTVEC(syscall) - pushl $2 # size of instruction for restart -syscall1: - pushl $T_ASTFLT # trap # for doing ASTs + pushl $2 /* size of instruction for restart */ + pushl $T_ASTFLT /* trap # for doing ASTs */ INTRENTRY - + STI(%eax) #ifdef DIAGNOSTIC movl CPUVAR(ILEVEL),%ebx testl %ebx,%ebx jz 1f pushl $5f - call _C_LABEL(printf) + call _C_LABEL(panic) addl $4,%esp #ifdef DDB int $3 #endif -1: +1: #endif /* DIAGNOSTIC */ - movl CPUVAR(CURLWP),%edx - movl %esp,L_MD_REGS(%edx) # save pointer to frame - movl L_PROC(%edx),%edx - pushl %esp - call *P_MD_SYSCALL(%edx) # get pointer to syscall() function - addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ - cli - CHECK_ASTPENDING(%eax) - je 1f - /* Always returning to user mode here. */ - CLEAR_ASTPENDING(%eax) - sti - /* Pushed T_ASTFLT into tf_trapno on entry. */ + addl $1,CPUVAR(NSYSCALL) /* count it atomically */ + adcl $0,CPUVAR(NSYSCALL)+4 /* count it atomically */ + movl CPUVAR(CURLWP),%edi + movl L_PROC(%edi),%edx + movl %esp,L_MD_REGS(%edi) /* save pointer to frame */ pushl %esp - call _C_LABEL(trap) + call *P_MD_SYSCALL(%edx) /* get pointer to syscall() function */ addl $4,%esp - jmp 2b +.Lsyscall_checkast: + /* Check for ASTs on exit to user mode. */ + CLI(%eax) + movl L_MD_ASTPENDING(%edi), %eax + orl CPUVAR(WANT_PMAPLOAD), %eax + jnz 9f +#ifdef XEN + STIC(%eax) + jz 14f + call _C_LABEL(stipending) + testl %eax,%eax + jz 14f + /* process pending interrupts */ + CLI(%eax) + movl CPUVAR(ILEVEL), %ebx + movl $.Lsyscall_resume, %esi /* address to resume loop at */ +.Lsyscall_resume: + movl %ebx,%eax /* get cpl */ + movl CPUVAR(IUNMASK)(,%eax,4),%eax + andl CPUVAR(IPENDING),%eax /* any non-masked bits left? 
*/ + jz 17f + bsrl %eax,%eax + btrl %eax,CPUVAR(IPENDING) + movl CPUVAR(ISOURCES)(,%eax,4),%eax + jmp *IS_RESUME(%eax) +17: movl %ebx, CPUVAR(ILEVEL) /* restore cpl */ + jmp .Lsyscall_checkast +14: +#endif /* XEN */ #ifndef DIAGNOSTIC -1: INTRFASTEXIT + INTRFASTEXIT #else /* DIAGNOSTIC */ -1: cmpl $IPL_NONE,CPUVAR(ILEVEL) + cmpl $IPL_NONE,CPUVAR(ILEVEL) jne 3f INTRFASTEXIT -3: sti +3: STI(%eax) pushl $4f - call _C_LABEL(printf) + call _C_LABEL(panic) addl $4,%esp -#ifdef DDB - int $3 -#endif /* DDB */ - movl $IPL_NONE,CPUVAR(ILEVEL) - jmp 2b -4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n" -5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n" + pushl $IPL_NONE + call _C_LABEL(spllower) + addl $4,%esp + jmp .Lsyscall_checkast +4: .asciz "SPL NOT LOWERED ON SYSCALL EXIT\n" +5: .asciz "SPL NOT ZERO ON SYSCALL ENTRY\n" #endif /* DIAGNOSTIC */ +9: + cmpl $0, CPUVAR(WANT_PMAPLOAD) + jz 10f + STI(%eax) + call _C_LABEL(pmap_load) + jmp .Lsyscall_checkast /* re-check ASTs */ +10: + /* Always returning to user mode here. */ + movl $0, L_MD_ASTPENDING(%edi) + STI(%eax) + /* Pushed T_ASTFLT into tf_trapno on entry. */ + pushl %esp + call _C_LABEL(trap) + addl $4,%esp + jmp .Lsyscall_checkast /* re-check ASTs */ +IDTVEC_END(syscall) -#if NNPX > 0 /* - * Special interrupt handlers. Someday intr0-intr15 will be used to count - * interrupts. We'll still need a special exception 16 handler. The busy - * latch stuff in probintr() can be moved to npxprobe(). + * int npx586bug1(int a, int b) + * Used when checking for the FDIV bug on first generations pentiums. + * Anything 120MHz or above is fine. 
*/ - -/* LINTSTUB: Func: void probeintr(void) */ -NENTRY(probeintr) - ss - incl _C_LABEL(npx_intrs_while_probing) - pushl %eax - movb $0x20,%al # EOI (asm in strings loses cpp features) - outb %al,$0xa0 # IO_ICU2 - outb %al,$0x20 # IO_ICU1 - movb $0,%al - outb %al,$0xf0 # clear BUSY# latch - popl %eax - iret - -/* LINTSTUB: Func: void probetrap(void) */ -NENTRY(probetrap) - ss - incl _C_LABEL(npx_traps_while_probing) - fnclex - iret - -/* LINTSTUB: Func: int npx586bug1(int a, int b) */ NENTRY(npx586bug1) - fildl 4(%esp) # x - fildl 8(%esp) # y + fildl 4(%esp) /* x */ + fildl 8(%esp) /* y */ fld %st(1) - fdiv %st(1),%st # x/y - fmulp %st,%st(1) # (x/y)*y - fsubrp %st,%st(1) # x-(x/y)*y + fdiv %st(1),%st /* x/y */ + fmulp %st,%st(1) /* (x/y)*y */ + fsubrp %st,%st(1) /* x-(x/y)*y */ pushl $0 fistpl (%esp) popl %eax ret -#endif /* NNPX > 0 */ +END(npx586bug1) + +/* + * void sse2_idlezero_page(void *pg) + * + * Zero a page without polluting the cache. Preemption must be + * disabled by the caller. Abort if a preemption is pending. + */ +ENTRY(sse2_idlezero_page) + pushl %ebp + movl %esp,%ebp + movl 8(%esp), %edx + movl $(PAGE_SIZE/32), %ecx + xorl %eax, %eax + .align 16 +1: + testl $RESCHED_KPREEMPT, CPUVAR(RESCHED) + jnz 2f + movnti %eax, 0(%edx) + movnti %eax, 4(%edx) + movnti %eax, 8(%edx) + movnti %eax, 12(%edx) + movnti %eax, 16(%edx) + movnti %eax, 20(%edx) + movnti %eax, 24(%edx) + movnti %eax, 28(%edx) + addl $32, %edx + decl %ecx + jnz 1b + sfence + incl %eax + pop %ebp + ret +2: + sfence + popl %ebp + ret +END(sse2_idlezero_page) + +ENTRY(intrfastexit) + movw TF_GS(%esp),%gs + movw TF_FS(%esp),%fs + movw TF_ES(%esp),%es + movw TF_DS(%esp),%ds + movl TF_EDI(%esp),%edi + movl TF_ESI(%esp),%esi + movl TF_EBP(%esp),%ebp + movl TF_EBX(%esp),%ebx + movl TF_EDX(%esp),%edx + movl TF_ECX(%esp),%ecx + movl TF_EAX(%esp),%eax + addl $(TF_PUSHSIZE+8),%esp + iret +END(intrfastexit) +