Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.51.2.2 retrieving revision 1.66.6.2 diff -u -p -r1.51.2.2 -r1.66.6.2 --- src/sys/arch/i386/i386/locore.S 2007/11/13 15:58:33 1.51.2.2 +++ src/sys/arch/i386/i386/locore.S 2008/06/02 13:22:15 1.66.6.2 @@ -1,10 +1,40 @@ -/* $NetBSD: locore.S,v 1.51.2.2 2007/11/13 15:58:33 bouyer Exp $ */ +/* $NetBSD: locore.S,v 1.66.6.2 2008/06/02 13:22:15 mjf Exp $ */ /* * Copyright-o-rama! */ /* + * Copyright (c) 2006 Manuel Bouyer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Manuel Bouyer. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* * Copyright (c) 2001 Wasabi Systems, Inc. * All rights reserved. * @@ -55,13 +85,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED @@ -110,11 +133,14 @@ * @(#)locore.s 7.3 (Berkeley) 5/13/91 */ +#include +__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.66.6.2 2008/06/02 13:22:15 mjf Exp $"); + #include "opt_compat_oldboot.h" -#include "opt_cputype.h" #include "opt_ddb.h" #include "opt_realmem.h" #include "opt_vm86.h" +#include "opt_xen.h" #include "npx.h" #include "assym.h" @@ -130,22 +156,57 @@ #include #include #include -#include -#include #include #include +#ifndef XEN +#include +#endif /* XXX temporary kluge; these should not be here */ /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */ #include +#ifdef XEN +/* + * Xen guest identifier and loader selection + */ +.section __xen_guest +#ifdef XEN3 + .ascii "GUEST_OS=netbsd,GUEST_VER=3.0,XEN_VER=xen-3.0" +#if defined(DOM0OPS) || !defined(XEN_COMPAT_030001) + .ascii ",VIRT_BASE=0xc0000000" /* KERNBASE */ + .ascii ",ELF_PADDR_OFFSET=0xc0000000" /* KERNBASE */ +#else + .ascii ",VIRT_BASE=0xc0100000" /* KERNTEXTOFF */ + .ascii ",ELF_PADDR_OFFSET=0xc0100000" /* KERNTEXTOFF */ +#endif + .ascii ",VIRT_ENTRY=0xc0100000" /* KERNTEXTOFF */ +#if !defined(XEN_COMPAT_030001) + .ascii ",HYPERCALL_PAGE=0x00000101" + /* (???+HYPERCALL_PAGE_OFFSET)/PAGE_SIZE) */ +#endif +#ifdef PAE + .ascii ",PAE=yes[extended-cr3]" +#endif +#else /* !XEN3 */ + .ascii "GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=2.0" +#endif /* XEN3 */ + .ascii ",LOADER=generic" +#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE) + .ascii ",BSD_SYMTAB=yes" +#endif + .byte 0 +#endif + /* * Initialization */ .data .globl _C_LABEL(cpu) + .globl _C_LABEL(cpuid_level) .globl _C_LABEL(esym) + .globl _C_LABEL(eblob) .globl _C_LABEL(atdevbase) .globl _C_LABEL(proc0uarea),_C_LABEL(PDPpaddr) .globl _C_LABEL(gdt) @@ -176,6 +237,7 @@ _C_LABEL(lapic_tpr): #endif _C_LABEL(cpu): .long 0 # are we 80486, Pentium, or.. +_C_LABEL(cpuid_level): .long 0 _C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual _C_LABEL(proc0uarea): .long 0 _C_LABEL(PDPpaddr): .long 0 # paddr of PDP, for libkvm @@ -184,8 +246,11 @@ _C_LABEL(tablesize): .long 0 .space 512 tmpstk: - +#ifndef XEN #define _RELOC(x) ((x) - KERNBASE) +#else +#define _RELOC(x) ((x)) +#endif /* XEN */ #define RELOC(x) _RELOC(_C_LABEL(x)) .text @@ -193,6 +258,7 @@ tmpstk: .set _C_LABEL(kernel_text),KERNTEXTOFF .globl start +#ifndef XEN start: movw $0x1234,0x472 # warm boot #if defined(MULTIBOOT) @@ -245,7 +311,7 @@ _C_LABEL(Multiboot_Header): movw %ax,%fs movw %ax,%gs decl %eax - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL + movl %eax,RELOC(cpuid_level) /* Find out our CPU type. */ @@ -405,7 +471,7 @@ trycyrix486: try586: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL + movl %eax,RELOC(cpuid_level) 2: /* @@ -425,8 +491,8 @@ try586: /* Use the `cpuid' instruction. /* * Virtual address space of kernel: * - * text | data | bss | [syms] | page dir | proc0 kstack | L1 ptp - * 0 1 2 3 + * text | data | bss | [syms] | [blobs] | page dir | proc0 kstack | L1 ptp + * 0 1 2 3 */ #define PROC0_PDIR_OFF 0 @@ -449,6 +515,7 @@ try586: /* Use the `cpuid' instruction. /* Find end of kernel image. */ movl $RELOC(end),%edi + #if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE) /* Save the symbols (if loaded). */ movl RELOC(esym),%eax @@ -459,8 +526,15 @@ try586: /* Use the `cpuid' instruction. 1: #endif + /* Skip over any modules/blobs. */ + movl RELOC(eblob),%eax + testl %eax,%eax + jz 1f + subl $KERNBASE,%eax + movl %eax,%edi +1: /* Compute sizes */ - movl %edi,%esi # edi = esym ? esym : end + movl %edi,%esi addl $PGOFSET,%esi # page align up andl $~PGOFSET,%esi @@ -644,6 +718,98 @@ begin: #endif /* SAFARI_FIFO_HACK */ call _C_LABEL(main) +#else /* XEN */ +start: + /* First, reset the PSL. */ + pushl $PSL_MBO + popfl + + cld +#ifdef XEN3 + movl %esp, %ebx # save start of available space +#else + movl %esi,%ebx # save start_info pointer +#endif + movl $_RELOC(tmpstk),%esp # bootstrap stack end location + + /* Clear BSS first so that there are no surprises... */ + xorl %eax,%eax + movl $RELOC(__bss_start),%edi + movl $RELOC(_end),%ecx + subl %edi,%ecx + rep stosb + + /* Copy the necessary stuff from start_info structure. */ + /* We need to copy shared_info early, so that sti/cli work */ + movl $RELOC(start_info_union),%edi + movl $128,%ecx + rep movsl + + /* Clear segment registers; always null in proc0. */ + xorl %eax,%eax + movw %ax,%fs + movw %ax,%gs + decl %eax + movl %eax,RELOC(cpuid_level) + + xorl %eax,%eax + cpuid + movl %eax,RELOC(cpuid_level) + + call xen_pmap_bootstrap + /* + * First avail returned by xen_pmap_bootstrap in %eax + */ + movl %eax, %esi; + movl %esi, _C_LABEL(proc0uarea) + +#define PROC0PDIR ((0) * PAGE_SIZE) +#define PROC0STACK ((1) * PAGE_SIZE) + + /* Set up bootstrap stack. */ + leal (KSTACK_SIZE-FRAMESIZE)(%eax),%esp + xorl %ebp,%ebp # mark end of frames + + addl $USPACE, %esi + subl $KERNBASE, %esi #init386 want a physical address + pushl %esi + call _C_LABEL(init386) # wire 386 chip for unix operation + addl $4,%esp + call _C_LABEL(main) + +#if defined(XEN3) && !defined(XEN_COMPAT_030001) +/* space for the hypercall call page */ +#define HYPERCALL_PAGE_OFFSET 0x1000 +.org HYPERCALL_PAGE_OFFSET +ENTRY(hypercall_page) +.skip 0x1000 +#endif /* defined(XEN3) && !defined(XEN_COMPAT_030001) */ + +/* + * void lgdt_finish(void); + * Finish load a new GDT pointer (do any necessary cleanup). + * XXX It's somewhat questionable whether reloading all the segment registers + * is necessary, since the actual descriptor data is not changed except by + * process creation and exit, both of which clean up via task switches. OTOH, + * this only happens at run time when the GDT is resized. + */ +/* LINTSTUB: Func: void lgdt_finish(void) */ +NENTRY(lgdt_finish) + movl $GSEL(GDATA_SEL, SEL_KPL),%eax + movw %ax,%ds + movw %ax,%es + movw %ax,%gs + movw %ax,%ss + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + movw %ax,%fs + /* Reload code selector by doing intersegment return. */ + popl %eax + pushl $GSEL(GCODE_SEL, SEL_KPL) + pushl %eax + lret +END(lgdt_finish) + +#endif /* XEN */ /* * void lwp_trampoline(void); @@ -655,17 +821,18 @@ begin: * NOTE: This function does not have a normal calling sequence! */ NENTRY(lwp_trampoline) - pushl %ebp + movl %ebp,%edi /* for .Lsyscall_checkast */ xorl %ebp,%ebp + pushl %edi pushl %eax call _C_LABEL(lwp_startup) addl $8,%esp pushl %ebx call *%esi addl $4,%esp - DO_DEFERRED_SWITCH - INTRFASTEXIT + jmp .Lsyscall_checkast /* NOTREACHED */ +END(lwp_trampoline) /* * sigcode() @@ -687,6 +854,7 @@ NENTRY(sigcode) int $0x80 # exit if sigreturn fails .globl _C_LABEL(esigcode) _C_LABEL(esigcode): +END(sigcode) /* * int setjmp(label_t *) @@ -704,6 +872,7 @@ ENTRY(setjmp) movl %edx,20(%eax) # save eip xorl %eax,%eax # return 0 ret +END(setjmp) /* * int longjmp(label_t *) @@ -721,9 +890,27 @@ ENTRY(longjmp) movl %edx,(%esp) # put in return frame movl $1,%eax # return 1 ret +END(longjmp) /* - * struct lwp *cpu_switchto(struct lwp *oldlwp, struct newlwp) + * void dumpsys(void) + * + * Mimic cpu_switchto() for postmortem debugging. + */ +ENTRY(dumpsys) + pushl %ebx # set up fake switchframe + pushl %esi # and save context + pushl %edi + movl %esp,_C_LABEL(dumppcb)+PCB_ESP + movl %ebp,_C_LABEL(dumppcb)+PCB_EBP + call _C_LABEL(dodumpsys) # dump! + addl $(3*4), %esp # unwind switchframe + ret +END(dumpsys) + +/* + * struct lwp *cpu_switchto(struct lwp *oldlwp, struct newlwp, + * bool returning) * * 1. if (oldlwp != NULL), save its context. * 2. then, restore context of newlwp. @@ -737,8 +924,20 @@ ENTRY(cpu_switchto) pushl %esi pushl %edi +#if defined(DIAGNOSTIC) && !defined(XEN) + cmpl $IPL_SCHED,CPUVAR(ILEVEL) + jbe 0f + pushl CPUVAR(ILEVEL) + pushl $.Lstr + call _C_LABEL(panic) + addl $8,%esp +.Lstr: .string "cpu_switchto: switching above IPL_SCHED (%d)\0" +0: +#endif + movl 16(%esp),%esi # oldlwp movl 20(%esp),%edi # newlwp + movl 24(%esp),%edx # returning testl %esi,%esi jz 1f @@ -752,27 +951,32 @@ ENTRY(cpu_switchto) movl PCB_EBP(%ebx),%ebp movl PCB_ESP(%ebx),%esp - /* Switch TSS. Reset "task busy" flag before loading. */ - movl %cr3,%eax - movl %eax,PCB_CR3(%ebx) # for TSS gates - movl CPUVAR(GDT),%ecx - movl L_MD_TSS_SEL(%edi),%edx - andl $~0x0200,4(%ecx,%edx, 1) - ltr %dx + /* + * Set curlwp. This must be globally visible in order to permit + * non-interlocked mutex release. + */ + movl %edi,%ecx + xchgl %ecx,CPUVAR(CURLWP) - /* Set curlwp. */ - movl %edi,CPUVAR(CURLWP) + /* Skip the rest if returning to a pinned LWP. */ + testl %edx,%edx + jnz 4f + +#ifdef XEN + pushl %edi + call _C_LABEL(i386_switch_context) + addl $4,%esp +#else /* XEN */ + /* Switch ring0 esp */ + movl PCB_ESP0(%ebx),%eax + movl %eax,CPUVAR(ESP0) /* Don't bother with the rest if switching to a system process. */ testl $LW_SYSTEM,L_FLAG(%edi) jnz 4f - /* Is this process using RAS (restartable atomic sequences)? */ - movl L_PROC(%edi),%eax - cmpl $0,P_RASLIST(%eax) - jne 5f - /* Restore thread-private %fs/%gs descriptors. */ + movl CPUVAR(GDT),%ecx movl PCB_FSD(%ebx), %eax movl PCB_FSD+4(%ebx), %edx movl %eax, (GUFS_SEL*8)(%ecx) @@ -781,13 +985,28 @@ ENTRY(cpu_switchto) movl PCB_GSD+4(%ebx), %edx movl %eax, (GUGS_SEL*8)(%ecx) movl %edx, (GUGS_SEL*8+4)(%ecx) +#endif /* XEN */ + + /* Switch I/O bitmap */ + movl PCB_IOMAP(%ebx),%eax + orl %eax,%eax + jnz,pn .Lcopy_iobitmap + movl $(IOMAP_INVALOFF << 16),CPUVAR(IOBASE) +.Liobitmap_done: + + /* Is this process using RAS (restartable atomic sequences)? */ + movl L_PROC(%edi),%eax + cmpl $0,P_RASLIST(%eax) + jne 5f /* * Restore cr0 (including FPU state). Raise the IPL to IPL_IPI. * FPU IPIs can alter the LWP's saved cr0. Dropping the priority * is deferred until mi_switch(), when cpu_switchto() returns. */ -2: movl $IPL_IPI,CPUVAR(ILEVEL) +2: +#ifndef XEN + movl $IPL_IPI,CPUVAR(ILEVEL) movl PCB_CR0(%ebx),%ecx movl %cr0,%edx @@ -804,6 +1023,7 @@ ENTRY(cpu_switchto) 3: cmpl %edx,%ecx je 4f movl %ecx,%cr0 +#endif /* XEN */ /* Return to the new LWP, returning 'oldlwp' in %eax. */ 4: movl %esi,%eax @@ -824,6 +1044,22 @@ ENTRY(cpu_switchto) movl %eax,TF_EIP(%ecx) jmp 2b +.Lcopy_iobitmap: + /* Copy I/O bitmap. */ + movl $(IOMAPSIZE/4),%ecx + pushl %esi + pushl %edi + movl %eax,%esi /* pcb_iomap */ + movl CPUVAR(SELF),%edi + leal CPU_INFO_IOMAP(%edi),%edi + rep + movsl + popl %edi + popl %esi + movl $((CPU_INFO_IOMAP - CPU_INFO_TSS) << 16),CPUVAR(IOBASE) + jmp .Liobitmap_done +END(cpu_switchto) + /* * void savectx(struct pcb *pcb); * @@ -834,6 +1070,7 @@ ENTRY(savectx) movl %esp,PCB_ESP(%edx) movl %ebp,PCB_EBP(%edx) ret +END(savectx) /* * osyscall() @@ -845,6 +1082,7 @@ IDTVEC(osyscall) popl 8(%esp) pushl $7 # size of instruction for restart jmp syscall1 +IDTVEC_END(osyscall) /* * syscall() @@ -856,14 +1094,7 @@ IDTVEC(syscall) syscall1: pushl $T_ASTFLT # trap # for doing ASTs INTRENTRY - #ifdef DIAGNOSTIC - cmpl $0, CPUVAR(WANT_PMAPLOAD) - jz 1f - pushl $6f - call _C_LABEL(printf) - addl $4, %esp -1: movl CPUVAR(ILEVEL),%ebx testl %ebx,%ebx jz 1f @@ -875,49 +1106,75 @@ syscall1: #endif 1: #endif /* DIAGNOSTIC */ - movl CPUVAR(CURLWP),%edx - movl %esp,L_MD_REGS(%edx) # save pointer to frame - movl L_PROC(%edx),%edx + incl CPUVAR(NSYSCALL) # count it atomically + movl CPUVAR(CURLWP),%edi + movl L_PROC(%edi),%edx + movl %esp,L_MD_REGS(%edi) # save pointer to frame pushl %esp call *P_MD_SYSCALL(%edx) # get pointer to syscall() function addl $4,%esp .Lsyscall_checkast: /* Check for ASTs on exit to user mode. */ - cli - CHECK_ASTPENDING(%eax) - je 1f - /* Always returning to user mode here. */ - CLEAR_ASTPENDING(%eax) - sti - /* Pushed T_ASTFLT into tf_trapno on entry. */ - pushl %esp - call _C_LABEL(trap) - addl $4,%esp - jmp .Lsyscall_checkast /* re-check ASTs */ -1: CHECK_DEFERRED_SWITCH + CLI(%eax) + movl L_MD_ASTPENDING(%edi), %eax + orl CPUVAR(WANT_PMAPLOAD), %eax jnz 9f +#ifdef XEN + STIC(%eax) + jz 14f + call _C_LABEL(stipending) + testl %eax,%eax + jz 14f + /* process pending interrupts */ + CLI(%eax) + movl CPUVAR(ILEVEL), %ebx + movl $.Lsyscall_resume, %esi # address to resume loop at +.Lsyscall_resume: + movl %ebx,%eax # get cpl + movl CPUVAR(IUNMASK)(,%eax,4),%eax + andl CPUVAR(IPENDING),%eax # any non-masked bits left? + jz 17f + bsrl %eax,%eax + btrl %eax,CPUVAR(IPENDING) + movl CPUVAR(ISOURCES)(,%eax,4),%eax + jmp *IS_RESUME(%eax) +17: movl %ebx, CPUVAR(ILEVEL) #restore cpl + jmp .Lsyscall_checkast +14: +#endif /* XEN */ #ifndef DIAGNOSTIC INTRFASTEXIT #else /* DIAGNOSTIC */ cmpl $IPL_NONE,CPUVAR(ILEVEL) jne 3f INTRFASTEXIT -3: sti +3: STI(%eax) pushl $4f call _C_LABEL(printf) addl $4,%esp -#ifdef DDB - int $3 -#endif /* DDB */ - movl $IPL_NONE,CPUVAR(ILEVEL) + pushl $IPL_NONE + call _C_LABEL(spllower) + addl $4,%esp jmp .Lsyscall_checkast 4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n" 5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n" -6: .asciz "WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n" #endif /* DIAGNOSTIC */ -9: sti +9: + cmpl $0, CPUVAR(WANT_PMAPLOAD) + jz 10f + STI(%eax) call _C_LABEL(pmap_load) jmp .Lsyscall_checkast /* re-check ASTs */ +10: + /* Always returning to user mode here. */ + movl $0, L_MD_ASTPENDING(%edi) + STI(%eax) + /* Pushed T_ASTFLT into tf_trapno on entry. */ + pushl %esp + call _C_LABEL(trap) + addl $4,%esp + jmp .Lsyscall_checkast /* re-check ASTs */ +IDTVEC_END(syscall) #if NNPX > 0 /* @@ -940,6 +1197,7 @@ NENTRY(probeintr) outb %al,$0xf0 # clear BUSY# latch popl %eax iret +END(probeintr) /* * void probetrap(void) @@ -949,6 +1207,7 @@ NENTRY(probetrap) incl _C_LABEL(npx_traps_while_probing) fnclex iret +END(probetrap) /* * int npx586bug1(int a, int b) @@ -964,6 +1223,7 @@ NENTRY(npx586bug1) fistpl (%esp) popl %eax ret +END(npx586bug1) #endif /* NNPX > 0 */ /* @@ -975,7 +1235,7 @@ ENTRY(sse2_zero_page) pushl %ebp movl %esp,%ebp movl 8(%esp), %edx - movl $PAGE_SIZE, %ecx + movl $(PAGE_SIZE/64), %ecx xorl %eax, %eax .align 16 1: @@ -987,12 +1247,21 @@ ENTRY(sse2_zero_page) movnti %eax, 20(%edx) movnti %eax, 24(%edx) movnti %eax, 28(%edx) - subl $32, %ecx - leal 32(%edx), %edx + movnti %eax, 32(%edx) + movnti %eax, 36(%edx) + movnti %eax, 40(%edx) + movnti %eax, 44(%edx) + movnti %eax, 48(%edx) + movnti %eax, 52(%edx) + movnti %eax, 56(%edx) + movnti %eax, 60(%edx) + addl $64, %edx + decl %ecx jnz 1b sfence pop %ebp ret +END(sse2_zero_page) /* * void sse2_copy_page(void *src, void *dst) @@ -1006,24 +1275,32 @@ ENTRY(sse2_copy_page) pushl %edi movl 20(%esp), %esi movl 24(%esp), %edi - movl $PAGE_SIZE, %ebp + xorl %ebp, %ebp .align 16 1: - movl 0(%esi), %eax - movl 4(%esi), %ebx - movl 8(%esi), %ecx - movl 12(%esi), %edx - movnti %eax, 0(%edi) - movnti %ebx, 4(%edi) - movnti %ecx, 8(%edi) - movnti %edx, 12(%edi) - subl $16, %ebp - leal 16(%esi), %esi - leal 16(%edi), %edi - jnz 1b + movl 0(%esi,%ebp), %eax + movl 4(%esi,%ebp), %ebx + movl 8(%esi,%ebp), %ecx + movl 12(%esi,%ebp), %edx + movnti %eax, 0(%edi,%ebp) + movnti %ebx, 4(%edi,%ebp) + movnti %ecx, 8(%edi,%ebp) + movnti %edx, 12(%edi,%ebp) + movl 16(%esi,%ebp), %eax + movl 20(%esi,%ebp), %ebx + movl 24(%esi,%ebp), %ecx + movl 28(%esi,%ebp), %edx + movnti %eax, 16(%edi,%ebp) + movnti %ebx, 20(%edi,%ebp) + movnti %ecx, 24(%edi,%ebp) + movnti %edx, 28(%edi,%ebp) + addl $32, %ebp + cmpl $PAGE_SIZE, %ebp + jne 1b sfence popl %edi popl %esi popl %ebx popl %ebp ret +END(sse2_copy_page)