Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/arch/i386/i386/locore.S,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.44
retrieving revision 1.48.8.1
diff -u -p -r1.44 -r1.48.8.1
--- src/sys/arch/i386/i386/locore.S	2006/10/25 13:56:15	1.44
+++ src/sys/arch/i386/i386/locore.S	2007/09/03 16:47:23	1.48.8.1
@@ -1,7 +1,7 @@
-/*	$NetBSD: locore.S,v 1.44 2006/10/25 13:56:15 jmmv Exp $	*/
+/*	$NetBSD: locore.S,v 1.48.8.1 2007/09/03 16:47:23 jmcneill Exp $	*/
 
 /*-
- * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
+ * Copyright (c) 1998, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -70,15 +70,10 @@
  *	@(#)locore.s	7.3 (Berkeley) 5/13/91
  */
 
-#include "opt_compat_netbsd.h"
 #include "opt_compat_oldboot.h"
 #include "opt_cputype.h"
 #include "opt_ddb.h"
-#include "opt_ipkdb.h"
-#include "opt_lockdebug.h"
-#include "opt_multiprocessor.h"
 #include "opt_realmem.h"
-#include "opt_user_ldt.h"
 #include "opt_vm86.h"
 
 #include "npx.h"
@@ -94,62 +89,16 @@
 #include
 #include
 #include
-
-#if NLAPIC > 0
 #include
-#endif
-
 #include
-
-/* LINTSTUB: include */
-/* LINTSTUB: include */
-/* LINTSTUB: include */
-
 #include
-
-#if defined(MULTIPROCESSOR)
-
-#define SET_CURLWP(lwp,cpu) \
-	movl	CPUVAR(SELF),cpu ; \
-	movl	lwp,CPUVAR(CURLWP) ; \
-	movl	cpu,L_CPU(lwp)
-
-#else
-
-#define SET_CURLWP(lwp,tcpu)	movl	lwp,CPUVAR(CURLWP)
-#define GET_CURLWP(reg)		movl	CPUVAR(CURLWP),reg
-
-#endif
-
-#define SET_CURPCB(reg)		movl	reg,CPUVAR(CURPCB)
-
-#define CLEAR_RESCHED(reg)	movl	reg,CPUVAR(RESCHED)
+#include
+#include
 
 /* XXX temporary kluge; these should not be here */
 /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
 #include
-
-/* Disallow old names for REALBASEMEM */
-#ifdef BIOSBASEMEM
-#error BIOSBASEMEM option deprecated; use REALBASEMEM only if memory size reported by latest boot block is incorrect
-#endif
-
-/* Disallow old names for REALEXTMEM */
-#ifdef EXTMEM_SIZE
-#error EXTMEM_SIZE option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect
-#endif
-#ifdef BIOSEXTMEM
-#error BIOSEXTMEM option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect
-#endif
-
-#include
-
-
-#ifdef MULTIPROCESSOR
-#include
-#endif
-
 /*
  * Initialization
  */
@@ -160,9 +109,7 @@
 	.globl	_C_LABEL(atdevbase)
 	.globl	_C_LABEL(proc0uarea),_C_LABEL(PDPpaddr)
 	.globl	_C_LABEL(gdt)
-#ifdef I586_CPU
 	.globl	_C_LABEL(idt)
-#endif
 	.globl	_C_LABEL(lapic_tpr)
 
 #if NLAPIC > 0
@@ -188,9 +135,7 @@ _C_LABEL(lapic_tpr):
 	.long 0
 #endif
-
-_C_LABEL(cpu):		.long 0	# are we 386, 386sx, or 486,
-				#   or Pentium, or..
+_C_LABEL(cpu):		.long 0	# are we 80486, Pentium, or..
 _C_LABEL(atdevbase):	.long 0	# location of start of iomem in virtual
 _C_LABEL(proc0uarea):	.long 0
 _C_LABEL(PDPpaddr):	.long 0	# paddr of PDP, for libkvm
 
@@ -642,19 +587,20 @@ begin:
 	call	_C_LABEL(main)
 
 /*
- * void proc_trampoline(void);
+ * void lwp_trampoline(void);
+ *
  * This is a trampoline function pushed onto the stack of a newly created
  * process in order to do some additional setup.  The trampoline is entered by
  * cpu_switch()ing to the process, so we abuse the callee-saved registers used
  * by cpu_switch() to store the information about the stub to call.
  * NOTE: This function does not have a normal calling sequence!
  */
-/* LINTSTUB: Func: void proc_trampoline(void) */
-NENTRY(proc_trampoline)
-#ifdef MULTIPROCESSOR
-	call	_C_LABEL(proc_trampoline_mp)
-#endif
-	movl	$IPL_NONE,CPUVAR(ILEVEL)
+NENTRY(lwp_trampoline)
+	pushl	%ebp
+	xorl	%ebp,%ebp
+	pushl	%eax
+	call	_C_LABEL(lwp_startup)
+	addl	$8,%esp
 	pushl	%ebx
 	call	*%esi
 	addl	$4,%esp
@@ -662,12 +608,12 @@ NENTRY(proc_trampoline)
 	INTRFASTEXIT
 	/* NOTREACHED */
 
-/*****************************************************************************/
-#ifdef COMPAT_16
 /*
- * Signal trampoline; copied to top of user stack.
+ * sigcode()
+ *
+ * Signal trampoline; copied to top of user stack.  Used only for
+ * compatibility with old releases of NetBSD.
  */
-/* LINTSTUB: Var: char sigcode[1], esigcode[1]; */
 NENTRY(sigcode)
 	/*
 	 * Handler has returned here as if we called it.  The sigcontext
@@ -682,23 +628,16 @@ NENTRY(sigcode)
 	int	$0x80			# exit if sigreturn fails
 	.globl	_C_LABEL(esigcode)
 _C_LABEL(esigcode):
-#endif
-
-/*****************************************************************************/
-
-/*
- * The following is i386-specific nonsense.
- */
 
 /*
  * void lgdt(struct region_descriptor *rdp);
+ *
  * Load a new GDT pointer (and do any necessary cleanup).
  * XXX It's somewhat questionable whether reloading all the segment registers
  * is necessary, since the actual descriptor data is not changed except by
  * process creation and exit, both of which clean up via task switches.  OTOH,
  * this only happens at run time when the GDT is resized.
  */
-/* LINTSTUB: Func: void lgdt(struct region_descriptor *rdp) */
 NENTRY(lgdt)
 	/* Reload the descriptor table. */
 	movl	4(%esp),%eax
@@ -720,13 +659,22 @@ NENTRY(lgdt)
 	pushl	%eax
 	lret
 
-/*****************************************************************************/
-
 /*
- * These functions are primarily used by DDB.
+ * void x86_flush()
+ *
+ * Flush instruction pipelines by doing an intersegment (far) return.
  */
+NENTRY(x86_flush)
+	popl	%eax
+	pushl	$GSEL(GCODE_SEL, SEL_KPL)
+	pushl	%eax
+	lret
 
-/* LINTSTUB: Func: int setjmp (label_t *l) */
+/*
+ * int setjmp(label_t *)
+ *
+ * Used primarily by DDB.
+ */
 ENTRY(setjmp)
	movl	4(%esp),%eax
 	movl	%ebx,(%eax)	# save ebx
@@ -736,10 +684,14 @@ ENTRY(setjmp)
 	movl	%edi,16(%eax)	# save edi
 	movl	(%esp),%edx	# get rta
 	movl	%edx,20(%eax)	# save eip
-	xorl	%eax,%eax	# return (0);
+	xorl	%eax,%eax	# return 0
 	ret
 
-/* LINTSTUB: Func: void longjmp (label_t *l) */
+/*
+ * int longjmp(label_t *)
+ *
+ * Used primarily by DDB.
+ */
 ENTRY(longjmp)
 	movl	4(%esp),%eax
 	movl	(%eax),%ebx	# restore ebx
@@ -749,488 +701,128 @@ ENTRY(longjmp)
 	movl	16(%eax),%edi	# restore edi
 	movl	20(%eax),%edx	# get rta
 	movl	%edx,(%esp)	# put in return frame
-	xorl	%eax,%eax	# return (1);
-	incl	%eax
+	movl	$1,%eax		# return 1
 	ret
 
-/*****************************************************************************/
-
-	.globl	_C_LABEL(sched_whichqs),_C_LABEL(sched_qs)
-	.globl	_C_LABEL(uvmexp),_C_LABEL(panic)
-
-#ifdef DIAGNOSTIC
-NENTRY(switch_error)
-	pushl	$1f
-	call	_C_LABEL(panic)
-	/* NOTREACHED */
-1:	.asciz	"cpu_switch"
-#endif /* DIAGNOSTIC */
-
 /*
- * void cpu_switch(struct lwp *)
- * Find a runnable lwp and switch to it.  Wait if necessary.  If the new
- * lwp is the same as the old one, we short-circuit the context save and
- * restore.
+ * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp)
+ *
+ *	1. if (oldlwp != NULL), save its context.
+ *	2. then, restore context of newlwp.
  *
- * Note that the stack frame layout is known to "struct switchframe"
- * in <machine/frame.h> and to the code in cpu_fork() which initializes
+ * Note that the stack frame layout is known to "struct switchframe" in
+ * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
  * it for a new lwp.
  */
-ENTRY(cpu_switch)
+ENTRY(cpu_switchto)
 	pushl	%ebx
 	pushl	%esi
 	pushl	%edi
 
-#ifdef DEBUG
-	cmpl	$IPL_SCHED,CPUVAR(ILEVEL)
-	jae	1f
-	pushl	$2f
-	call	_C_LABEL(panic)
-	/* NOTREACHED */
-2:	.asciz	"not splsched() in cpu_switch!"
-1:
-#endif /* DEBUG */
-
-	movl	16(%esp),%esi		# current
-
-	/*
-	 * Clear curlwp so that we don't accumulate system time while idle.
-	 * This also insures that schedcpu() will move the old lwp to
-	 * the correct queue if it happens to get called from the spllower()
-	 * below and changes the priority.  (See corresponding comment in
-	 * userret()).
-	 */
-	movl	$0,CPUVAR(CURLWP)
-	/*
-	 * First phase: find new lwp.
-	 *
-	 * Registers:
-	 *   %eax - queue head, scratch, then zero
-	 *   %ebx - queue number
-	 *   %ecx - cached value of whichqs
-	 *   %edx - next lwp in queue
-	 *   %esi - old lwp
-	 *   %edi - new lwp
-	 */
-
-	/* Look for new lwp. */
-	cli				# splhigh doesn't do a cli
-	movl	_C_LABEL(sched_whichqs),%ecx
-	bsfl	%ecx,%ebx		# find a full q
-	jnz	switch_dequeue
-
-	/*
-	 * idling: save old context.
-	 *
-	 * Registers:
-	 *   %eax, %ecx - scratch
-	 *   %esi - old lwp, then old pcb
-	 *   %edi - idle pcb
-	 */
-
-	pushl	%esi
-	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate(oldproc)
-	addl	$4,%esp
-
-	movl	L_ADDR(%esi),%esi
-
-	/* Save stack pointers. */
-	movl	%esp,PCB_ESP(%esi)
-	movl	%ebp,PCB_EBP(%esi)
-
-	/* Find idle PCB for this CPU */
-#ifndef MULTIPROCESSOR
-	movl	$_C_LABEL(lwp0),%ebx
-	movl	L_ADDR(%ebx),%edi
-	movl	L_MD_TSS_SEL(%ebx),%edx
-#else
-	movl	CPUVAR(IDLE_PCB),%edi
-	movl	CPUVAR(IDLE_TSS_SEL),%edx
-#endif
-	movl	$0,CPUVAR(CURLWP)	/* In case we fault... */
-
-	/* Restore the idle context (avoid interrupts) */
-	cli
-
-	/* Restore stack pointers. */
-	movl	PCB_ESP(%edi),%esp
-	movl	PCB_EBP(%edi),%ebp
-
-	/* Switch TSS. Reset "task busy" flag before loading. */
-	movl	%cr3,%eax
-	movl	%eax,PCB_CR3(%edi)
-#ifdef MULTIPROCESSOR
-	movl	CPUVAR(GDT),%eax
-#else
-	movl	_C_LABEL(gdt),%eax
-#endif
-	andl	$~0x0200,4-SEL_KPL(%eax,%edx,1)
-	ltr	%dx
-
-	/* We're always in the kernel, so we don't need the LDT. */
-
-	/* Restore cr0 (including FPU state). */
-	movl	PCB_CR0(%edi),%ecx
-	movl	%ecx,%cr0
-
-	/* Record new pcb. */
-	SET_CURPCB(%edi)
-
-	xorl	%esi,%esi
-	sti
-idle_unlock:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
-	call	_C_LABEL(sched_unlock_idle)
-#endif
-	/* Interrupts are okay again. */
-	pushl	$IPL_NONE		# spl0()
-	call	_C_LABEL(Xspllower)	# process pending interrupts
-	addl	$4,%esp
-	jmp	idle_start
-idle_zero:
-	sti
-	call	_C_LABEL(uvm_pageidlezero)
-	cli
-	cmpl	$0,_C_LABEL(sched_whichqs)
-	jnz	idle_exit
-idle_loop:
-	/* Try to zero some pages. */
-	movl	_C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx
-	testl	%ecx,%ecx
-	jnz	idle_zero
-	sti
-	hlt
-NENTRY(mpidle)
-idle_start:
-	cli
-	cmpl	$0,_C_LABEL(sched_whichqs)
-	jz	idle_loop
-idle_exit:
-	movl	$IPL_HIGH,CPUVAR(ILEVEL)	# splhigh
-	sti
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
-	call	_C_LABEL(sched_lock_idle)
-#endif
-	movl	_C_LABEL(sched_whichqs),%ecx
-	bsfl	%ecx,%ebx
-	jz	idle_unlock
-
-switch_dequeue:
-	/*
-	 * we're running at splhigh(), but it's otherwise okay to take
-	 * interrupts here.
-	 */
-	sti
-	leal	_C_LABEL(sched_qs)(,%ebx,8),%eax	# select q
-
-	movl	L_FORW(%eax),%edi	# unlink from front of process q
-#ifdef DIAGNOSTIC
-	cmpl	%edi,%eax		# linked to self (i.e. nothing queued)?
-	je	_C_LABEL(switch_error)	# not possible
-#endif /* DIAGNOSTIC */
-	movl	L_FORW(%edi),%edx
-	movl	%edx,L_FORW(%eax)
-	movl	%eax,L_BACK(%edx)
-
-	cmpl	%edx,%eax		# q empty?
-	jne	3f
-
-	btrl	%ebx,%ecx		# yes, clear to indicate empty
-	movl	%ecx,_C_LABEL(sched_whichqs)	# update q status
-
-3:	/* We just did it. */
-	xorl	%eax,%eax
-	CLEAR_RESCHED(%eax)
-
-switch_resume:
-#ifdef DIAGNOSTIC
-	cmpl	%eax,L_WCHAN(%edi)	# Waiting for something?
-	jne	_C_LABEL(switch_error)	# Yes; shouldn't be queued.
-	cmpb	$LSRUN,L_STAT(%edi)	# In run state?
-	jne	_C_LABEL(switch_error)	# No; shouldn't be queued.
-#endif /* DIAGNOSTIC */
-
-	/* Isolate lwp.  XXX Is this necessary? */
-	movl	%eax,L_BACK(%edi)
-
-	/* Record new lwp. */
-	movb	$LSONPROC,L_STAT(%edi)	# l->l_stat = LSONPROC
-	SET_CURLWP(%edi,%ecx)
-
-	/* Skip context switch if same lwp. */
-	xorl	%ebx,%ebx
-	cmpl	%edi,%esi
-	je	switch_return
-
-	/* If old lwp exited, don't bother. */
+	movl	16(%esp),%esi		# oldlwp
+	movl	20(%esp),%edi		# newlwp
 	testl	%esi,%esi
-	jz	switch_exited
-
-	/*
-	 * Second phase: save old context.
-	 *
-	 * Registers:
-	 *   %eax, %ecx - scratch
-	 *   %esi - old lwp, then old pcb
-	 *   %edi - new lwp
-	 */
-
-	pushl	%esi
-	call	_C_LABEL(pmap_deactivate2)	# pmap_deactivate(oldproc)
-	addl	$4,%esp
-
-	movl	L_ADDR(%esi),%esi
-
-	/* Save stack pointers. */
-	movl	%esp,PCB_ESP(%esi)
-	movl	%ebp,PCB_EBP(%esi)
-
-switch_exited:
-	/*
-	 * Third phase: restore saved context.
-	 *
-	 * Registers:
-	 *   %eax, %ebx, %ecx, %edx - scratch
-	 *   %esi - new pcb
-	 *   %edi - new lwp
-	 */
-
-	/* No interrupts while loading new state. */
-	cli
-	movl	L_ADDR(%edi),%esi
+	jz	1f
 
-	/* Restore stack pointers. */
-	movl	PCB_ESP(%esi),%esp
-	movl	PCB_EBP(%esi),%ebp
+	/* Save old context. */
+	movl	L_ADDR(%esi),%eax
+	movl	%esp,PCB_ESP(%eax)
+	movl	%ebp,PCB_EBP(%eax)
+
+	/* Switch to newlwp's stack. */
+1:	movl	L_ADDR(%edi),%ebx
+	movl	PCB_EBP(%ebx),%ebp
+	movl	PCB_ESP(%ebx),%esp
 
-#if 0
-	/* Don't bother with the rest if switching to a system process. */
-	testl	$P_SYSTEM,L_FLAG(%edi);	XXX NJWLWP lwp's don't have P_SYSTEM!
-	jnz	switch_restored
-#endif
-
-	/* Switch TSS. Reset "task busy" flag before loading. */
+	/* Switch TSS.  Reset "task busy" flag before loading. */
 	movl	%cr3,%eax
-	movl	%eax,PCB_CR3(%esi)	/* XXX should be done by pmap_activate? */
-#ifdef MULTIPROCESSOR
+	movl	%eax,PCB_CR3(%ebx)	# for TSS gates
 	movl	CPUVAR(GDT),%eax
-#else
-	/* Load TSS info. */
-	movl	_C_LABEL(gdt),%eax
-#endif
 	movl	L_MD_TSS_SEL(%edi),%edx
-	andl	$~0x0200,4(%eax,%edx, 1)
 	ltr	%dx
 
-	pushl	%edi
-	call	_C_LABEL(pmap_activate)	# pmap_activate(p)
-	addl	$4,%esp
+	/* Set curlwp. */
+	movl	%edi,CPUVAR(CURLWP)
+
+	/* Don't bother with the rest if switching to a system process. */
+	testl	$LW_SYSTEM,L_FLAG(%edi)
+	jnz	4f
+
+	/* Is this process using RAS (restartable atomic sequences)? */
+	movl	L_PROC(%edi),%eax
+	cmpl	$0,P_RASLIST(%eax)
+	jne	5f
+
+	/*
+	 * Restore cr0 (including FPU state).  Raise the IPL to IPL_IPI.
+	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
+	 * is deferred until mi_switch(), when cpu_switchto() returns.
+	 */
+2:	movl	$IPL_IPI,CPUVAR(ILEVEL)
+	movl	PCB_CR0(%ebx),%ecx
+	movl	%cr0,%edx
 
-#if 0
-switch_restored:
-#endif
-	/* Restore cr0 (including FPU state). */
-	movl	PCB_CR0(%esi),%ecx
-#ifdef MULTIPROCESSOR
 	/*
 	 * If our floating point registers are on a different CPU,
-	 * clear CR0_TS so we'll trap rather than reuse bogus state.
+	 * set CR0_TS so we'll trap rather than reuse bogus state.
 	 */
-	movl	PCB_FPCPU(%esi),%ebx
-	cmpl	CPUVAR(SELF),%ebx
-	jz	1f
+	movl	PCB_FPCPU(%ebx),%eax
+	cmpl	CPUVAR(SELF),%eax
+	je	3f
 	orl	$CR0_TS,%ecx
-1:
-#endif
-	movl	%ecx,%cr0
-	/* Record new pcb. */
-	SET_CURPCB(%esi)
-
-	/* Interrupts are okay again. */
-	sti
-
-/*
- * Check for restartable atomic sequences (RAS)
- */
-	movl	CPUVAR(CURLWP),%edi
-	movl	L_PROC(%edi),%esi
-	cmpl	$0,P_RASLIST(%esi)
-	jne	2f
-1:
-	movl	$1,%ebx
-
-switch_return:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
-	call	_C_LABEL(sched_unlock_idle)
-#endif
-	cmpl	$0,CPUVAR(IPENDING)
-	jz	3f
-	pushl	$IPL_NONE		# spl0()
-	call	_C_LABEL(Xspllower)	# process pending interrupts
-	addl	$4,%esp
-3:
-	movl	$IPL_HIGH,CPUVAR(ILEVEL)	# splhigh()
-
-	movl	%ebx,%eax
+	/* Reloading CR0 is very expensive - avoid if possible. */
+3:	cmpl	%edx,%ecx
+	je	4f
+	movl	%ecx,%cr0
+	/* Return to the new LWP, returning 'oldlwp' in %eax. */
+4:	movl	%esi,%eax
 	popl	%edi
 	popl	%esi
 	popl	%ebx
 	ret
 
-2:	# check RAS list
-	movl	L_MD_REGS(%edi),%ebx
-	movl	TF_EIP(%ebx),%eax
+	/* Check for restartable atomic sequences (RAS). */
+5:	movl	L_MD_REGS(%edi),%ecx
+	pushl	TF_EIP(%ecx)
 	pushl	%eax
-	pushl	%esi
 	call	_C_LABEL(ras_lookup)
 	addl	$8,%esp
 	cmpl	$-1,%eax
-	je	1b
-	movl	%eax,TF_EIP(%ebx)
-	jmp	1b
-
-/*
- * void cpu_switchto(struct lwp *current, struct lwp *next)
- *	Switch to the specified next LWP.
- */
-ENTRY(cpu_switchto)
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-#ifdef DEBUG
-	cmpl	$IPL_SCHED,CPUVAR(ILEVEL)
-	jae	1f
-	pushl	$2f
-	call	_C_LABEL(panic)
-	/* NOTREACHED */
-2:	.asciz	"not splsched() in cpu_switchto!"
-1:
-#endif /* DEBUG */
-
-	movl	16(%esp),%esi		# current
-	movl	20(%esp),%edi		# next
-
-	/*
-	 * Clear curlwp so that we don't accumulate system time while idle.
-	 * This also insures that schedcpu() will move the old process to
-	 * the correct queue if it happens to get called from the spllower()
-	 * below and changes the priority.  (See corresponding comment in
-	 * usrret()).
-	 *
-	 * XXX Is this necessary?  We know we won't go idle.
-	 */
-	movl	$0,CPUVAR(CURLWP)
-
-	/*
-	 * We're running at splhigh(), but it's otherwise okay to take
-	 * interrupts here.
-	 */
-	sti
-
-	/* Jump into the middle of cpu_switch */
-	xorl	%eax,%eax
-	jmp	switch_resume
-
-/*
- * void cpu_exit(struct lwp *l)
- *	Switch to the appropriate idle context (lwp0's if uniprocessor; the CPU's
- *	if multiprocessor) and deallocate the address space and kernel stack for p.
- *	Then jump into cpu_switch(), as if we were in the idle proc all along.
- */
-#ifndef MULTIPROCESSOR
-	.globl	_C_LABEL(lwp0)
-#endif
-/* LINTSTUB: Func: void cpu_exit(struct lwp *l) */
-ENTRY(cpu_exit)
-	movl	4(%esp),%edi		# old process
-#ifndef MULTIPROCESSOR
-	movl	$_C_LABEL(lwp0),%ebx
-	movl	L_ADDR(%ebx),%esi
-	movl	L_MD_TSS_SEL(%ebx),%edx
-#else
-	movl	CPUVAR(IDLE_PCB),%esi
-	movl	CPUVAR(IDLE_TSS_SEL),%edx
-#endif
-	/* In case we fault... */
-	movl	$0,CPUVAR(CURLWP)
-
-	/* Restore the idle context. */
-	cli
-
-	/* Restore stack pointers. */
-	movl	PCB_ESP(%esi),%esp
-	movl	PCB_EBP(%esi),%ebp
-
-	/* Switch TSS. Reset "task busy" flag before loading. */
-	movl	%cr3,%eax
-	movl	%eax,PCB_CR3(%esi)
-#ifdef MULTIPROCESSOR
-	movl	CPUVAR(GDT),%eax
-#else
-	/* Load TSS info. */
-	movl	_C_LABEL(gdt),%eax
-#endif
-
-	andl	$~0x0200,4-SEL_KPL(%eax,%edx,1)
-	ltr	%dx
-
-	/* We're always in the kernel, so we don't need the LDT. */
-
-	/* Restore cr0 (including FPU state). */
-	movl	PCB_CR0(%esi),%ecx
-	movl	%ecx,%cr0
-
-	/* Record new pcb. */
-	SET_CURPCB(%esi)
-
-	/* Interrupts are okay again. */
-	sti
-
-	/*
-	 * Schedule the dead LWP's stack to be freed.
-	 */
-	pushl	%edi
-	call	_C_LABEL(lwp_exit2)
-	addl	$4,%esp
-
-	/* Jump into cpu_switch() with the right state. */
-	xorl	%esi,%esi
-	movl	%esi,CPUVAR(CURLWP)
-	jmp	idle_start
+	je	2b
+	movl	L_MD_REGS(%edi),%ecx
+	movl	%eax,TF_EIP(%ecx)
+	jmp	2b
 
 /*
  * void savectx(struct pcb *pcb);
+ *
  * Update pcb, saving current processor state.
  */
-/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
 ENTRY(savectx)
-	movl	4(%esp),%edx		# edx = p->p_addr
-
-	/* Save stack pointers. */
+	movl	4(%esp),%edx		# edx = pcb
 	movl	%esp,PCB_ESP(%edx)
 	movl	%ebp,PCB_EBP(%edx)
-
 	ret
 
 /*
+ * osyscall()
+ *
  * Old call gate entry for syscall
  */
-/* LINTSTUB: Var: char Xosyscall[1]; */
 IDTVEC(osyscall)
-	/* Set eflags in trap frame. */
-	pushfl
+	pushfl			# set eflags in trap frame
 	popl	8(%esp)
 	pushl	$7		# size of instruction for restart
 	jmp	syscall1
 
 /*
+ * syscall()
+ *
  * Trap gate entry for syscall
  */
-/* LINTSTUB: Var: char Xsyscall[1]; */
 IDTVEC(syscall)
 	pushl	$2		# size of instruction for restart
 syscall1:
@@ -1306,7 +898,9 @@ syscall1:
  * latch stuff in probintr() can be moved to npxprobe().
  */
 
-/* LINTSTUB: Func: void probeintr(void) */
+/*
+ * void probeintr(void)
+ */
 NENTRY(probeintr)
 	ss
 	incl	_C_LABEL(npx_intrs_while_probing)
@@ -1319,14 +913,18 @@ NENTRY(probeintr)
 	popl	%eax
 	iret
 
-/* LINTSTUB: Func: void probetrap(void) */
+/*
+ * void probetrap(void)
+ */
 NENTRY(probetrap)
 	ss
 	incl	_C_LABEL(npx_traps_while_probing)
 	fnclex
 	iret
 
-/* LINTSTUB: Func: int npx586bug1(int a, int b) */
+/*
+ * int npx586bug1(int a, int b)
+ */
 NENTRY(npx586bug1)
 	fildl	4(%esp)		# x
 	fildl	8(%esp)		# y
@@ -1339,3 +937,65 @@ NENTRY(npx586bug1)
 	popl	%eax
 	ret
 #endif /* NNPX > 0 */
+
+/*
+ * void sse2_zero_page(void *pg)
+ *
+ * Zero a page without polluting the cache.
+ */
+ENTRY(sse2_zero_page)
+	pushl	%ebp
+	movl	%esp,%ebp
+	movl	8(%esp), %edx
+	movl	$PAGE_SIZE, %ecx
+	xorl	%eax, %eax
+	.align	16
+1:
+	movnti	%eax, 0(%edx)
+	movnti	%eax, 4(%edx)
+	movnti	%eax, 8(%edx)
+	movnti	%eax, 12(%edx)
+	movnti	%eax, 16(%edx)
+	movnti	%eax, 20(%edx)
+	movnti	%eax, 24(%edx)
+	movnti	%eax, 28(%edx)
+	subl	$32, %ecx
+	leal	32(%edx), %edx
+	jnz	1b
+	sfence
+	pop	%ebp
+	ret
+
+/*
+ * void sse2_copy_page(void *src, void *dst)
+ *
+ * Copy a page without polluting the cache.
+ */
+ENTRY(sse2_copy_page)
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp), %esi
+	movl	24(%esp), %edi
+	movl	$PAGE_SIZE, %ebp
+	.align	16
+1:
+	movl	0(%esi), %eax
+	movl	4(%esi), %ebx
+	movl	8(%esi), %ecx
+	movl	12(%esi), %edx
+	movnti	%eax, 0(%edi)
+	movnti	%ebx, 4(%edi)
+	movnti	%ecx, 8(%edi)
+	movnti	%edx, 12(%edi)
+	subl	$16, %ebp
+	leal	16(%esi), %esi
+	leal	16(%edi), %edi
+	jnz	1b
+	sfence
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
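
The change above replaces the old cpu_switch() run-queue scan: cpu_switchto() no longer picks the next runnable LWP itself, it only switches context to a newlwp already chosen by the machine-independent scheduler and hands the old LWP back to the caller. A minimal C model of that contract is sketched below; the structure layouts and helper functions are simplified assumptions for illustration, not NetBSD source.

/*
 * Sketch of the cpu_switchto() contract (illustrative only).  The real
 * save/restore is the assembly above; save_context() and
 * restore_context() are hypothetical stand-ins.
 */
struct pcb { void *pcb_esp; void *pcb_ebp; };	/* assumed, reduced */
struct lwp { struct pcb *l_addr; };		/* assumed, reduced */

static struct lwp *curlwp;	/* a per-CPU variable in the real kernel */

static void save_context(struct pcb *pcb) { (void)pcb; }
static void restore_context(struct pcb *pcb) { (void)pcb; }

struct lwp *
cpu_switchto_model(struct lwp *oldlwp, struct lwp *newlwp)
{
	/* 1. if (oldlwp != NULL), save its context. */
	if (oldlwp != NULL)
		save_context(oldlwp->l_addr);
	/* 2. then, restore context of newlwp. */
	restore_context(newlwp->l_addr);
	curlwp = newlwp;
	/* Return oldlwp so the caller (mi_switch()) can finish its
	 * bookkeeping after cpu_switchto() returns. */
	return oldlwp;
}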
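The new sse2_zero_page() and sse2_copy_page() rely on movnti, a non-temporal store that bypasses the cache so that bulk page operations do not evict useful data; the trailing sfence orders the weakly-ordered stores before the page is reused. The same technique can be expressed in C through SSE2 intrinsics; a minimal sketch, assuming a 4096-byte page and an SSE2-capable compiler (the function name and PAGE_SIZE constant here are illustrative, not part of the tree):

#include <emmintrin.h>		/* SSE2: _mm_stream_si32(), _mm_sfence() */
#include <stddef.h>

#define PAGE_SIZE 4096		/* assumed i386 page size */

static void
zero_page_nt(void *pg)
{
	int *p = pg;
	size_t i;

	/* movnti: write zeros without allocating cache lines. */
	for (i = 0; i < PAGE_SIZE / sizeof(int); i++)
		_mm_stream_si32(p + i, 0);

	/* Make the non-temporal stores globally visible, in order,
	 * before the page is handed out. */
	_mm_sfence();
}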