/*	$NetBSD$	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation

__KERNEL_RCSID(0, "$NetBSD$");

#include "opt_lockdebug.h" |
#include "opt_lockdebug.h" |
#ifdef i386 |
|
#include "opt_spldebug.h" |
|
#endif |
|
|
|
#include <sys/types.h> |
#include <sys/types.h> |
#include <sys/systm.h> |
#include <sys/systm.h> |
void	_atomic_cas_cx8(void);
void	_atomic_cas_cx8_end(void);

extern void	*x86_lockpatch[];
extern void	*x86_retpatch[];
extern void	*atomic_lockpatch[];

#define	X86_NOP		0x90
#define	X86_CS		0x2e	/* CS segment-override prefix, used as branch hint */
#define	X86_DS		0x3e	/* DS segment-override prefix, used as branch hint */
#define	X86_GROUP_0F	0x0f	/* first byte of two-byte (0x0f-prefixed) opcodes */
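
/*
 * Fix up the 32-bit PC-relative displacement of a jump/call inside code
 * that patchfunc() below has copied from 'from_s' to 'to_s'.  'ptr' points
 * at the instruction in the source image; an optional 0x2e/0x3e branch-hint
 * prefix and an optional 0x0f two-byte-opcode prefix are skipped, then the
 * rel32 operand in the destination copy is adjusted by (from_s - to_s) so
 * the relocated instruction still reaches its original absolute target.
 */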
static void
adjust_jumpoff(uint8_t *ptr, void *from_s, void *to_s)
{

	/* Branch hints */
	if (ptr[0] == X86_CS || ptr[0] == X86_DS)
		ptr++;
	/* Conditional jumps */
	if (ptr[0] == X86_GROUP_0F)
		ptr++;
	/* 4-byte relative jump or call */
	*(uint32_t *)(ptr + 1 - (uintptr_t)from_s + (uintptr_t)to_s) +=
	    ((uint32_t)(uintptr_t)from_s - (uint32_t)(uintptr_t)to_s);
}
static void __unused
patchfunc(void *from_s, void *from_e, void *to_s, void *to_e,
	  void *pcrel)
{

	if ((uintptr_t)from_e - (uintptr_t)from_s !=
	    (uintptr_t)to_e - (uintptr_t)to_s)
		panic("patchfunc: sizes do not match (from=%p)", from_s);

	memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
	if (pcrel != NULL)
		adjust_jumpoff(pcrel, from_s, to_s);

#ifdef GPROF
#ifdef i386
#define	MCOUNT_CALL_OFFSET	3
#endif
#ifdef __x86_64__
#define	MCOUNT_CALL_OFFSET	5
#endif
	/* Patch mcount call offset */
	adjust_jumpoff((uint8_t *)from_s + MCOUNT_CALL_OFFSET, from_s, to_s);
#endif
}
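
/*
 * Typical use, as in x86_patch() below: overwrite a generic routine with a
 * same-sized optimized variant and fix up any embedded pc-relative branch,
 * e.g.
 *
 *	patchfunc(
 *	    cx8_spllower, cx8_spllower_end,
 *	    spllower, spllower_end,
 *	    cx8_spllower_patch
 *	);
 *
 * The source and destination sequences must be exactly the same size,
 * otherwise patchfunc() panics.
 */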
static inline void __unused
patchbytes(void *addr, const int byte1, const int byte2, const int byte3)
{

	((uint8_t *)addr)[0] = (uint8_t)byte1;
	if (byte2 != -1)
		((uint8_t *)addr)[1] = (uint8_t)byte2;
	if (byte3 != -1)
		((uint8_t *)addr)[2] = (uint8_t)byte3;
}
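
/*
 * patchbytes() overwrites up to three consecutive bytes; an argument of -1
 * leaves that byte untouched.  For example, the uniprocessor case in
 * x86_patch() uses patchbytes(x86_lockpatch[i], X86_NOP, -1, -1) to turn a
 * one-byte LOCK prefix (0xf0) into a NOP.
 */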
void
x86_patch(bool early)
{
	static bool first, second;
	u_long psl;
	u_long cr0;
	int i;

	if (early) {
		if (first)
			return;
		first = true;
	} else {
		if (second)
			return;
		second = true;
	}

	/* Disable interrupts. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	cr0 = rcr0();
	lcr0(cr0 & ~CR0_WP);
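
	/*
	 * Everything below rewrites live kernel text: interrupts are off and
	 * CR0_WP is clear, and the routine finishes by flushing caches and
	 * pipelines and restoring the saved PSL and CR0 values.
	 */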
#if !defined(GPROF)
	if (!early && ncpu == 1) {
#ifndef LOCKDEBUG
		/* Uniprocessor: kill LOCK prefixes. */
		for (i = 0; x86_lockpatch[i] != 0; i++)
			patchbytes(x86_lockpatch[i], X86_NOP, -1, -1);
		for (i = 0; atomic_lockpatch[i] != 0; i++)
			patchbytes(atomic_lockpatch[i], X86_NOP, -1, -1);
#endif	/* !LOCKDEBUG */
	}
	if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) {
		/* Faster memory barriers. */
		patchfunc(
		    sse2_lfence, sse2_lfence_end,
		    lfence, lfence_end,
		    NULL
		);
	}
#endif	/* GPROF */

#ifdef i386
	/*
	 * Patch early and late.  Second time around the 'lock' prefix
	 * may be gone.
	 */
	if ((cpu_feature[0] & CPUID_CX8) != 0) {
		patchfunc(
		    _atomic_cas_cx8, _atomic_cas_cx8_end,
		    _atomic_cas_64, _atomic_cas_64_end,
		    NULL
		);
	}
#endif	/* i386 */

#if !defined(SPLDEBUG)
	if (!early && (cpu_feature[0] & CPUID_CX8) != 0) {
		/* Faster splx(), mutex_spin_exit(). */
		patchfunc(
		    cx8_spllower, cx8_spllower_end,
		    spllower, spllower_end,
		    cx8_spllower_patch
		);
#if defined(i386) && !defined(LOCKDEBUG)
		patchfunc(
		    i686_mutex_spin_exit, i686_mutex_spin_exit_end,
		    mutex_spin_exit, mutex_spin_exit_end,
		    i686_mutex_spin_exit_patch
		);
#endif	/* i386 && !LOCKDEBUG */
	}
#endif	/* !SPLDEBUG */

	/*
	 * On some Opteron revisions, locked operations erroneously
	 * allow memory references to be `bled' outside of critical
	 * sections.  Apply workaround.
	 */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    (CPUID2FAMILY(cpu_info_primary.ci_signature) == 0xe ||
	    (CPUID2FAMILY(cpu_info_primary.ci_signature) == 0xf &&
	    CPUID2EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
		for (i = 0; x86_retpatch[i] != 0; i++) {
			/* ret,nop,nop,ret -> lfence,ret (0f ae e8 = lfence) */
			patchbytes(x86_retpatch[i], 0x0f, 0xae, 0xe8);
		}
	}

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();
	x86_write_psl(psl);

	/* Re-enable write protection. */
	lcr0(cr0);
}