File: [cvs.NetBSD.org] / src / sys / arch / x86 / x86 / fpu.c (download)
Revision 1.55, Fri Jul 5 17:08:56 2019 UTC (4 years, 8 months ago) by maxv
Branch: MAIN
CVS Tags: netbsd-9-base, netbsd-9-0-RELEASE, netbsd-9-0-RC2, netbsd-9-0-RC1 Branch point for: netbsd-9
Changes since 1.54: +6 -3
lines
More inlines, prerequisites for future changes. Also, remove fngetsw(),
which was a duplicate of fnstsw().
|
/* $NetBSD: fpu.c,v 1.55 2019/07/05 17:08:56 maxv Exp $ */
/*
* Copyright (c) 2008 The NetBSD Foundation, Inc. All
* rights reserved.
*
* This code is derived from software developed for The NetBSD Foundation
* by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1991 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)npx.c 7.2 (Berkeley) 5/12/91
*/
/*
* Copyright (c) 1994, 1995, 1998 Charles M. Hannum. All rights reserved.
* Copyright (c) 1990 William Jolitz.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)npx.c 7.2 (Berkeley) 5/12/91
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.55 2019/07/05 17:08:56 maxv Exp $");
#include "opt_multiprocessor.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>
#include <machine/cpu.h>
#include <machine/cpuvar.h>
#include <machine/cputypes.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>
#ifdef XENPV
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif
/*
* Mask of MXCSR bits accepted for this machine. Filled in by
* fpuinit_mxcsr_mask() and applied to MXCSR values that are written into
* save areas.
*/
uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
/*
* True while the eager FPU switch policy is in effect. Changed at run time
* by eager_change_cpu() and exported through the "fpu_eager" sysctl node.
*/
bool x86_fpu_eager __read_mostly = false;
/*
 * Return the address of the FPU save area embedded in the PCB of 'l'.
 */
static inline union savefpu *
lwp_fpuarea(struct lwp *l)
{
	struct pcb *const lwp_pcb = lwp_getpcb(l);
	union savefpu *const area = &lwp_pcb->pcb_savefpu;

	return area;
}
/*
* Reset the x87 FPU of the calling CPU with fninit(), bracketed by
* clts()/stts(). The 'ci' argument is not used by this function.
*/
void
fpuinit(struct cpu_info *ci)
{
/*
* This might not be strictly necessary since it will be initialized
* for each process. However it does no harm.
*/
clts();
fninit();
stts();
}
void
fpuinit_mxcsr_mask(void)
{
#ifndef XENPV
union savefpu fpusave __aligned(16);
u_long psl;
memset(&fpusave, 0, sizeof(fpusave));
/* Disable interrupts, and enable FPU */
psl = x86_read_psl();
x86_disable_intr();
clts();
/* Fill in the FPU area */
fxsave(&fpusave);
/* Restore previous state */
stts();
x86_write_psl(psl);
if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
} else {
x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
}
#else
/*
* XXX XXX XXX: On Xen the FXSAVE above faults. That's because
* &fpusave is not 16-byte aligned. Stack alignment problem
* somewhere, it seems.
*/
x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
#endif
}
/*
* Scrub the x87 FIP/FDP/FOP registers before a state restore on AMD CPUs.
* Called by fpu_area_restore() ahead of fxrstor()/xrstor().
*/
static inline void
fpu_errata_amd(void)
{
uint16_t sw;
/*
* AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
* when FSW.ES=0, leaking other threads' execution history.
*
* Clear them manually by loading a zero (fldummy). We do this
* unconditionally, regardless of FSW.ES.
*
* Before that, clear the ES bit in the x87 status word if it is
* currently set, in order to avoid causing a fault in the
* upcoming load.
*
* Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
* which indicates that FIP/FDP/FOP are restored (same behavior
* as Intel). We're not using it though.
*/
fnstsw(&sw);
/* 0x80 is the ES bit of the status word referred to above. */
if (sw & 0x80)
fnclex();
fldummy();
}
/*
* Store the current CPU's FPU state into 'area' using the save instruction
* selected by x86_fpu_save.
*
* 'xsave_features' is only passed to xsave()/xsaveopt(); the FSAVE and
* FXSAVE paths ignore it. There is no default case, so any other value of
* x86_fpu_save saves nothing.
*/
void
fpu_area_save(void *area, uint64_t xsave_features)
{
/* Enable FPU access before touching FPU state. */
clts();
switch (x86_fpu_save) {
case FPU_SAVE_FSAVE:
fnsave(area);
break;
case FPU_SAVE_FXSAVE:
fxsave(area);
break;
case FPU_SAVE_XSAVE:
xsave(area, xsave_features);
break;
case FPU_SAVE_XSAVEOPT:
xsaveopt(area, xsave_features);
break;
}
}
/*
* Load FPU state from 'area' into the current CPU's FPU using the restore
* instruction that matches x86_fpu_save.
*
* 'xsave_features' is only passed to xrstor(); the FSAVE and FXSAVE paths
* ignore it. On AMD CPUs fpu_errata_amd() runs before fxrstor()/xrstor(),
* but not before frstor().
*/
void
fpu_area_restore(void *area, uint64_t xsave_features)
{
/* Enable FPU access before touching FPU state. */
clts();
switch (x86_fpu_save) {
case FPU_SAVE_FSAVE:
frstor(area);
break;
case FPU_SAVE_FXSAVE:
if (cpu_vendor == CPUVENDOR_AMD)
fpu_errata_amd();
fxrstor(area);
break;
/* Both XSAVE flavours restore with the same xrstor(). */
case FPU_SAVE_XSAVE:
case FPU_SAVE_XSAVEOPT:
if (cpu_vendor == CPUVENDOR_AMD)
fpu_errata_amd();
xrstor(area, xsave_features);
break;
}
}
/*
* Make 'l' the owner of the current CPU's FPU and load its saved state.
*
* Asserts that the CPU has no FPU owner and that l's state is not installed
* on any CPU. Both ownership links (ci_fpcurlwp and pcb_fpcpu) are set
* before the state is restored from l's PCB.
*/
static void
fpu_lwp_install(struct lwp *l)
{
struct pcb *pcb = lwp_getpcb(l);
struct cpu_info *ci = curcpu();
KASSERT(ci->ci_fpcurlwp == NULL);
KASSERT(pcb->pcb_fpcpu == NULL);
ci->ci_fpcurlwp = l;
pcb->pcb_fpcpu = ci;
fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features);
}
/*
* Eager FPU switch from 'oldlwp' to 'newlwp' on the current CPU.
*
* At splhigh, saves and releases whatever FPU state this CPU holds via
* fpusave_cpu(true), then installs newlwp's state unless newlwp has
* LW_SYSTEM set. 'oldlwp' may be NULL; it is only inspected by the
* DIAGNOSTIC consistency checks.
*/
void
fpu_eagerswitch(struct lwp *oldlwp, struct lwp *newlwp)
{
int s;
s = splhigh();
#ifdef DIAGNOSTIC
if (oldlwp != NULL) {
struct pcb *pcb = lwp_getpcb(oldlwp);
struct cpu_info *ci = curcpu();
/*
* oldlwp's state must either be installed nowhere, or be the
* state installed on this CPU.
*/
if (pcb->pcb_fpcpu == NULL) {
KASSERT(ci->ci_fpcurlwp != oldlwp);
} else if (pcb->pcb_fpcpu == ci) {
KASSERT(ci->ci_fpcurlwp == oldlwp);
} else {
panic("%s: oldlwp's state installed elsewhere",
__func__);
}
}
#endif
fpusave_cpu(true);
if (!(newlwp->l_flag & LW_SYSTEM))
fpu_lwp_install(newlwp);
splx(s);
}
/* -------------------------------------------------------------------------- */
/*
* The following table is used to ensure that the FPE_... value
* that is passed as a trapcode to the signal handler of the user
* process does not have more than one bit set.
*
* Multiple bits may be set if SSE simd instructions generate errors
* on more than one value or if the user process modifies the control
* word while a status word bit is already set (which is a sign
* of bad coding).
* We have no choice but to narrow them down to one bit, since we must
* not send a trapcode that is not exactly one of the FPE_ macros.
*
* The mechanism has a static table with 128 entries. Each combination
* of the 7 FPU status word exception bits directly translates to a
* position in this table, where a single FPE_... value is stored.
* This FPE_... value stored there is considered the "most important"
* of the exception bits and will be sent as the signal code. The
* precedence of the bits is based upon Intel Document "Numerical
* Applications", Chapter "Special Computational Situations".
*
* The code to choose one of these values does these steps:
* 1) Throw away status word bits that cannot be masked.
* 2) Throw away the bits currently masked in the control word,
* assuming the user isn't interested in them anymore.
* 3) Reinsert status word bit 7 (stack fault) if it is set, which
* cannot be masked but must be preserved.
* 'Stack fault' is a sub-class of 'invalid operation'.
* 4) Use the remaining bits to point into the trapcode table.
*
* The 6 maskable bits in order of their preference, as stated in the
* above referenced Intel manual:
* 1 Invalid operation (FP_X_INV)
* 1a Stack underflow
* 1b Stack overflow
* 1c Operand of unsupported format
* 1d SNaN operand.
* 2 QNaN operand (not an exception, irrelevant here)
* 3 Any other invalid-operation not mentioned above or zero divide
* (FP_X_INV, FP_X_DZ)
* 4 Denormal operand (FP_X_DNML)
* 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
* 6 Inexact result (FP_X_IMP)
*
* NB: the above seems to mix up the MXCSR error bits and the x87 ones.
* They are in the same order, but there is no EN_SW_STACK_FAULT in the
* MXCSR status.
*
* The table is nearly, but not quite, in bit order (ZERODIV and DENORM
* are swapped).
*
* This table assumes that any stack fault is cleared - so that an INVOP
* fault will only be reported as FLTSUB once.
* This might not happen if the mask is being changed.
*/
/*
* FPE_xxx1(f) yields the single FPE_ code for table index 'f'. Precedence:
* INVOP (FPE_FLTSUB if the stack fault bit is also set, else FPE_FLTINV),
* ZERODIV, DENORM (reported as FPE_FLTUND), OVERFLOW, UNDERFLOW, PRECLOSS,
* then a lone stack fault (FPE_FLTSUB); 0 if no bit is set.
*/
#define FPE_xxx1(f) (f & EN_SW_INVOP \
? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
: f & EN_SW_ZERODIV ? FPE_FLTDIV \
: f & EN_SW_DENORM ? FPE_FLTUND \
: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
: f & EN_SW_PRECLOSS ? FPE_FLTRES \
: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
/*
* Each FPE_xxxN(f) expands to N consecutive initializers for indices
* f .. f+N-1 by doubling the previous macro.
*/
#define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1))
#define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2))
#define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4))
#define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8))
#define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16))
/* 4 x 32 = 128 entries, indexed by the low 7 status bits (see fputrap()). */
static const uint8_t fpetable[128] = {
FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
/* The generator macros are only needed to build the table. */
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
/*
* This is a synchronous trap on either an x87 instruction (due to an unmasked
* error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
* to an error on the instruction itself.
*
* If trap actually generates a signal, then the fpu state is saved and then
* copied onto the lwp's user-stack, and then recovered from there when the
* signal returns.
*
* All this code needs to do is save the reason for the trap. For x87 traps the
* status word bits need clearing to stop the trap re-occurring. For SSE traps
* the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
*
* We come here with interrupts disabled.
*/
/*
* frame: trap frame of the faulting context. tf_trapno selects the SSE
* (T_XMM) or x87 path, and the frame's instruction pointer is reported as
* the fault address. Delivers SIGFPE to curlwp through the emulation's
* e_trapsignal hook; panics if the trap came from kernel mode.
*/
void
fputrap(struct trapframe *frame)
{
uint32_t statbits;
ksiginfo_t ksi;
if (__predict_false(!USERMODE(frame->tf_cs))) {
panic("fpu trap from kernel, trapframe %p\n", frame);
}
/*
* At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit
* should be set, and we should have gotten a DNA exception.
*/
KASSERT(curcpu()->ci_fpcurlwp == curlwp);
if (frame->tf_trapno == T_XMM) {
uint32_t mxcsr;
x86_stmxcsr(&mxcsr);
statbits = mxcsr;
/* Clear the sticky status bits */
mxcsr &= ~0x3f;
x86_ldmxcsr(&mxcsr);
/*
* Remove masked interrupts and non-status bits: shifting right by 7
* lines the mask bits up with the six status bits in 0x3f.
*/
statbits &= ~(statbits >> 7) & 0x3f;
/* Mark this is an XMM status */
statbits |= 0x10000;
} else {
uint16_t cw, sw;
/* Get current control and status words */
fnstcw(&cw);
fnstsw(&sw);
/* Clear any pending exceptions from status word */
fnclex();
/*
* Remove masked interrupts. Only the low six bits of the control
* word are used as a mask, so all other status word bits are kept.
*/
statbits = sw & ~(cw & 0x3f);
}
/* Doesn't matter now if we get pre-empted */
x86_enable_intr();
KSI_INIT_TRAP(&ksi);
ksi.ksi_signo = SIGFPE;
ksi.ksi_addr = (void *)X86_TF_RIP(frame);
/* The low 7 bits index fpetable[]; the full value goes in ksi_trap. */
ksi.ksi_code = fpetable[statbits & 0x7f];
ksi.ksi_trap = statbits;
(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}
/*
* Implement device not available (DNA) exception.
*
* If we were the last lwp to use the FPU, we can simply return.
* Otherwise, we save the previous state, if necessary, and restore
* our last saved state.
*
* Called directly from the trap 0x13 entry with interrupts still disabled.
*/
/*
* frame: trap frame of the faulting context; only used to reject traps
* taken in kernel mode (panic) and to report the faulting address.
*/
void
fpudna(struct trapframe *frame)
{
struct cpu_info *ci = curcpu();
struct lwp *l, *fl;
struct pcb *pcb;
int s;
if (!USERMODE(frame->tf_cs)) {
panic("fpudna from kernel, ip %p, trapframe %p\n",
(void *)X86_TF_RIP(frame), frame);
}
s = splhigh();
/* Save state on current CPU. */
l = ci->ci_curlwp;
pcb = lwp_getpcb(l);
fl = ci->ci_fpcurlwp;
if (fl != NULL) {
/* This CPU's FPU is owned; the code treats that as fatal in eager mode. */
if (__predict_false(x86_fpu_eager)) {
panic("%s: FPU busy with EagerFPU enabled",
__func__);
}
/*
* It seems we can get here on Xen even if we didn't
* switch lwp. In this case do nothing
*/
if (fl == l) {
KASSERT(pcb->pcb_fpcpu == ci);
clts();
splx(s);
return;
}
/* Another LWP owns this CPU's FPU: save its state and release it. */
fpusave_cpu(true);
}
/* Save our state if on a remote CPU. */
if (pcb->pcb_fpcpu != NULL) {
if (__predict_false(x86_fpu_eager)) {
panic("%s: LWP busy with EagerFPU enabled",
__func__);
}
/* Explicitly disable preemption before dropping spl. */
kpreempt_disable();
splx(s);
/* Actually enable interrupts */
x86_enable_intr();
fpusave_lwp(l, true);
KASSERT(pcb->pcb_fpcpu == NULL);
/* Back to splhigh before preemption is allowed again. */
s = splhigh();
kpreempt_enable();
}
/* Install the LWP's FPU state. */
fpu_lwp_install(l);
/* We must still be on the CPU sampled at entry. */
KASSERT(ci == curcpu());
splx(s);
}
/* -------------------------------------------------------------------------- */
/*
 * Release the FPU of the current CPU. Must be called at IPL_HIGH.
 *
 * If an LWP currently owns this CPU's FPU, its state is written to its PCB
 * when 'save' is true (and simply dropped otherwise), stts() is issued,
 * and both ownership links are cleared. With no owner this is a no-op.
 */
void
fpusave_cpu(bool save)
{
	struct cpu_info *self;
	struct lwp *owner;
	struct pcb *owner_pcb;

	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);

	self = curcpu();
	owner = self->ci_fpcurlwp;
	if (owner != NULL) {
		owner_pcb = lwp_getpcb(owner);

		if (save) {
			fpu_area_save(&owner_pcb->pcb_savefpu,
			    x86_xsave_features);
		}

		stts();
		owner_pcb->pcb_fpcpu = NULL;
		self->ci_fpcurlwp = NULL;
	}
}
/*
 * Save l's FPU state, which may be on this processor or another processor.
 * It may take some time, so we avoid disabling preemption where possible.
 * Caller must know that the target LWP is stopped, otherwise this routine
 * may race against it.
 *
 * l:    LWP whose FPU state must no longer be installed on any CPU.
 * save: handed to fpusave_cpu() when the state is found on this CPU.
 *
 * Returns once l's pcb_fpcpu has been observed as NULL, or after the state
 * has been released locally. While the state sits on another CPU that CPU
 * is sent a SYNCH_FPU IPI and we spin until pcb_fpcpu changes.
 *
 * NOTE(review): 'ticks' is sampled once before the loop and never
 * refreshed, so after the first clock tick the inner wait loop exits
 * immediately and 'spins' stops growing; the outer loop then just keeps
 * re-sending the IPI. Confirm whether re-sampling per iteration was
 * intended before relying on the spin-limit panic.
 */
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			/* Not installed anywhere: nothing to do. */
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			/* Installed here: release it directly. */
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		splx(s);

		/* Installed on another CPU: ask it to synchronize. */
#ifdef XENPV
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
			    cpu_name(oci));
		}
#else
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		if (spins > 100000000) {
			/* The original message was cut off after "did not". */
			panic("%s: remote CPU did not release FPU state of "
			    "lwp %p", __func__, l);
		}
	}
}
void
fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
{
union savefpu *fpu_save = lwp_fpuarea(l);
struct pcb *pcb = lwp_getpcb(l);
if (i386_use_fxsave) {
fpu_save->sv_xmm.fx_cw = x87_cw;
/* Force a reload of CW */
if ((x87_cw != __INITIAL_NPXCW__) &&
(x86_fpu_save == FPU_SAVE_XSAVE ||
x86_fpu_save == FPU_SAVE_XSAVEOPT)) {
fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
XCR0_X87;
}
} else {
fpu_save->sv_87.s87_cw = x87_cw;
}
pcb->pcb_fpu_dflt_cw = x87_cw;
}
/*
* Reset the FPU state of the current LWP to a clean image that uses control
* word 'x87_cw', and record 'x87_cw' as the LWP's default control word.
*
* 'l' must be curlwp and must not have LW_SYSTEM set (both asserted).
* Any live state is discarded, not saved. In eager mode the fresh image
* is installed on the CPU before returning.
*/
void
fpu_clear(struct lwp *l, unsigned int x87_cw)
{
union savefpu *fpu_save;
struct pcb *pcb;
int s;
KASSERT(l == curlwp);
KASSERT((l->l_flag & LW_SYSTEM) == 0);
fpu_save = lwp_fpuarea(l);
pcb = lwp_getpcb(l);
s = splhigh();
if (x86_fpu_eager) {
/* Eager mode: stay at splhigh until the new state is installed below. */
KASSERT(pcb->pcb_fpcpu == NULL ||
pcb->pcb_fpcpu == curcpu());
fpusave_cpu(false);
} else {
/* Lazy mode: drop spl first; fpusave_lwp() raises it itself. */
splx(s);
fpusave_lwp(l, false);
}
KASSERT(pcb->pcb_fpcpu == NULL);
/* Build the initial image in the layout used by the save method. */
switch (x86_fpu_save) {
case FPU_SAVE_FSAVE:
memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
fpu_save->sv_87.s87_tw = 0xffff;
fpu_save->sv_87.s87_cw = x87_cw;
break;
case FPU_SAVE_FXSAVE:
memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
fpu_save->sv_xmm.fx_cw = x87_cw;
break;
case FPU_SAVE_XSAVE:
case FPU_SAVE_XSAVEOPT:
memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
fpu_save->sv_xmm.fx_cw = x87_cw;
/*
* Force a reload of CW if we're using the non-default
* value.
*/
if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
XCR0_X87;
}
break;
}
pcb->pcb_fpu_dflt_cw = x87_cw;
/* Only the eager path still holds splhigh at this point. */
if (x86_fpu_eager) {
fpu_lwp_install(l);
splx(s);
}
}
/*
 * Reset selected fields of l's saved FPU image.
 *
 * Register contents are irrelevant to a signal handler, so only the tag
 * word, the control word (taken from the PCB default) and, for FXSAVE-style
 * images, MXCSR and its mask are rewritten.
 */
void
fpu_sigreset(struct lwp *l)
{
	struct pcb *const lwp_pcb = lwp_getpcb(l);
	union savefpu *const area = lwp_fpuarea(l);

	if (!i386_use_fxsave) {
		/* Legacy FSAVE image. */
		area->sv_87.s87_tw = 0xffff;
		area->sv_87.s87_cw = lwp_pcb->pcb_fpu_dflt_cw;
		return;
	}

	/* FXSAVE-style image. */
	area->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
	area->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
	area->sv_xmm.fx_tw = 0;
	area->sv_xmm.fx_cw = lwp_pcb->pcb_fpu_dflt_cw;
}
/*
 * Copy the FPU save area of 'pcb1' into 'pcb2'; x86_fpu_save_size bytes are
 * copied. Asserts afterwards that pcb2's state is not installed on any CPU.
 */
void
fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1)
{
	memcpy(&pcb2->pcb_savefpu, &pcb1->pcb_savefpu, x86_fpu_save_size);

	KASSERT(pcb2->pcb_fpcpu == NULL);
}
/* -------------------------------------------------------------------------- */
/*
* Convert the x87 part of an FXSAVE image ('sxmm') into the legacy save87
* layout ('s87'): control/status words, ip/opcode/dp, the eight stack
* registers, and a full 16-bit tag word rebuilt from the abridged one.
*/
static void
process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
{
unsigned int tag, ab_tag;
const struct fpaccfx *fx_reg;
struct fpacc87 *s87_reg;
int i;
/*
* For historic reasons core dumps and ptrace all use the old save87
* layout. Convert the important parts.
* getucontext gets what we give it.
* setucontext should return something given by getucontext, but
* we are (at the moment) willing to change it.
*
* It really isn't worth setting the 'tag' bits to 01 (zero) or
* 10 (NaN etc) since the processor will set any internal bits
* correctly when the value is loaded (the 287 believed them).
*
* Additionally the s87_tw and fx_tw are 'indexed' by the actual
* register numbers, whereas the registers themselves have ST(0)
* first. Pairing the values and tags can only be done with
* reference to the 'top of stack'.
*
* If any x87 registers are used, they will typically be from
* r7 downwards - so the high bits of the tag register indicate
* used registers. The conversions are not optimised for this.
*
* The ABI we use requires the FP stack to be empty on every
* function call. I think this means that the stack isn't expected
* to overflow - overflow doesn't drop a core in my testing.
*
* Note that this code writes to all of the 's87' structure that
* actually gets written to userspace.
*/
/* FPU control/status */
s87->s87_cw = sxmm->fx_cw;
s87->s87_sw = sxmm->fx_sw;
/* tag word handled below */
s87->s87_ip = sxmm->fx_ip;
s87->s87_opcode = sxmm->fx_opcode;
s87->s87_dp = sxmm->fx_dp;
/* FP registers (in stack order) */
fx_reg = sxmm->fx_87_ac;
s87_reg = s87->s87_ac;
for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
*s87_reg = fx_reg->r;
/* Tag word and registers. */
ab_tag = sxmm->fx_tw & 0xff; /* Bits set if valid */
if (ab_tag == 0) {
/* none used */
s87->s87_tw = 0xffff;
return;
}
tag = 0;
/*
* Separate bits of abridged tag word with zeros. Each pass ORs in one
* abridged bit and shifts everything gathered so far left by one, so
* abridged bit k ends up at bit 2k+1 of 'tag'.
*/
for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
tag |= ab_tag & i;
/* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
s87->s87_tw = (tag | tag >> 1) ^ 0xffff;
}
/*
* Convert a legacy save87 image ('s87') into the x87 fields of an FXSAVE
* image ('sxmm'): control/status words, ip/opcode/dp, an abridged tag word,
* and the eight stack registers. No other field of 'sxmm' is written.
*
* If the source tag word marks every register unused, the destination
* registers are zeroed instead of copied.
*/
static void
process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm)
{
unsigned int tag, ab_tag;
struct fpaccfx *fx_reg;
const struct fpacc87 *s87_reg;
int i;
/*
* ptrace gives us registers in the save87 format and
* we must convert them to the correct format.
*
* This code is normally used when overwriting the process's
* registers (in the pcb), so it mustn't change any other fields.
*
* There is a lot of pad in 'struct fxsave', if the destination
* is written to userspace, it must be zeroed first.
*/
/* FPU control/status */
sxmm->fx_cw = s87->s87_cw;
sxmm->fx_sw = s87->s87_sw;
/* tag word handled below */
sxmm->fx_ip = s87->s87_ip;
sxmm->fx_opcode = s87->s87_opcode;
sxmm->fx_dp = s87->s87_dp;
/* Tag word */
tag = s87->s87_tw; /* 0b11 => unused */
if (tag == 0xffff) {
/* All unused - values don't matter, zero for safety */
sxmm->fx_tw = 0;
memset(&sxmm->fx_87_ac, 0, sizeof sxmm->fx_87_ac);
return;
}
tag ^= 0xffff; /* So 0b00 is unused */
tag |= tag >> 1; /* Look at even bits */
ab_tag = 0;
i = 1;
/*
* Compress the even bits into the low byte. On pass j, 'tag' has been
* shifted right j times and i == 1 << j, so 'tag & i' moves original
* bit 2j into bit j of ab_tag. The loop stops once no bits at or above
* 'i' remain in the shifted tag.
*/
do
ab_tag |= tag & i;
while ((tag >>= 1) >= (i <<= 1));
sxmm->fx_tw = ab_tag;
/* FP registers (in stack order) */
fx_reg = sxmm->fx_87_ac;
s87_reg = s87->s87_ac;
for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
fx_reg->r = *s87_reg;
}
void
process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
{
union savefpu *fpu_save;
fpusave_lwp(l, true);
fpu_save = lwp_fpuarea(l);
if (i386_use_fxsave) {
memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
/*
* Invalid bits in mxcsr or mxcsr_mask will cause faults.
*/
fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
/*
* Make sure the x87 and SSE bits are set in xstate_bv.
* Otherwise xrstor will not restore them.
*/
if (x86_fpu_save == FPU_SAVE_XSAVE ||
x86_fpu_save == FPU_SAVE_XSAVEOPT) {
fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
(XCR0_X87 | XCR0_SSE);
}
} else {
process_xmm_to_s87(fpregs, &fpu_save->sv_87);
}
}
void
process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
{
union savefpu *fpu_save;
if (i386_use_fxsave) {
/* Save so we don't lose the xmm registers */
fpusave_lwp(l, true);
fpu_save = lwp_fpuarea(l);
process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
/*
* Make sure the x87 and SSE bits are set in xstate_bv.
* Otherwise xrstor will not restore them.
*/
if (x86_fpu_save == FPU_SAVE_XSAVE ||
x86_fpu_save == FPU_SAVE_XSAVEOPT) {
fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
(XCR0_X87 | XCR0_SSE);
}
} else {
fpusave_lwp(l, false);
fpu_save = lwp_fpuarea(l);
memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
}
}
/*
 * Return the FPU registers of 'l' in FXSAVE format.
 *
 * l's state is flushed to its PCB first. With FXSAVE support the saved
 * image is copied out as is; otherwise 'fpregs' is zeroed and filled from
 * the legacy save87 image.
 */
void
process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
{
	union savefpu *area;

	fpusave_lwp(l, true);
	area = lwp_fpuarea(l);

	if (!i386_use_fxsave) {
		memset(fpregs, 0, sizeof(*fpregs));
		process_s87_to_xmm(&area->sv_87, fpregs);
		return;
	}

	memcpy(fpregs, &area->sv_xmm, sizeof(area->sv_xmm));
}
/*
 * Return the x87 registers of 'l' in the legacy save87 format.
 *
 * l's state is flushed to its PCB first. Without FXSAVE support the saved
 * image is copied out as is; otherwise 'fpregs' is zeroed and filled by
 * converting the FXSAVE image.
 */
void
process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
{
	union savefpu *area;

	fpusave_lwp(l, true);
	area = lwp_fpuarea(l);

	if (!i386_use_fxsave) {
		memcpy(fpregs, &area->sv_87, sizeof(area->sv_87));
		return;
	}

	memset(fpregs, 0, sizeof(*fpregs));
	process_xmm_to_s87(&area->sv_xmm, fpregs);
}
/*
* Export the FPU state of 'l' into 'xstate'.
*
* l's state is flushed to its PCB first. xs_rfbm reports which components
* this kernel can provide and xs_xstate_bv which ones hold data:
* - FSAVE: x87 only, converted from the legacy image;
* - FXSAVE: x87 + SSE, copied from the legacy area;
* - XSAVE*: as recorded in the save area header, plus the extended
* components that are flagged there.
* Always returns 0.
*/
int
process_read_xstate(struct lwp *l, struct xstate *xstate)
{
union savefpu *fpu_save;
fpusave_lwp(l, true);
fpu_save = lwp_fpuarea(l);
if (x86_fpu_save == FPU_SAVE_FSAVE) {
/* Convert from legacy FSAVE format. */
memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
/* We only got x87 data. */
xstate->xs_rfbm = XCR0_X87;
xstate->xs_xstate_bv = XCR0_X87;
return 0;
}
/* Copy the legacy area. */
memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
sizeof(xstate->xs_fxsave));
if (x86_fpu_save == FPU_SAVE_FXSAVE) {
/* FXSAVE means we've got x87 + SSE data. */
xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
return 0;
}
/* Copy the bitmap indicating which states are available. */
xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
/*
* Copy one extended component out of the save area if it is flagged in
* xs_xstate_bv. The component's offset comes from x86_xsave_offsets[]
* and must lie past the XSAVE header; its size must cover the
* destination field.
*/
#define COPY_COMPONENT(xcr0_val, xsave_val, field) \
if (xstate->xs_xstate_bv & xcr0_val) { \
KASSERT(x86_xsave_offsets[xsave_val] \
>= sizeof(struct xsave_header)); \
KASSERT(x86_xsave_sizes[xsave_val] \
>= sizeof(xstate -> field)); \
\
memcpy(&xstate -> field, \
(char*)fpu_save + x86_xsave_offsets[xsave_val], \
sizeof(xstate -> field)); \
}
COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
#undef COPY_COMPONENT
return 0;
}
/*
 * Check that the component bitmaps in 'xstate' are acceptable for this
 * machine.
 *
 * Returns EINVAL if xs_xstate_bv names a component that is absent from
 * xs_rfbm, or if xs_rfbm names a component that the active save method
 * cannot hold; returns 0 otherwise.
 */
int
process_verify_xstate(const struct xstate *xstate)
{
	/* xstate_bv must be a subset of RFBM */
	if ((xstate->xs_xstate_bv & ~xstate->xs_rfbm) != 0) {
		return EINVAL;
	}

	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		/* FSAVE images can only carry x87 state. */
		if ((xstate->xs_rfbm & ~XCR0_X87) != 0) {
			return EINVAL;
		}
	} else if (x86_fpu_save == FPU_SAVE_FXSAVE) {
		/* FXSAVE images carry x87 and SSE state. */
		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)) != 0) {
			return EINVAL;
		}
	} else {
		/* Verify whether no unsupported features are enabled */
		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0) {
			return EINVAL;
		}
	}

	return 0;
}
/*
* Import FPU state for 'l' from 'xstate'.
*
* l's state is flushed to its PCB first; then every component flagged in
* xs_xstate_bv is copied into the save area. For XSAVE-style areas the
* header's xstate_bv is updated for the components covered by xs_rfbm.
* The bitmaps are not validated here; presumably the caller has run
* process_verify_xstate() first - TODO confirm. Always returns 0.
*/
int
process_write_xstate(struct lwp *l, const struct xstate *xstate)
{
union savefpu *fpu_save;
fpusave_lwp(l, true);
fpu_save = lwp_fpuarea(l);
/* Convert data into legacy FSAVE format. */
if (x86_fpu_save == FPU_SAVE_FSAVE) {
if (xstate->xs_xstate_bv & XCR0_X87)
process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
return 0;
}
/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
if (x86_fpu_save >= FPU_SAVE_XSAVE) {
/*
* Bit-wise xstate->xs_rfbm ? xstate->xs_xstate_bv
* : fpu_save->sv_xsave_hdr.xsh_xstate_bv
*
* (This relies on xs_xstate_bv being a subset of xs_rfbm.)
*/
fpu_save->sv_xsave_hdr.xsh_xstate_bv =
(fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
xstate->xs_xstate_bv;
}
if (xstate->xs_xstate_bv & XCR0_X87) {
/*
* X87 state is split into two areas, interspersed with SSE
* data. The first copy takes the leading 24 bytes of the legacy
* area, the second the x87 register array.
*/
memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
sizeof(xstate->xs_fxsave.fx_87_ac));
}
/*
* Copy MXCSR if either SSE or AVX state is requested, to match the XSAVE
* behavior for those flags.
*/
if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
/*
* Invalid bits in mxcsr or mxcsr_mask will cause faults.
*/
fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
& x86_fpu_mxcsr_mask;
fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
fpu_save->sv_xmm.fx_mxcsr_mask;
}
if (xstate->xs_xstate_bv & XCR0_SSE) {
/*
* NOTE(review): 160 is presumably the byte offset of fx_xmm within
* the legacy area - confirm against struct fxsave.
*/
memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
xstate->xs_fxsave.fx_xmm,
sizeof(xstate->xs_fxsave.fx_xmm));
}
/*
* Copy one extended component into the save area if it is flagged in
* xs_xstate_bv. The component's offset comes from x86_xsave_offsets[]
* and must lie past the XSAVE header; its size must cover the source
* field.
*/
#define COPY_COMPONENT(xcr0_val, xsave_val, field) \
if (xstate->xs_xstate_bv & xcr0_val) { \
KASSERT(x86_xsave_offsets[xsave_val] \
>= sizeof(struct xsave_header)); \
KASSERT(x86_xsave_sizes[xsave_val] \
>= sizeof(xstate -> field)); \
\
memcpy((char*)fpu_save + x86_xsave_offsets[xsave_val], \
&xstate -> field, sizeof(xstate -> field)); \
}
COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
#undef COPY_COMPONENT
return 0;
}
/* -------------------------------------------------------------------------- */
/*
* Countdown barriers for the EagerFPU mode switch. eager_change() sets
* each one to ncpu; every CPU decrements them in eager_change_cpu() and
* spins until they reach zero.
*/
static volatile unsigned long eagerfpu_cpu_barrier1 __cacheline_aligned;
static volatile unsigned long eagerfpu_cpu_barrier2 __cacheline_aligned;
/*
* Per-CPU half of the EagerFPU mode switch; broadcast as a cross-call by
* eager_change().
*
* arg1 carries the new mode as a bool cast to a pointer; arg2 is unused.
* Runs at splhigh. All CPUs meet at barrier 1, each flushes the FPU state
* it holds, the primary CPU alone stores the new mode, and all CPUs meet
* again at barrier 2 before any of them leaves.
*/
static void
eager_change_cpu(void *arg1, void *arg2)
{
struct cpu_info *ci = curcpu();
bool enabled = (bool)arg1;
int s;
s = splhigh();
/* Rendez-vous 1. */
atomic_dec_ulong(&eagerfpu_cpu_barrier1);
/* A CAS of 0 -> 0 is used here purely to read the counter. */
while (atomic_cas_ulong(&eagerfpu_cpu_barrier1, 0, 0) != 0) {
x86_pause();
}
fpusave_cpu(true);
/* Only one CPU writes the shared flag. */
if (ci == &cpu_info_primary) {
x86_fpu_eager = enabled;
}
/* Rendez-vous 2. */
atomic_dec_ulong(&eagerfpu_cpu_barrier2);
while (atomic_cas_ulong(&eagerfpu_cpu_barrier2, 0, 0) != 0) {
x86_pause();
}
splx(s);
}
/*
* Switch the EagerFPU policy to 'enabled' on every CPU.
*
* Holds cpu_lock throughout. Returns EOPNOTSUPP without changing anything
* if any CPU is marked SPCF_OFFLINE; otherwise arms both barriers with
* ncpu, broadcasts eager_change_cpu() to all CPUs, waits for it to finish
* and returns 0.
*/
static int
eager_change(bool enabled)
{
struct cpu_info *ci = NULL;
CPU_INFO_ITERATOR cii;
uint64_t xc;
mutex_enter(&cpu_lock);
/*
* We expect all the CPUs to be online.
*/
for (CPU_INFO_FOREACH(cii, ci)) {
struct schedstate_percpu *spc = &ci->ci_schedstate;
if (spc->spc_flags & SPCF_OFFLINE) {
printf("[!] cpu%d offline, EagerFPU not changed\n",
cpu_index(ci));
mutex_exit(&cpu_lock);
return EOPNOTSUPP;
}
}
/* Initialize the barriers */
eagerfpu_cpu_barrier1 = ncpu;
eagerfpu_cpu_barrier2 = ncpu;
printf("[+] %s EagerFPU...",
enabled ? "Enabling" : "Disabling");
/* The new mode travels to each CPU as a pointer-sized argument. */
xc = xc_broadcast(0, eager_change_cpu,
(void *)enabled, NULL);
xc_wait(xc);
printf(" done!\n");
mutex_exit(&cpu_lock);
return 0;
}
/*
* sysctl handler for the "fpu_eager" node.
*
* Works on a local copy of the node's value so that a write does not
* modify x86_fpu_eager directly. A read, or a write of the current value,
* returns without further action; a write of a different value goes
* through eager_change() and returns its result.
*/
static int
sysctl_machdep_fpu_eager(SYSCTLFN_ARGS)
{
struct sysctlnode node;
int error;
bool val;
val = *(bool *)rnode->sysctl_data;
node = *rnode;
/* Point the node copy at the local value for sysctl_lookup(). */
node.sysctl_data = &val;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
/* Stop on error, or when this was not a write (newp == NULL). */
if (error != 0 || newp == NULL)
return error;
if (val == x86_fpu_eager)
return 0;
return eager_change(val);
}
/* Local prototype for the non-static sysctl setup function defined below. */
void sysctl_eagerfpu_init(struct sysctllog **);
/*
* Create the read-write boolean sysctl node "fpu_eager" under CTL_MACHDEP,
* backed by x86_fpu_eager and handled by sysctl_machdep_fpu_eager().
* 'clog' is passed through to sysctl_createv(), whose return value is
* ignored.
*/
void
sysctl_eagerfpu_init(struct sysctllog **clog)
{
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_READWRITE,
CTLTYPE_BOOL, "fpu_eager",
SYSCTL_DESCR("Whether the kernel uses Eager FPU Switch"),
sysctl_machdep_fpu_eager, 0,
&x86_fpu_eager, 0,
CTL_MACHDEP, CTL_CREATE, CTL_EOL);
}