Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/fpu update from Ingo Molnar: "The biggest change is the addition of the non-lazy (eager) FPU saving support model and enabling it on CPUs with optimized xsaveopt/xrstor FPU state saving instructions. There are also various Sparse fixes" Fix up trivial add-add conflict in arch/x86/kernel/traps.c * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86, kvm: fix kvm's usage of kernel_fpu_begin/end() x86, fpu: remove cpu_has_xmm check in the fx_finit() x86, fpu: make eagerfpu= boot param tri-state x86, fpu: enable eagerfpu by default for xsaveopt x86, fpu: decouple non-lazy/eager fpu restore from xsave x86, fpu: use non-lazy fpu restore for processors supporting xsave lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu() x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig() x86, fpu: drop_fpu() before restoring new state from sigframe x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels x86, fpu: Consolidate inline asm routines for saving/restoring fpu state x86, signal: Cleanup ifdefs and is_ia32, is_x32
This commit is contained in:
commit
ac07f5c3cb
23 changed files with 803 additions and 936 deletions
|
@ -1833,6 +1833,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
and restore using xsave. The kernel will fallback to
|
||||
enabling legacy floating-point and sse state.
|
||||
|
||||
eagerfpu= [X86]
|
||||
on enable eager fpu restore
|
||||
off disable eager fpu restore
|
||||
auto selects the default scheme, which automatically
|
||||
enables eagerfpu restore for xsaveopt.
|
||||
|
||||
nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
|
||||
wfi(ARM) instruction doesn't work correctly and not to
|
||||
use it. This is also useful when using JTAG debugger.
|
||||
|
|
|
@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
|
|||
|
||||
get_user_ex(tmp, &sc->fpstate);
|
||||
buf = compat_ptr(tmp);
|
||||
err |= restore_i387_xstate_ia32(buf);
|
||||
err |= restore_xstate_sig(buf, 1);
|
||||
|
||||
get_user_ex(*pax, &sc->ax);
|
||||
} get_user_catch(err);
|
||||
|
@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
|
|||
sp = (unsigned long) ka->sa.sa_restorer;
|
||||
|
||||
if (used_math()) {
|
||||
sp = sp - sig_xstate_ia32_size;
|
||||
unsigned long fx_aligned, math_size;
|
||||
|
||||
sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
|
||||
*fpstate = (struct _fpstate_ia32 __user *) sp;
|
||||
if (save_i387_xstate_ia32(*fpstate) < 0)
|
||||
if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
|
||||
math_size) < 0)
|
||||
return (void __user *) -1L;
|
||||
}
|
||||
|
||||
|
|
|
@ -97,6 +97,7 @@
|
|||
#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
|
||||
|
@ -300,12 +301,14 @@ extern const char * const x86_power_flags[32];
|
|||
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
|
||||
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
|
||||
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
|
||||
#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
|
||||
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
|
||||
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
|
||||
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
|
||||
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
|
||||
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
|
||||
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
|
||||
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
|
||||
|
||||
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
|
||||
# define cpu_has_invlpg 1
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/regset.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
@ -21,42 +22,74 @@
|
|||
#include <asm/uaccess.h>
|
||||
#include <asm/xsave.h>
|
||||
|
||||
extern unsigned int sig_xstate_size;
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/sigcontext32.h>
|
||||
# include <asm/user32.h>
|
||||
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
|
||||
compat_sigset_t *set, struct pt_regs *regs);
|
||||
int ia32_setup_frame(int sig, struct k_sigaction *ka,
|
||||
compat_sigset_t *set, struct pt_regs *regs);
|
||||
#else
|
||||
# define user_i387_ia32_struct user_i387_struct
|
||||
# define user32_fxsr_struct user_fxsr_struct
|
||||
# define ia32_setup_frame __setup_frame
|
||||
# define ia32_setup_rt_frame __setup_rt_frame
|
||||
#endif
|
||||
|
||||
extern unsigned int mxcsr_feature_mask;
|
||||
extern void fpu_init(void);
|
||||
extern void eager_fpu_init(void);
|
||||
|
||||
DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
|
||||
|
||||
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
|
||||
struct task_struct *tsk);
|
||||
extern void convert_to_fxsr(struct task_struct *tsk,
|
||||
const struct user_i387_ia32_struct *env);
|
||||
|
||||
extern user_regset_active_fn fpregs_active, xfpregs_active;
|
||||
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
|
||||
xstateregs_get;
|
||||
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
|
||||
xstateregs_set;
|
||||
|
||||
|
||||
/*
|
||||
* xstateregs_active == fpregs_active. Please refer to the comment
|
||||
* at the definition of fpregs_active.
|
||||
*/
|
||||
#define xstateregs_active fpregs_active
|
||||
|
||||
extern struct _fpx_sw_bytes fx_sw_reserved;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
extern unsigned int sig_xstate_ia32_size;
|
||||
extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
|
||||
struct _fpstate_ia32;
|
||||
struct _xstate_ia32;
|
||||
extern int save_i387_xstate_ia32(void __user *buf);
|
||||
extern int restore_i387_xstate_ia32(void __user *buf);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
# define HAVE_HWFP (boot_cpu_data.hard_math)
|
||||
extern void finit_soft_fpu(struct i387_soft_struct *soft);
|
||||
#else
|
||||
# define HAVE_HWFP 1
|
||||
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
|
||||
#endif
|
||||
|
||||
static inline int is_ia32_compat_frame(void)
|
||||
{
|
||||
return config_enabled(CONFIG_IA32_EMULATION) &&
|
||||
test_thread_flag(TIF_IA32);
|
||||
}
|
||||
|
||||
static inline int is_ia32_frame(void)
|
||||
{
|
||||
return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
|
||||
}
|
||||
|
||||
static inline int is_x32_frame(void)
|
||||
{
|
||||
return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
|
||||
}
|
||||
|
||||
#define X87_FSW_ES (1 << 7) /* Exception Summary */
|
||||
|
||||
static __always_inline __pure bool use_eager_fpu(void)
|
||||
{
|
||||
return static_cpu_has(X86_FEATURE_EAGER_FPU);
|
||||
}
|
||||
|
||||
static __always_inline __pure bool use_xsaveopt(void)
|
||||
{
|
||||
return static_cpu_has(X86_FEATURE_XSAVEOPT);
|
||||
|
@ -72,6 +105,13 @@ static __always_inline __pure bool use_fxsr(void)
|
|||
return static_cpu_has(X86_FEATURE_FXSR);
|
||||
}
|
||||
|
||||
static inline void fx_finit(struct i387_fxsave_struct *fx)
|
||||
{
|
||||
memset(fx, 0, xstate_size);
|
||||
fx->cwd = 0x37f;
|
||||
fx->mxcsr = MXCSR_DEFAULT;
|
||||
}
|
||||
|
||||
extern void __sanitize_i387_state(struct task_struct *);
|
||||
|
||||
static inline void sanitize_i387_state(struct task_struct *tsk)
|
||||
|
@ -81,131 +121,88 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
|
|||
__sanitize_i387_state(tsk);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
|
||||
{
|
||||
int err;
|
||||
#define check_insn(insn, output, input...) \
|
||||
({ \
|
||||
int err; \
|
||||
asm volatile("1:" #insn "\n\t" \
|
||||
"2:\n" \
|
||||
".section .fixup,\"ax\"\n" \
|
||||
"3: movl $-1,%[err]\n" \
|
||||
" jmp 2b\n" \
|
||||
".previous\n" \
|
||||
_ASM_EXTABLE(1b, 3b) \
|
||||
: [err] "=r" (err), output \
|
||||
: "0"(0), input); \
|
||||
err; \
|
||||
})
|
||||
|
||||
/* See comment in fxsave() below. */
|
||||
#ifdef CONFIG_AS_FXSAVEQ
|
||||
asm volatile("1: fxrstorq %[fx]\n\t"
|
||||
"2:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"3: movl $-1,%[err]\n"
|
||||
" jmp 2b\n"
|
||||
".previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: [err] "=r" (err)
|
||||
: [fx] "m" (*fx), "0" (0));
|
||||
#else
|
||||
asm volatile("1: rex64/fxrstor (%[fx])\n\t"
|
||||
"2:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"3: movl $-1,%[err]\n"
|
||||
" jmp 2b\n"
|
||||
".previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: [err] "=r" (err)
|
||||
: [fx] "R" (fx), "m" (*fx), "0" (0));
|
||||
#endif
|
||||
return err;
|
||||
static inline int fsave_user(struct i387_fsave_struct __user *fx)
|
||||
{
|
||||
return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
|
||||
}
|
||||
|
||||
static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
|
||||
{
|
||||
int err;
|
||||
if (config_enabled(CONFIG_X86_32))
|
||||
return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||
else if (config_enabled(CONFIG_AS_FXSAVEQ))
|
||||
return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||
|
||||
/*
|
||||
* Clear the bytes not touched by the fxsave and reserved
|
||||
* for the SW usage.
|
||||
*/
|
||||
err = __clear_user(&fx->sw_reserved,
|
||||
sizeof(struct _fpx_sw_bytes));
|
||||
if (unlikely(err))
|
||||
return -EFAULT;
|
||||
|
||||
/* See comment in fxsave() below. */
|
||||
#ifdef CONFIG_AS_FXSAVEQ
|
||||
asm volatile("1: fxsaveq %[fx]\n\t"
|
||||
"2:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"3: movl $-1,%[err]\n"
|
||||
" jmp 2b\n"
|
||||
".previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: [err] "=r" (err), [fx] "=m" (*fx)
|
||||
: "0" (0));
|
||||
#else
|
||||
asm volatile("1: rex64/fxsave (%[fx])\n\t"
|
||||
"2:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"3: movl $-1,%[err]\n"
|
||||
" jmp 2b\n"
|
||||
".previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: [err] "=r" (err), "=m" (*fx)
|
||||
: [fx] "R" (fx), "0" (0));
|
||||
#endif
|
||||
if (unlikely(err) &&
|
||||
__clear_user(fx, sizeof(struct i387_fxsave_struct)))
|
||||
err = -EFAULT;
|
||||
/* No need to clear here because the caller clears USED_MATH */
|
||||
return err;
|
||||
/* See comment in fpu_fxsave() below. */
|
||||
return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
|
||||
}
|
||||
|
||||
static inline void fpu_fxsave(struct fpu *fpu)
|
||||
{
|
||||
/* Using "rex64; fxsave %0" is broken because, if the memory operand
|
||||
uses any extended registers for addressing, a second REX prefix
|
||||
will be generated (to the assembler, rex64 followed by semicolon
|
||||
is a separate instruction), and hence the 64-bitness is lost. */
|
||||
|
||||
#ifdef CONFIG_AS_FXSAVEQ
|
||||
/* Using "fxsaveq %0" would be the ideal choice, but is only supported
|
||||
starting with gas 2.16. */
|
||||
__asm__ __volatile__("fxsaveq %0"
|
||||
: "=m" (fpu->state->fxsave));
|
||||
#else
|
||||
/* Using, as a workaround, the properly prefixed form below isn't
|
||||
accepted by any binutils version so far released, complaining that
|
||||
the same type of prefix is used twice if an extended register is
|
||||
needed for addressing (fix submitted to mainline 2005-11-21).
|
||||
asm volatile("rex64/fxsave %0"
|
||||
: "=m" (fpu->state->fxsave));
|
||||
This, however, we can work around by forcing the compiler to select
|
||||
an addressing mode that doesn't require extended registers. */
|
||||
asm volatile("rex64/fxsave (%[fx])"
|
||||
: "=m" (fpu->state->fxsave)
|
||||
: [fx] "R" (&fpu->state->fxsave));
|
||||
#endif
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_32 */
|
||||
|
||||
/* perform fxrstor iff the processor has extended states, otherwise frstor */
|
||||
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
|
||||
{
|
||||
/*
|
||||
* The "nop" is needed to make the instructions the same
|
||||
* length.
|
||||
*/
|
||||
alternative_input(
|
||||
"nop ; frstor %1",
|
||||
"fxrstor %1",
|
||||
X86_FEATURE_FXSR,
|
||||
"m" (*fx));
|
||||
if (config_enabled(CONFIG_X86_32))
|
||||
return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
else if (config_enabled(CONFIG_AS_FXSAVEQ))
|
||||
return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
|
||||
return 0;
|
||||
/* See comment in fpu_fxsave() below. */
|
||||
return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
|
||||
"m" (*fx));
|
||||
}
|
||||
|
||||
static inline int frstor_checking(struct i387_fsave_struct *fx)
|
||||
{
|
||||
return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
}
|
||||
|
||||
static inline void fpu_fxsave(struct fpu *fpu)
|
||||
{
|
||||
asm volatile("fxsave %[fx]"
|
||||
: [fx] "=m" (fpu->state->fxsave));
|
||||
if (config_enabled(CONFIG_X86_32))
|
||||
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
|
||||
else if (config_enabled(CONFIG_AS_FXSAVEQ))
|
||||
asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
|
||||
else {
|
||||
/* Using "rex64; fxsave %0" is broken because, if the memory
|
||||
* operand uses any extended registers for addressing, a second
|
||||
* REX prefix will be generated (to the assembler, rex64
|
||||
* followed by semicolon is a separate instruction), and hence
|
||||
* the 64-bitness is lost.
|
||||
*
|
||||
* Using "fxsaveq %0" would be the ideal choice, but is only
|
||||
* supported starting with gas 2.16.
|
||||
*
|
||||
* Using, as a workaround, the properly prefixed form below
|
||||
* isn't accepted by any binutils version so far released,
|
||||
* complaining that the same type of prefix is used twice if
|
||||
* an extended register is needed for addressing (fix submitted
|
||||
* to mainline 2005-11-21).
|
||||
*
|
||||
* asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
|
||||
*
|
||||
* This, however, we can work around by forcing the compiler to
|
||||
* select an addressing mode that doesn't require extended
|
||||
* registers.
|
||||
*/
|
||||
asm volatile( "rex64/fxsave (%[fx])"
|
||||
: "=m" (fpu->state->fxsave)
|
||||
: [fx] "R" (&fpu->state->fxsave));
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* These must be called with preempt disabled. Returns
|
||||
* 'true' if the FPU state is still intact.
|
||||
|
@ -248,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk)
|
|||
return fpu_save_init(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
static inline int fpu_fxrstor_checking(struct fpu *fpu)
|
||||
{
|
||||
return fxrstor_checking(&fpu->state->fxsave);
|
||||
}
|
||||
|
||||
static inline int fpu_restore_checking(struct fpu *fpu)
|
||||
{
|
||||
if (use_xsave())
|
||||
return fpu_xrstor_checking(fpu);
|
||||
return fpu_xrstor_checking(&fpu->state->xsave);
|
||||
else if (use_fxsr())
|
||||
return fxrstor_checking(&fpu->state->fxsave);
|
||||
else
|
||||
return fpu_fxrstor_checking(fpu);
|
||||
return frstor_checking(&fpu->state->fsave);
|
||||
}
|
||||
|
||||
static inline int restore_fpu_checking(struct task_struct *tsk)
|
||||
|
@ -310,15 +304,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
|
|||
static inline void __thread_fpu_end(struct task_struct *tsk)
|
||||
{
|
||||
__thread_clear_has_fpu(tsk);
|
||||
stts();
|
||||
if (!use_eager_fpu())
|
||||
stts();
|
||||
}
|
||||
|
||||
static inline void __thread_fpu_begin(struct task_struct *tsk)
|
||||
{
|
||||
clts();
|
||||
if (!use_eager_fpu())
|
||||
clts();
|
||||
__thread_set_has_fpu(tsk);
|
||||
}
|
||||
|
||||
static inline void __drop_fpu(struct task_struct *tsk)
|
||||
{
|
||||
if (__thread_has_fpu(tsk)) {
|
||||
/* Ignore delayed exceptions from user space */
|
||||
asm volatile("1: fwait\n"
|
||||
"2:\n"
|
||||
_ASM_EXTABLE(1b, 2b));
|
||||
__thread_fpu_end(tsk);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void drop_fpu(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* Forget coprocessor state..
|
||||
*/
|
||||
preempt_disable();
|
||||
tsk->fpu_counter = 0;
|
||||
__drop_fpu(tsk);
|
||||
clear_used_math();
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void drop_init_fpu(struct task_struct *tsk)
|
||||
{
|
||||
if (!use_eager_fpu())
|
||||
drop_fpu(tsk);
|
||||
else {
|
||||
if (use_xsave())
|
||||
xrstor_state(init_xstate_buf, -1);
|
||||
else
|
||||
fxrstor_checking(&init_xstate_buf->i387);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* FPU state switching for scheduling.
|
||||
*
|
||||
|
@ -352,7 +383,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
|
|||
{
|
||||
fpu_switch_t fpu;
|
||||
|
||||
fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
|
||||
/*
|
||||
* If the task has used the math, pre-load the FPU on xsave processors
|
||||
* or if the past 5 consecutive context-switches used math.
|
||||
*/
|
||||
fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
|
||||
new->fpu_counter > 5);
|
||||
if (__thread_has_fpu(old)) {
|
||||
if (!__save_init_fpu(old))
|
||||
cpu = ~0;
|
||||
|
@ -364,14 +400,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
|
|||
new->fpu_counter++;
|
||||
__thread_set_has_fpu(new);
|
||||
prefetch(new->thread.fpu.state);
|
||||
} else
|
||||
} else if (!use_eager_fpu())
|
||||
stts();
|
||||
} else {
|
||||
old->fpu_counter = 0;
|
||||
old->thread.fpu.last_cpu = ~0;
|
||||
if (fpu.preload) {
|
||||
new->fpu_counter++;
|
||||
if (fpu_lazy_restore(new, cpu))
|
||||
if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
|
||||
fpu.preload = 0;
|
||||
else
|
||||
prefetch(new->thread.fpu.state);
|
||||
|
@ -391,44 +427,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
|
|||
{
|
||||
if (fpu.preload) {
|
||||
if (unlikely(restore_fpu_checking(new)))
|
||||
__thread_fpu_end(new);
|
||||
drop_init_fpu(new);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Signal frame handlers...
|
||||
*/
|
||||
extern int save_i387_xstate(void __user *buf);
|
||||
extern int restore_i387_xstate(void __user *buf);
|
||||
extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
|
||||
extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
|
||||
|
||||
static inline void __clear_fpu(struct task_struct *tsk)
|
||||
static inline int xstate_sigframe_size(void)
|
||||
{
|
||||
if (__thread_has_fpu(tsk)) {
|
||||
/* Ignore delayed exceptions from user space */
|
||||
asm volatile("1: fwait\n"
|
||||
"2:\n"
|
||||
_ASM_EXTABLE(1b, 2b));
|
||||
__thread_fpu_end(tsk);
|
||||
return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
|
||||
}
|
||||
|
||||
static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
|
||||
{
|
||||
void __user *buf_fx = buf;
|
||||
int size = xstate_sigframe_size();
|
||||
|
||||
if (ia32_frame && use_fxsr()) {
|
||||
buf_fx = buf + sizeof(struct i387_fsave_struct);
|
||||
size += sizeof(struct i387_fsave_struct);
|
||||
}
|
||||
|
||||
return __restore_xstate_sig(buf, buf_fx, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* The actual user_fpu_begin/end() functions
|
||||
* need to be preemption-safe.
|
||||
* Need to be preemption-safe.
|
||||
*
|
||||
* NOTE! user_fpu_end() must be used only after you
|
||||
* have saved the FP state, and user_fpu_begin() must
|
||||
* be used only immediately before restoring it.
|
||||
* These functions do not do any save/restore on
|
||||
* their own.
|
||||
* NOTE! user_fpu_begin() must be used only immediately before restoring
|
||||
* it. This function does not do any save/restore on their own.
|
||||
*/
|
||||
static inline void user_fpu_end(void)
|
||||
{
|
||||
preempt_disable();
|
||||
__thread_fpu_end(current);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void user_fpu_begin(void)
|
||||
{
|
||||
preempt_disable();
|
||||
|
@ -437,25 +469,32 @@ static inline void user_fpu_begin(void)
|
|||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void __save_fpu(struct task_struct *tsk)
|
||||
{
|
||||
if (use_xsave())
|
||||
xsave_state(&tsk->thread.fpu.state->xsave, -1);
|
||||
else
|
||||
fpu_fxsave(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* These disable preemption on their own and are safe
|
||||
*/
|
||||
static inline void save_init_fpu(struct task_struct *tsk)
|
||||
{
|
||||
WARN_ON_ONCE(!__thread_has_fpu(tsk));
|
||||
|
||||
if (use_eager_fpu()) {
|
||||
__save_fpu(tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
__save_init_fpu(tsk);
|
||||
__thread_fpu_end(tsk);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void clear_fpu(struct task_struct *tsk)
|
||||
{
|
||||
preempt_disable();
|
||||
__clear_fpu(tsk);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* i387 state interaction
|
||||
*/
|
||||
|
@ -510,11 +549,34 @@ static inline void fpu_free(struct fpu *fpu)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void fpu_copy(struct fpu *dst, struct fpu *src)
|
||||
static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
|
||||
{
|
||||
memcpy(dst->state, src->state, xstate_size);
|
||||
if (use_eager_fpu()) {
|
||||
memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
|
||||
__save_fpu(dst);
|
||||
} else {
|
||||
struct fpu *dfpu = &dst->thread.fpu;
|
||||
struct fpu *sfpu = &src->thread.fpu;
|
||||
|
||||
unlazy_fpu(src);
|
||||
memcpy(dfpu->state, sfpu->state, xstate_size);
|
||||
}
|
||||
}
|
||||
|
||||
extern void fpu_finit(struct fpu *fpu);
|
||||
static inline unsigned long
|
||||
alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
|
||||
unsigned long *size)
|
||||
{
|
||||
unsigned long frame_size = xstate_sigframe_size();
|
||||
|
||||
*buf_fx = sp = round_down(sp - frame_size, 64);
|
||||
if (ia32_frame && use_fxsr()) {
|
||||
frame_size += sizeof(struct i387_fsave_struct);
|
||||
sp -= sizeof(struct i387_fsave_struct);
|
||||
}
|
||||
|
||||
*size = frame_size;
|
||||
return sp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -19,12 +19,37 @@ struct pt_regs;
|
|||
struct user_i387_struct;
|
||||
|
||||
extern int init_fpu(struct task_struct *child);
|
||||
extern void fpu_finit(struct fpu *fpu);
|
||||
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
|
||||
extern void math_state_restore(void);
|
||||
|
||||
extern bool irq_fpu_usable(void);
|
||||
extern void kernel_fpu_begin(void);
|
||||
extern void kernel_fpu_end(void);
|
||||
|
||||
/*
|
||||
* Careful: __kernel_fpu_begin/end() must be called with preempt disabled
|
||||
* and they don't touch the preempt state on their own.
|
||||
* If you enable preemption after __kernel_fpu_begin(), preempt notifier
|
||||
* should call the __kernel_fpu_end() to prevent the kernel/user FPU
|
||||
* state from getting corrupted. KVM for example uses this model.
|
||||
*
|
||||
* All other cases use kernel_fpu_begin/end() which disable preemption
|
||||
* during kernel FPU usage.
|
||||
*/
|
||||
extern void __kernel_fpu_begin(void);
|
||||
extern void __kernel_fpu_end(void);
|
||||
|
||||
static inline void kernel_fpu_begin(void)
|
||||
{
|
||||
WARN_ON_ONCE(!irq_fpu_usable());
|
||||
preempt_disable();
|
||||
__kernel_fpu_begin();
|
||||
}
|
||||
|
||||
static inline void kernel_fpu_end(void)
|
||||
{
|
||||
__kernel_fpu_end();
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Some instructions like VIA's padlock instructions generate a spurious
|
||||
|
|
|
@ -31,6 +31,10 @@ typedef struct {
|
|||
unsigned long sig[_NSIG_WORDS];
|
||||
} sigset_t;
|
||||
|
||||
#ifndef CONFIG_COMPAT
|
||||
typedef sigset_t compat_sigset_t;
|
||||
#endif
|
||||
|
||||
#else
|
||||
/* Here we must cater to libcs that poke about in kernel headers. */
|
||||
|
||||
|
|
|
@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
|
|||
* Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
|
||||
*/
|
||||
|
||||
#define XMMS_SAVE \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
cr0 = read_cr0(); \
|
||||
clts(); \
|
||||
asm volatile( \
|
||||
"movups %%xmm0,(%0) ;\n\t" \
|
||||
"movups %%xmm1,0x10(%0) ;\n\t" \
|
||||
"movups %%xmm2,0x20(%0) ;\n\t" \
|
||||
"movups %%xmm3,0x30(%0) ;\n\t" \
|
||||
: \
|
||||
: "r" (xmm_save) \
|
||||
: "memory"); \
|
||||
} while (0)
|
||||
|
||||
#define XMMS_RESTORE \
|
||||
do { \
|
||||
asm volatile( \
|
||||
"sfence ;\n\t" \
|
||||
"movups (%0),%%xmm0 ;\n\t" \
|
||||
"movups 0x10(%0),%%xmm1 ;\n\t" \
|
||||
"movups 0x20(%0),%%xmm2 ;\n\t" \
|
||||
"movups 0x30(%0),%%xmm3 ;\n\t" \
|
||||
: \
|
||||
: "r" (xmm_save) \
|
||||
: "memory"); \
|
||||
write_cr0(cr0); \
|
||||
preempt_enable(); \
|
||||
} while (0)
|
||||
|
||||
#define ALIGN16 __attribute__((aligned(16)))
|
||||
|
||||
#define OFFS(x) "16*("#x")"
|
||||
#define PF_OFFS(x) "256+16*("#x")"
|
||||
#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
|
||||
|
@ -587,10 +555,8 @@ static void
|
|||
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
|
||||
{
|
||||
unsigned long lines = bytes >> 8;
|
||||
char xmm_save[16*4] ALIGN16;
|
||||
int cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
|
|||
:
|
||||
: "memory");
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3)
|
||||
{
|
||||
unsigned long lines = bytes >> 8;
|
||||
char xmm_save[16*4] ALIGN16;
|
||||
int cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
:
|
||||
: "memory" );
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3, unsigned long *p4)
|
||||
{
|
||||
unsigned long lines = bytes >> 8;
|
||||
char xmm_save[16*4] ALIGN16;
|
||||
int cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
:
|
||||
: "memory" );
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3, unsigned long *p4, unsigned long *p5)
|
||||
{
|
||||
unsigned long lines = bytes >> 8;
|
||||
char xmm_save[16*4] ALIGN16;
|
||||
int cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
/* Make sure GCC forgets anything it knows about p4 or p5,
|
||||
such that it won't pass to the asm volatile below a
|
||||
|
@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
like assuming they have some legal value. */
|
||||
asm("" : "=r" (p4), "=r" (p5));
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static struct xor_block_template xor_block_pIII_sse = {
|
||||
|
|
|
@ -34,41 +34,7 @@
|
|||
* no advantages to be gotten from x86-64 here anyways.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
unsigned long a, b;
|
||||
} __attribute__((aligned(16))) xmm_store_t;
|
||||
|
||||
/* Doesn't use gcc to save the XMM registers, because there is no easy way to
|
||||
tell it to do a clts before the register saving. */
|
||||
#define XMMS_SAVE \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
asm volatile( \
|
||||
"movq %%cr0,%0 ;\n\t" \
|
||||
"clts ;\n\t" \
|
||||
"movups %%xmm0,(%1) ;\n\t" \
|
||||
"movups %%xmm1,0x10(%1) ;\n\t" \
|
||||
"movups %%xmm2,0x20(%1) ;\n\t" \
|
||||
"movups %%xmm3,0x30(%1) ;\n\t" \
|
||||
: "=&r" (cr0) \
|
||||
: "r" (xmm_save) \
|
||||
: "memory"); \
|
||||
} while (0)
|
||||
|
||||
#define XMMS_RESTORE \
|
||||
do { \
|
||||
asm volatile( \
|
||||
"sfence ;\n\t" \
|
||||
"movups (%1),%%xmm0 ;\n\t" \
|
||||
"movups 0x10(%1),%%xmm1 ;\n\t" \
|
||||
"movups 0x20(%1),%%xmm2 ;\n\t" \
|
||||
"movups 0x30(%1),%%xmm3 ;\n\t" \
|
||||
"movq %0,%%cr0 ;\n\t" \
|
||||
: \
|
||||
: "r" (cr0), "r" (xmm_save) \
|
||||
: "memory"); \
|
||||
preempt_enable(); \
|
||||
} while (0)
|
||||
#include <asm/i387.h>
|
||||
|
||||
#define OFFS(x) "16*("#x")"
|
||||
#define PF_OFFS(x) "256+16*("#x")"
|
||||
|
@ -91,10 +57,8 @@ static void
|
|||
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
|
||||
{
|
||||
unsigned int lines = bytes >> 8;
|
||||
unsigned long cr0;
|
||||
xmm_store_t xmm_save[4];
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
|
|||
: [inc] "r" (256UL)
|
||||
: "memory");
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3)
|
||||
{
|
||||
unsigned int lines = bytes >> 8;
|
||||
xmm_store_t xmm_save[4];
|
||||
unsigned long cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
|
||||
kernel_fpu_begin();
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
#define BLOCK(i) \
|
||||
|
@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
|
||||
: [inc] "r" (256UL)
|
||||
: "memory");
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3, unsigned long *p4)
|
||||
{
|
||||
unsigned int lines = bytes >> 8;
|
||||
xmm_store_t xmm_save[4];
|
||||
unsigned long cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
: [inc] "r" (256UL)
|
||||
: "memory" );
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
unsigned long *p3, unsigned long *p4, unsigned long *p5)
|
||||
{
|
||||
unsigned int lines = bytes >> 8;
|
||||
xmm_store_t xmm_save[4];
|
||||
unsigned long cr0;
|
||||
|
||||
XMMS_SAVE;
|
||||
kernel_fpu_begin();
|
||||
|
||||
asm volatile(
|
||||
#undef BLOCK
|
||||
|
@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
|
|||
: [inc] "r" (256UL)
|
||||
: "memory");
|
||||
|
||||
XMMS_RESTORE;
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static struct xor_block_template xor_block_sse = {
|
||||
|
|
|
@ -20,32 +20,6 @@
|
|||
#include <linux/compiler.h>
|
||||
#include <asm/i387.h>
|
||||
|
||||
#define ALIGN32 __aligned(32)
|
||||
|
||||
#define YMM_SAVED_REGS 4
|
||||
|
||||
#define YMMS_SAVE \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
cr0 = read_cr0(); \
|
||||
clts(); \
|
||||
asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
|
||||
asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
|
||||
asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
|
||||
asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
|
||||
} while (0);
|
||||
|
||||
#define YMMS_RESTORE \
|
||||
do { \
|
||||
asm volatile("sfence" : : : "memory"); \
|
||||
asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
|
||||
asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
|
||||
asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
|
||||
asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
|
||||
write_cr0(cr0); \
|
||||
preempt_enable(); \
|
||||
} while (0);
|
||||
|
||||
#define BLOCK4(i) \
|
||||
BLOCK(32 * i, 0) \
|
||||
BLOCK(32 * (i + 1), 1) \
|
||||
|
@ -60,10 +34,9 @@ do { \
|
|||
|
||||
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
|
||||
{
|
||||
unsigned long cr0, lines = bytes >> 9;
|
||||
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
|
||||
unsigned long lines = bytes >> 9;
|
||||
|
||||
YMMS_SAVE
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (lines--) {
|
||||
#undef BLOCK
|
||||
|
@ -82,16 +55,15 @@ do { \
|
|||
p1 = (unsigned long *)((uintptr_t)p1 + 512);
|
||||
}
|
||||
|
||||
YMMS_RESTORE
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
|
||||
unsigned long *p2)
|
||||
{
|
||||
unsigned long cr0, lines = bytes >> 9;
|
||||
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
|
||||
unsigned long lines = bytes >> 9;
|
||||
|
||||
YMMS_SAVE
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (lines--) {
|
||||
#undef BLOCK
|
||||
|
@ -113,16 +85,15 @@ do { \
|
|||
p2 = (unsigned long *)((uintptr_t)p2 + 512);
|
||||
}
|
||||
|
||||
YMMS_RESTORE
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
|
||||
unsigned long *p2, unsigned long *p3)
|
||||
{
|
||||
unsigned long cr0, lines = bytes >> 9;
|
||||
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
|
||||
unsigned long lines = bytes >> 9;
|
||||
|
||||
YMMS_SAVE
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (lines--) {
|
||||
#undef BLOCK
|
||||
|
@ -147,16 +118,15 @@ do { \
|
|||
p3 = (unsigned long *)((uintptr_t)p3 + 512);
|
||||
}
|
||||
|
||||
YMMS_RESTORE
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
|
||||
unsigned long *p2, unsigned long *p3, unsigned long *p4)
|
||||
{
|
||||
unsigned long cr0, lines = bytes >> 9;
|
||||
char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
|
||||
unsigned long lines = bytes >> 9;
|
||||
|
||||
YMMS_SAVE
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (lines--) {
|
||||
#undef BLOCK
|
||||
|
@ -184,7 +154,7 @@ do { \
|
|||
p4 = (unsigned long *)((uintptr_t)p4 + 512);
|
||||
}
|
||||
|
||||
YMMS_RESTORE
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static struct xor_block_template xor_block_avx = {
|
||||
|
|
|
@ -34,17 +34,14 @@
|
|||
extern unsigned int xstate_size;
|
||||
extern u64 pcntxt_mask;
|
||||
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
||||
extern struct xsave_struct *init_xstate_buf;
|
||||
|
||||
extern void xsave_init(void);
|
||||
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
|
||||
extern int init_fpu(struct task_struct *child);
|
||||
extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
|
||||
void __user *fpstate,
|
||||
struct _fpx_sw_bytes *sw);
|
||||
|
||||
static inline int fpu_xrstor_checking(struct fpu *fpu)
|
||||
static inline int fpu_xrstor_checking(struct xsave_struct *fx)
|
||||
{
|
||||
struct xsave_struct *fx = &fpu->state->xsave;
|
||||
int err;
|
||||
|
||||
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
|
||||
|
@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
|
|||
* Clear the xsave header first, so that reserved fields are
|
||||
* initialized to zero.
|
||||
*/
|
||||
err = __clear_user(&buf->xsave_hdr,
|
||||
sizeof(struct xsave_hdr_struct));
|
||||
err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
|
||||
if (unlikely(err))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
|
|||
: [err] "=r" (err)
|
||||
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
|
||||
: "memory");
|
||||
if (unlikely(err) && __clear_user(buf, xstate_size))
|
||||
err = -EFAULT;
|
||||
/* No need to clear here because the caller clears USED_MATH */
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -165,10 +165,15 @@ void __init check_bugs(void)
|
|||
print_cpu_info(&boot_cpu_data);
|
||||
#endif
|
||||
check_config();
|
||||
check_fpu();
|
||||
check_hlt();
|
||||
check_popad();
|
||||
init_utsname()->machine[1] =
|
||||
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
|
||||
alternative_instructions();
|
||||
|
||||
/*
|
||||
* kernel_fpu_begin/end() in check_fpu() relies on the patched
|
||||
* alternative instructions.
|
||||
*/
|
||||
check_fpu();
|
||||
}
|
||||
|
|
|
@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
|
|||
dbg_restore_debug_regs();
|
||||
|
||||
fpu_init();
|
||||
xsave_init();
|
||||
|
||||
if (is_uv_system())
|
||||
uv_cpu_init();
|
||||
|
@ -1350,6 +1349,5 @@ void __cpuinit cpu_init(void)
|
|||
dbg_restore_debug_regs();
|
||||
|
||||
fpu_init();
|
||||
xsave_init();
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -19,24 +19,17 @@
|
|||
#include <asm/fpu-internal.h>
|
||||
#include <asm/user.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/sigcontext32.h>
|
||||
# include <asm/user32.h>
|
||||
#else
|
||||
# define save_i387_xstate_ia32 save_i387_xstate
|
||||
# define restore_i387_xstate_ia32 restore_i387_xstate
|
||||
# define _fpstate_ia32 _fpstate
|
||||
# define _xstate_ia32 _xstate
|
||||
# define sig_xstate_ia32_size sig_xstate_size
|
||||
# define fx_sw_reserved_ia32 fx_sw_reserved
|
||||
# define user_i387_ia32_struct user_i387_struct
|
||||
# define user32_fxsr_struct user_fxsr_struct
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Were we in an interrupt that interrupted kernel mode?
|
||||
*
|
||||
* We can do a kernel_fpu_begin/end() pair *ONLY* if that
|
||||
* For now, with eagerfpu we will return interrupted kernel FPU
|
||||
* state as not-idle. TBD: Ideally we can change the return value
|
||||
* to something like __thread_has_fpu(current). But we need to
|
||||
* be careful of doing __thread_clear_has_fpu() before saving
|
||||
* the FPU etc for supporting nested uses etc. For now, take
|
||||
* the simple route!
|
||||
*
|
||||
* On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
|
||||
* pair does nothing at all: the thread must not have fpu (so
|
||||
* that we don't try to save the FPU state), and TS must
|
||||
* be set (so that the clts/stts pair does nothing that is
|
||||
|
@ -44,6 +37,9 @@
|
|||
*/
|
||||
static inline bool interrupted_kernel_fpu_idle(void)
|
||||
{
|
||||
if (use_eager_fpu())
|
||||
return 0;
|
||||
|
||||
return !__thread_has_fpu(current) &&
|
||||
(read_cr0() & X86_CR0_TS);
|
||||
}
|
||||
|
@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
|
|||
}
|
||||
EXPORT_SYMBOL(irq_fpu_usable);
|
||||
|
||||
void kernel_fpu_begin(void)
|
||||
void __kernel_fpu_begin(void)
|
||||
{
|
||||
struct task_struct *me = current;
|
||||
|
||||
WARN_ON_ONCE(!irq_fpu_usable());
|
||||
preempt_disable();
|
||||
if (__thread_has_fpu(me)) {
|
||||
__save_init_fpu(me);
|
||||
__thread_clear_has_fpu(me);
|
||||
/* We do 'stts()' in kernel_fpu_end() */
|
||||
} else {
|
||||
/* We do 'stts()' in __kernel_fpu_end() */
|
||||
} else if (!use_eager_fpu()) {
|
||||
this_cpu_write(fpu_owner_task, NULL);
|
||||
clts();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_fpu_begin);
|
||||
EXPORT_SYMBOL(__kernel_fpu_begin);
|
||||
|
||||
void kernel_fpu_end(void)
|
||||
void __kernel_fpu_end(void)
|
||||
{
|
||||
stts();
|
||||
preempt_enable();
|
||||
if (use_eager_fpu())
|
||||
math_state_restore();
|
||||
else
|
||||
stts();
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_fpu_end);
|
||||
EXPORT_SYMBOL(__kernel_fpu_end);
|
||||
|
||||
void unlazy_fpu(struct task_struct *tsk)
|
||||
{
|
||||
|
@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
|
|||
}
|
||||
EXPORT_SYMBOL(unlazy_fpu);
|
||||
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
# define HAVE_HWFP (boot_cpu_data.hard_math)
|
||||
#else
|
||||
# define HAVE_HWFP 1
|
||||
#endif
|
||||
|
||||
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
|
||||
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
|
||||
unsigned int xstate_size;
|
||||
EXPORT_SYMBOL_GPL(xstate_size);
|
||||
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
|
||||
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
|
||||
|
||||
static void __cpuinit mxcsr_feature_mask_init(void)
|
||||
{
|
||||
unsigned long mask = 0;
|
||||
|
||||
clts();
|
||||
if (cpu_has_fxsr) {
|
||||
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
|
||||
asm volatile("fxsave %0" : : "m" (fx_scratch));
|
||||
|
@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
|
|||
mask = 0x0000ffbf;
|
||||
}
|
||||
mxcsr_feature_mask &= mask;
|
||||
stts();
|
||||
}
|
||||
|
||||
static void __cpuinit init_thread_xstate(void)
|
||||
|
@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
|
|||
init_thread_xstate();
|
||||
|
||||
mxcsr_feature_mask_init();
|
||||
/* clean state in init */
|
||||
current_thread_info()->status = 0;
|
||||
clear_used_math();
|
||||
xsave_init();
|
||||
eager_fpu_init();
|
||||
}
|
||||
|
||||
void fpu_finit(struct fpu *fpu)
|
||||
|
@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
|
|||
}
|
||||
|
||||
if (cpu_has_fxsr) {
|
||||
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
|
||||
|
||||
memset(fx, 0, xstate_size);
|
||||
fx->cwd = 0x37f;
|
||||
if (cpu_has_xmm)
|
||||
fx->mxcsr = MXCSR_DEFAULT;
|
||||
fx_finit(&fpu->state->fxsave);
|
||||
} else {
|
||||
struct i387_fsave_struct *fp = &fpu->state->fsave;
|
||||
memset(fp, 0, xstate_size);
|
||||
|
@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
|
|||
* FXSR floating point environment conversions.
|
||||
*/
|
||||
|
||||
static void
|
||||
void
|
||||
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
|
||||
{
|
||||
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
|
||||
|
@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
|
|||
memcpy(&to[i], &from[i], sizeof(to[0]));
|
||||
}
|
||||
|
||||
static void convert_to_fxsr(struct task_struct *tsk,
|
||||
const struct user_i387_ia32_struct *env)
|
||||
void convert_to_fxsr(struct task_struct *tsk,
|
||||
const struct user_i387_ia32_struct *env)
|
||||
|
||||
{
|
||||
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
|
||||
|
@ -588,223 +569,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Signal frame handlers.
|
||||
*/
|
||||
|
||||
static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
|
||||
|
||||
fp->status = fp->swd;
|
||||
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
|
||||
struct user_i387_ia32_struct env;
|
||||
int err = 0;
|
||||
|
||||
convert_from_fxsr(&env, tsk);
|
||||
if (__copy_to_user(buf, &env, sizeof(env)))
|
||||
return -1;
|
||||
|
||||
err |= __put_user(fx->swd, &buf->status);
|
||||
err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int save_i387_xsave(void __user *buf)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct _fpstate_ia32 __user *fx = buf;
|
||||
int err = 0;
|
||||
|
||||
|
||||
sanitize_i387_state(tsk);
|
||||
|
||||
/*
|
||||
* For legacy compatible, we always set FP/SSE bits in the bit
|
||||
* vector while saving the state to the user context.
|
||||
* This will enable us capturing any changes(during sigreturn) to
|
||||
* the FP/SSE bits by the legacy applications which don't touch
|
||||
* xstate_bv in the xsave header.
|
||||
*
|
||||
* xsave aware applications can change the xstate_bv in the xsave
|
||||
* header as well as change any contents in the memory layout.
|
||||
* xrestore as part of sigreturn will capture all the changes.
|
||||
*/
|
||||
tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
|
||||
|
||||
if (save_i387_fxsave(fx) < 0)
|
||||
return -1;
|
||||
|
||||
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
|
||||
sizeof(struct _fpx_sw_bytes));
|
||||
err |= __put_user(FP_XSTATE_MAGIC2,
|
||||
(__u32 __user *) (buf + sig_xstate_ia32_size
|
||||
- FP_XSTATE_MAGIC2_SIZE));
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int save_i387_xstate_ia32(void __user *buf)
|
||||
{
|
||||
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (!used_math())
|
||||
return 0;
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
|
||||
return -EACCES;
|
||||
/*
|
||||
* This will cause a "finit" to be triggered by the next
|
||||
* attempted FPU operation by the 'current' process.
|
||||
*/
|
||||
clear_used_math();
|
||||
|
||||
if (!HAVE_HWFP) {
|
||||
return fpregs_soft_get(current, NULL,
|
||||
0, sizeof(struct user_i387_ia32_struct),
|
||||
NULL, fp) ? -1 : 1;
|
||||
}
|
||||
|
||||
unlazy_fpu(tsk);
|
||||
|
||||
if (cpu_has_xsave)
|
||||
return save_i387_xsave(fp);
|
||||
if (cpu_has_fxsr)
|
||||
return save_i387_fxsave(fp);
|
||||
else
|
||||
return save_i387_fsave(fp);
|
||||
}
|
||||
|
||||
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
|
||||
sizeof(struct i387_fsave_struct));
|
||||
}
|
||||
|
||||
static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
|
||||
unsigned int size)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct user_i387_ia32_struct env;
|
||||
int err;
|
||||
|
||||
err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
|
||||
size);
|
||||
/* mxcsr reserved bits must be masked to zero for security reasons */
|
||||
tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
|
||||
if (err || __copy_from_user(&env, buf, sizeof(env)))
|
||||
return 1;
|
||||
convert_to_fxsr(tsk, &env);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int restore_i387_xsave(void __user *buf)
|
||||
{
|
||||
struct _fpx_sw_bytes fx_sw_user;
|
||||
struct _fpstate_ia32 __user *fx_user =
|
||||
((struct _fpstate_ia32 __user *) buf);
|
||||
struct i387_fxsave_struct __user *fx =
|
||||
(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
|
||||
struct xsave_hdr_struct *xsave_hdr =
|
||||
¤t->thread.fpu.state->xsave.xsave_hdr;
|
||||
u64 mask;
|
||||
int err;
|
||||
|
||||
if (check_for_xstate(fx, buf, &fx_sw_user))
|
||||
goto fx_only;
|
||||
|
||||
mask = fx_sw_user.xstate_bv;
|
||||
|
||||
err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
|
||||
|
||||
xsave_hdr->xstate_bv &= pcntxt_mask;
|
||||
/*
|
||||
* These bits must be zero.
|
||||
*/
|
||||
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
|
||||
|
||||
/*
|
||||
* Init the state that is not present in the memory layout
|
||||
* and enabled by the OS.
|
||||
*/
|
||||
mask = ~(pcntxt_mask & ~mask);
|
||||
xsave_hdr->xstate_bv &= mask;
|
||||
|
||||
return err;
|
||||
fx_only:
|
||||
/*
|
||||
* Couldn't find the extended state information in the memory
|
||||
* layout. Restore the FP/SSE and init the other extended state
|
||||
* enabled by the OS.
|
||||
*/
|
||||
xsave_hdr->xstate_bv = XSTATE_FPSSE;
|
||||
return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
|
||||
}
|
||||
|
||||
int restore_i387_xstate_ia32(void __user *buf)
|
||||
{
|
||||
int err;
|
||||
struct task_struct *tsk = current;
|
||||
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
|
||||
|
||||
if (HAVE_HWFP)
|
||||
clear_fpu(tsk);
|
||||
|
||||
if (!buf) {
|
||||
if (used_math()) {
|
||||
clear_fpu(tsk);
|
||||
clear_used_math();
|
||||
}
|
||||
|
||||
return 0;
|
||||
} else
|
||||
if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
|
||||
return -EACCES;
|
||||
|
||||
if (!used_math()) {
|
||||
err = init_fpu(tsk);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (HAVE_HWFP) {
|
||||
if (cpu_has_xsave)
|
||||
err = restore_i387_xsave(buf);
|
||||
else if (cpu_has_fxsr)
|
||||
err = restore_i387_fxsave(fp, sizeof(struct
|
||||
i387_fxsave_struct));
|
||||
else
|
||||
err = restore_i387_fsave(fp);
|
||||
} else {
|
||||
err = fpregs_soft_set(current, NULL,
|
||||
0, sizeof(struct user_i387_ia32_struct),
|
||||
NULL, fp) != 0;
|
||||
}
|
||||
set_used_math();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* FPU state for core dumps.
|
||||
* This is only used for a.out dumps now.
|
||||
|
|
|
@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|||
{
|
||||
int ret;
|
||||
|
||||
unlazy_fpu(src);
|
||||
|
||||
*dst = *src;
|
||||
if (fpu_allocated(&src->thread.fpu)) {
|
||||
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
|
||||
ret = fpu_alloc(&dst->thread.fpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
fpu_copy(&dst->thread.fpu, &src->thread.fpu);
|
||||
fpu_copy(dst, src);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -97,16 +95,6 @@ void arch_task_cache_init(void)
|
|||
SLAB_PANIC | SLAB_NOTRACK, NULL);
|
||||
}
|
||||
|
||||
static inline void drop_fpu(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* Forget coprocessor state..
|
||||
*/
|
||||
tsk->fpu_counter = 0;
|
||||
clear_fpu(tsk);
|
||||
clear_used_math();
|
||||
}
|
||||
|
||||
/*
|
||||
* Free current thread data structures etc..
|
||||
*/
|
||||
|
@ -163,7 +151,13 @@ void flush_thread(void)
|
|||
|
||||
flush_ptrace_hw_breakpoint(tsk);
|
||||
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
||||
drop_fpu(tsk);
|
||||
drop_init_fpu(tsk);
|
||||
/*
|
||||
* Free the FPU state for non xsave platforms. They get reallocated
|
||||
* lazily at the first use.
|
||||
*/
|
||||
if (!use_eager_fpu())
|
||||
free_thread_xstate(tsk);
|
||||
}
|
||||
|
||||
static void hard_disable_TSC(void)
|
||||
|
|
|
@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|||
regs->cs = __USER_CS;
|
||||
regs->ip = new_ip;
|
||||
regs->sp = new_sp;
|
||||
/*
|
||||
* Free the old FP and other extended state
|
||||
*/
|
||||
free_thread_xstate(current);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(start_thread);
|
||||
|
||||
|
|
|
@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
|
|||
regs->cs = _cs;
|
||||
regs->ss = _ss;
|
||||
regs->flags = X86_EFLAGS_IF;
|
||||
/*
|
||||
* Free the old FP and other extended state
|
||||
*/
|
||||
free_thread_xstate(current);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -1333,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
|
|||
#define genregs32_get genregs_get
|
||||
#define genregs32_set genregs_set
|
||||
|
||||
#define user_i387_ia32_struct user_i387_struct
|
||||
#define user32_fxsr_struct user_fxsr_struct
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
|
|
|
@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
|
|||
regs->orig_ax = -1; /* disable syscall checks */
|
||||
|
||||
get_user_ex(buf, &sc->fpstate);
|
||||
err |= restore_i387_xstate(buf);
|
||||
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
|
||||
|
||||
get_user_ex(*pax, &sc->ax);
|
||||
} get_user_catch(err);
|
||||
|
@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
|
|||
void __user **fpstate)
|
||||
{
|
||||
/* Default to using normal stack */
|
||||
unsigned long math_size = 0;
|
||||
unsigned long sp = regs->sp;
|
||||
unsigned long buf_fx = 0;
|
||||
int onsigstack = on_sig_stack(sp);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* redzone */
|
||||
sp -= 128;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
if (config_enabled(CONFIG_X86_64))
|
||||
sp -= 128;
|
||||
|
||||
if (!onsigstack) {
|
||||
/* This is the X/Open sanctioned signal stack switching. */
|
||||
if (ka->sa.sa_flags & SA_ONSTACK) {
|
||||
if (current->sas_ss_size)
|
||||
sp = current->sas_ss_sp + current->sas_ss_size;
|
||||
} else {
|
||||
#ifdef CONFIG_X86_32
|
||||
/* This is the legacy signal stack switching. */
|
||||
if ((regs->ss & 0xffff) != __USER_DS &&
|
||||
!(ka->sa.sa_flags & SA_RESTORER) &&
|
||||
ka->sa.sa_restorer)
|
||||
} else if (config_enabled(CONFIG_X86_32) &&
|
||||
(regs->ss & 0xffff) != __USER_DS &&
|
||||
!(ka->sa.sa_flags & SA_RESTORER) &&
|
||||
ka->sa.sa_restorer) {
|
||||
/* This is the legacy signal stack switching. */
|
||||
sp = (unsigned long) ka->sa.sa_restorer;
|
||||
#endif /* CONFIG_X86_32 */
|
||||
}
|
||||
}
|
||||
|
||||
if (used_math()) {
|
||||
sp -= sig_xstate_size;
|
||||
#ifdef CONFIG_X86_64
|
||||
sp = round_down(sp, 64);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
|
||||
&buf_fx, &math_size);
|
||||
*fpstate = (void __user *)sp;
|
||||
}
|
||||
|
||||
|
@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
|
|||
if (onsigstack && !likely(on_sig_stack(sp)))
|
||||
return (void __user *)-1L;
|
||||
|
||||
/* save i387 state */
|
||||
if (used_math() && save_i387_xstate(*fpstate) < 0)
|
||||
/* save i387 and extended state */
|
||||
if (used_math() &&
|
||||
save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
|
||||
return (void __user *)-1L;
|
||||
|
||||
return (void __user *)sp;
|
||||
|
@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
|
|||
}
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
|
||||
siginfo_t *info, compat_sigset_t *set,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
struct rt_sigframe_x32 __user *frame;
|
||||
void __user *restorer;
|
||||
int err = 0;
|
||||
void __user *fpstate = NULL;
|
||||
|
||||
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
|
||||
return -EFAULT;
|
||||
|
||||
if (ka->sa.sa_flags & SA_SIGINFO) {
|
||||
if (copy_siginfo_to_user32(&frame->info, info))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
put_user_try {
|
||||
/* Create the ucontext. */
|
||||
if (cpu_has_xsave)
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
put_user_ex(0, &frame->uc.uc_link);
|
||||
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
|
||||
put_user_ex(sas_ss_flags(regs->sp),
|
||||
&frame->uc.uc_stack.ss_flags);
|
||||
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
|
||||
put_user_ex(0, &frame->uc.uc__pad0);
|
||||
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
|
||||
regs, set->sig[0]);
|
||||
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
|
||||
|
||||
if (ka->sa.sa_flags & SA_RESTORER) {
|
||||
restorer = ka->sa.sa_restorer;
|
||||
} else {
|
||||
/* could use a vstub here */
|
||||
restorer = NULL;
|
||||
err |= -EFAULT;
|
||||
}
|
||||
put_user_ex(restorer, &frame->pretcode);
|
||||
} put_user_catch(err);
|
||||
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
|
||||
/* Set up registers for signal handler */
|
||||
regs->sp = (unsigned long) frame;
|
||||
regs->ip = (unsigned long) ka->sa.sa_handler;
|
||||
|
||||
/* We use the x32 calling convention here... */
|
||||
regs->di = sig;
|
||||
regs->si = (unsigned long) &frame->info;
|
||||
regs->dx = (unsigned long) &frame->uc;
|
||||
|
||||
loadsegment(ds, __USER_DS);
|
||||
loadsegment(es, __USER_DS);
|
||||
|
||||
regs->cs = __USER_CS;
|
||||
regs->ss = __USER_DS;
|
||||
#endif /* CONFIG_X86_X32_ABI */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Atomically swap in the new signal mask, and wait for a signal.
|
||||
|
@ -612,55 +678,22 @@ static int signr_convert(int sig)
|
|||
return sig;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
||||
#define is_ia32 1
|
||||
#define ia32_setup_frame __setup_frame
|
||||
#define ia32_setup_rt_frame __setup_rt_frame
|
||||
|
||||
#else /* !CONFIG_X86_32 */
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#define is_ia32 test_thread_flag(TIF_IA32)
|
||||
#else /* !CONFIG_IA32_EMULATION */
|
||||
#define is_ia32 0
|
||||
#endif /* CONFIG_IA32_EMULATION */
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
#define is_x32 test_thread_flag(TIF_X32)
|
||||
|
||||
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
|
||||
siginfo_t *info, compat_sigset_t *set,
|
||||
struct pt_regs *regs);
|
||||
#else /* !CONFIG_X86_X32_ABI */
|
||||
#define is_x32 0
|
||||
#endif /* CONFIG_X86_X32_ABI */
|
||||
|
||||
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
|
||||
sigset_t *set, struct pt_regs *regs);
|
||||
int ia32_setup_frame(int sig, struct k_sigaction *ka,
|
||||
sigset_t *set, struct pt_regs *regs);
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
static int
|
||||
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
int usig = signr_convert(sig);
|
||||
sigset_t *set = sigmask_to_save();
|
||||
compat_sigset_t *cset = (compat_sigset_t *) set;
|
||||
|
||||
/* Set up the stack frame */
|
||||
if (is_ia32) {
|
||||
if (is_ia32_frame()) {
|
||||
if (ka->sa.sa_flags & SA_SIGINFO)
|
||||
return ia32_setup_rt_frame(usig, ka, info, set, regs);
|
||||
return ia32_setup_rt_frame(usig, ka, info, cset, regs);
|
||||
else
|
||||
return ia32_setup_frame(usig, ka, set, regs);
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
} else if (is_x32) {
|
||||
return x32_setup_rt_frame(usig, ka, info,
|
||||
(compat_sigset_t *)set, regs);
|
||||
#endif
|
||||
return ia32_setup_frame(usig, ka, cset, regs);
|
||||
} else if (is_x32_frame()) {
|
||||
return x32_setup_rt_frame(usig, ka, info, cset, regs);
|
||||
} else {
|
||||
return __setup_rt_frame(sig, ka, info, set, regs);
|
||||
}
|
||||
|
@ -828,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
|
||||
siginfo_t *info, compat_sigset_t *set,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct rt_sigframe_x32 __user *frame;
|
||||
void __user *restorer;
|
||||
int err = 0;
|
||||
void __user *fpstate = NULL;
|
||||
|
||||
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
|
||||
return -EFAULT;
|
||||
|
||||
if (ka->sa.sa_flags & SA_SIGINFO) {
|
||||
if (copy_siginfo_to_user32(&frame->info, info))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
put_user_try {
|
||||
/* Create the ucontext. */
|
||||
if (cpu_has_xsave)
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
put_user_ex(0, &frame->uc.uc_link);
|
||||
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
|
||||
put_user_ex(sas_ss_flags(regs->sp),
|
||||
&frame->uc.uc_stack.ss_flags);
|
||||
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
|
||||
put_user_ex(0, &frame->uc.uc__pad0);
|
||||
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
|
||||
regs, set->sig[0]);
|
||||
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
|
||||
|
||||
if (ka->sa.sa_flags & SA_RESTORER) {
|
||||
restorer = ka->sa.sa_restorer;
|
||||
} else {
|
||||
/* could use a vstub here */
|
||||
restorer = NULL;
|
||||
err |= -EFAULT;
|
||||
}
|
||||
put_user_ex(restorer, &frame->pretcode);
|
||||
} put_user_catch(err);
|
||||
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
|
||||
/* Set up registers for signal handler */
|
||||
regs->sp = (unsigned long) frame;
|
||||
regs->ip = (unsigned long) ka->sa.sa_handler;
|
||||
|
||||
/* We use the x32 calling convention here... */
|
||||
regs->di = sig;
|
||||
regs->si = (unsigned long) &frame->info;
|
||||
regs->dx = (unsigned long) &frame->uc;
|
||||
|
||||
loadsegment(ds, __USER_DS);
|
||||
loadsegment(es, __USER_DS);
|
||||
|
||||
regs->cs = __USER_CS;
|
||||
regs->ss = __USER_DS;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
|
||||
{
|
||||
struct rt_sigframe_x32 __user *frame;
|
||||
|
|
|
@ -628,11 +628,12 @@ void math_state_restore(void)
|
|||
}
|
||||
|
||||
__thread_fpu_begin(tsk);
|
||||
|
||||
/*
|
||||
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
|
||||
*/
|
||||
if (unlikely(restore_fpu_checking(tsk))) {
|
||||
__thread_fpu_end(tsk);
|
||||
drop_init_fpu(tsk);
|
||||
force_sig(SIGSEGV, tsk);
|
||||
return;
|
||||
}
|
||||
|
@ -645,6 +646,8 @@ dotraplinkage void __kprobes
|
|||
do_device_not_available(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
exception_enter(regs);
|
||||
BUG_ON(use_eager_fpu());
|
||||
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
if (read_cr0() & X86_CR0_EM) {
|
||||
struct math_emu_info info = { };
|
||||
|
|
|
@ -10,9 +10,7 @@
|
|||
#include <linux/compat.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/fpu-internal.h>
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#include <asm/sigcontext32.h>
|
||||
#endif
|
||||
#include <asm/sigframe.h>
|
||||
#include <asm/xcr.h>
|
||||
|
||||
/*
|
||||
|
@ -23,13 +21,9 @@ u64 pcntxt_mask;
|
|||
/*
|
||||
* Represents init state for the supported extended state.
|
||||
*/
|
||||
static struct xsave_struct *init_xstate_buf;
|
||||
|
||||
struct _fpx_sw_bytes fx_sw_reserved;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
struct _fpx_sw_bytes fx_sw_reserved_ia32;
|
||||
#endif
|
||||
struct xsave_struct *init_xstate_buf;
|
||||
|
||||
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
|
||||
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
|
||||
|
||||
/*
|
||||
|
@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
|
|||
*/
|
||||
void __sanitize_i387_state(struct task_struct *tsk)
|
||||
{
|
||||
u64 xstate_bv;
|
||||
int feature_bit = 0x2;
|
||||
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
|
||||
int feature_bit = 0x2;
|
||||
u64 xstate_bv;
|
||||
|
||||
if (!fx)
|
||||
return;
|
||||
|
@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
|
|||
* Check for the presence of extended state information in the
|
||||
* user fpstate pointer in the sigcontext.
|
||||
*/
|
||||
int check_for_xstate(struct i387_fxsave_struct __user *buf,
|
||||
void __user *fpstate,
|
||||
struct _fpx_sw_bytes *fx_sw_user)
|
||||
static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
|
||||
void __user *fpstate,
|
||||
struct _fpx_sw_bytes *fx_sw)
|
||||
{
|
||||
int min_xstate_size = sizeof(struct i387_fxsave_struct) +
|
||||
sizeof(struct xsave_hdr_struct);
|
||||
unsigned int magic2;
|
||||
int err;
|
||||
|
||||
err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
|
||||
sizeof(struct _fpx_sw_bytes));
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* First Magic check failed.
|
||||
*/
|
||||
if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
|
||||
return -EINVAL;
|
||||
/* Check for the first magic field and other error scenarios. */
|
||||
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
|
||||
fx_sw->xstate_size < min_xstate_size ||
|
||||
fx_sw->xstate_size > xstate_size ||
|
||||
fx_sw->xstate_size > fx_sw->extended_size)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Check for error scenarios.
|
||||
*/
|
||||
if (fx_sw_user->xstate_size < min_xstate_size ||
|
||||
fx_sw_user->xstate_size > xstate_size ||
|
||||
fx_sw_user->xstate_size > fx_sw_user->extended_size)
|
||||
return -EINVAL;
|
||||
|
||||
err = __get_user(magic2, (__u32 __user *) (fpstate +
|
||||
fx_sw_user->extended_size -
|
||||
FP_XSTATE_MAGIC2_SIZE));
|
||||
if (err)
|
||||
return err;
|
||||
/*
|
||||
* Check for the presence of second magic word at the end of memory
|
||||
* layout. This detects the case where the user just copied the legacy
|
||||
* fpstate layout with out copying the extended state information
|
||||
* in the memory layout.
|
||||
*/
|
||||
if (magic2 != FP_XSTATE_MAGIC2)
|
||||
return -EFAULT;
|
||||
if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
|
||||
|| magic2 != FP_XSTATE_MAGIC2)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Signal frame handlers.
|
||||
*/
|
||||
|
||||
int save_i387_xstate(void __user *buf)
|
||||
static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
int err = 0;
|
||||
if (use_fxsr()) {
|
||||
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
|
||||
struct user_i387_ia32_struct env;
|
||||
struct _fpstate_ia32 __user *fp = buf;
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
|
||||
return -EACCES;
|
||||
convert_from_fxsr(&env, tsk);
|
||||
|
||||
BUG_ON(sig_xstate_size < xstate_size);
|
||||
|
||||
if ((unsigned long)buf % 64)
|
||||
pr_err("%s: bad fpstate %p\n", __func__, buf);
|
||||
|
||||
if (!used_math())
|
||||
return 0;
|
||||
|
||||
if (user_has_fpu()) {
|
||||
if (use_xsave())
|
||||
err = xsave_user(buf);
|
||||
else
|
||||
err = fxsave_user(buf);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
user_fpu_end();
|
||||
if (__copy_to_user(buf, &env, sizeof(env)) ||
|
||||
__put_user(xsave->i387.swd, &fp->status) ||
|
||||
__put_user(X86_FXSR_MAGIC, &fp->magic))
|
||||
return -1;
|
||||
} else {
|
||||
sanitize_i387_state(tsk);
|
||||
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
|
||||
xstate_size))
|
||||
struct i387_fsave_struct __user *fp = buf;
|
||||
u32 swd;
|
||||
if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
|
||||
return -1;
|
||||
}
|
||||
|
||||
clear_used_math(); /* trigger finit */
|
||||
|
||||
if (use_xsave()) {
|
||||
struct _fpstate __user *fx = buf;
|
||||
struct _xstate __user *x = buf;
|
||||
u64 xstate_bv;
|
||||
|
||||
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
|
||||
sizeof(struct _fpx_sw_bytes));
|
||||
|
||||
err |= __put_user(FP_XSTATE_MAGIC2,
|
||||
(__u32 __user *) (buf + sig_xstate_size
|
||||
- FP_XSTATE_MAGIC2_SIZE));
|
||||
|
||||
/*
|
||||
* Read the xstate_bv which we copied (directly from the cpu or
|
||||
* from the state in task struct) to the user buffers and
|
||||
* set the FP/SSE bits.
|
||||
*/
|
||||
err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
|
||||
|
||||
/*
|
||||
* For legacy compatible, we always set FP/SSE bits in the bit
|
||||
* vector while saving the state to the user context. This will
|
||||
* enable us capturing any changes(during sigreturn) to
|
||||
* the FP/SSE bits by the legacy applications which don't touch
|
||||
* xstate_bv in the xsave header.
|
||||
*
|
||||
* xsave aware apps can change the xstate_bv in the xsave
|
||||
* header as well as change any contents in the memory layout.
|
||||
* xrestore as part of sigreturn will capture all the changes.
|
||||
*/
|
||||
xstate_bv |= XSTATE_FPSSE;
|
||||
|
||||
err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore the extended state if present. Otherwise, restore the FP/SSE
|
||||
* state.
|
||||
*/
|
||||
static int restore_user_xstate(void __user *buf)
|
||||
static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
|
||||
{
|
||||
struct _fpx_sw_bytes fx_sw_user;
|
||||
u64 mask;
|
||||
struct xsave_struct __user *x = buf;
|
||||
struct _fpx_sw_bytes *sw_bytes;
|
||||
u32 xstate_bv;
|
||||
int err;
|
||||
|
||||
if (((unsigned long)buf % 64) ||
|
||||
check_for_xstate(buf, buf, &fx_sw_user))
|
||||
goto fx_only;
|
||||
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
|
||||
sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
|
||||
err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
|
||||
|
||||
mask = fx_sw_user.xstate_bv;
|
||||
|
||||
/*
|
||||
* restore the state passed by the user.
|
||||
*/
|
||||
err = xrestore_user(buf, mask);
|
||||
if (err)
|
||||
if (!use_xsave())
|
||||
return err;
|
||||
|
||||
/*
|
||||
* init the state skipped by the user.
|
||||
*/
|
||||
mask = pcntxt_mask & ~mask;
|
||||
if (unlikely(mask))
|
||||
xrstor_state(init_xstate_buf, mask);
|
||||
err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
|
||||
|
||||
return 0;
|
||||
|
||||
fx_only:
|
||||
/*
|
||||
* couldn't find the extended state information in the
|
||||
* memory layout. Restore just the FP/SSE and init all
|
||||
* the other extended state.
|
||||
* Read the xstate_bv which we copied (directly from the cpu or
|
||||
* from the state in task struct) to the user buffers.
|
||||
*/
|
||||
xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
|
||||
return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
|
||||
err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
|
||||
|
||||
/*
|
||||
* For legacy compatible, we always set FP/SSE bits in the bit
|
||||
* vector while saving the state to the user context. This will
|
||||
* enable us capturing any changes(during sigreturn) to
|
||||
* the FP/SSE bits by the legacy applications which don't touch
|
||||
* xstate_bv in the xsave header.
|
||||
*
|
||||
* xsave aware apps can change the xstate_bv in the xsave
|
||||
* header as well as change any contents in the memory layout.
|
||||
* xrestore as part of sigreturn will capture all the changes.
|
||||
*/
|
||||
xstate_bv |= XSTATE_FPSSE;
|
||||
|
||||
err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int save_user_xstate(struct xsave_struct __user *buf)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (use_xsave())
|
||||
err = xsave_user(buf);
|
||||
else if (use_fxsr())
|
||||
err = fxsave_user((struct i387_fxsave_struct __user *) buf);
|
||||
else
|
||||
err = fsave_user((struct i387_fsave_struct __user *) buf);
|
||||
|
||||
if (unlikely(err) && __clear_user(buf, xstate_size))
|
||||
err = -EFAULT;
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* This restores directly out of user space. Exceptions are handled.
|
||||
* Save the fpu, extended register state to the user signal frame.
|
||||
*
|
||||
* 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
|
||||
* state is copied.
|
||||
* 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
|
||||
*
|
||||
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
|
||||
* buf != buf_fx for 32-bit frames with fxstate.
|
||||
*
|
||||
* If the fpu, extended register state is live, save the state directly
|
||||
* to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
|
||||
* copy the thread's fpu state to the user frame starting at 'buf_fx'.
|
||||
*
|
||||
* If this is a 32-bit frame with fxstate, put a fsave header before
|
||||
* the aligned state at 'buf_fx'.
|
||||
*
|
||||
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
|
||||
* indicating the absence/presence of the extended state to the user.
|
||||
*/
|
||||
int restore_i387_xstate(void __user *buf)
|
||||
int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
{
|
||||
struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave;
|
||||
struct task_struct *tsk = current;
|
||||
int err = 0;
|
||||
int ia32_fxstate = (buf != buf_fx);
|
||||
|
||||
ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
|
||||
config_enabled(CONFIG_IA32_EMULATION));
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, buf, size))
|
||||
return -EACCES;
|
||||
|
||||
if (!HAVE_HWFP)
|
||||
return fpregs_soft_get(current, NULL, 0,
|
||||
sizeof(struct user_i387_ia32_struct), NULL,
|
||||
(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
|
||||
|
||||
if (user_has_fpu()) {
|
||||
/* Save the live register state to the user directly. */
|
||||
if (save_user_xstate(buf_fx))
|
||||
return -1;
|
||||
/* Update the thread's fxstate to save the fsave header. */
|
||||
if (ia32_fxstate)
|
||||
fpu_fxsave(&tsk->thread.fpu);
|
||||
} else {
|
||||
sanitize_i387_state(tsk);
|
||||
if (__copy_to_user(buf_fx, xsave, xstate_size))
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Save the fsave header for the 32-bit frames. */
|
||||
if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
|
||||
return -1;
|
||||
|
||||
if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
|
||||
return -1;
|
||||
|
||||
drop_init_fpu(tsk); /* trigger finit */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
sanitize_restored_xstate(struct task_struct *tsk,
|
||||
struct user_i387_ia32_struct *ia32_env,
|
||||
u64 xstate_bv, int fx_only)
|
||||
{
|
||||
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
|
||||
struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
|
||||
|
||||
if (use_xsave()) {
|
||||
/* These bits must be zero. */
|
||||
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
|
||||
|
||||
/*
|
||||
* Init the state that is not present in the memory
|
||||
* layout and not enabled by the OS.
|
||||
*/
|
||||
if (fx_only)
|
||||
xsave_hdr->xstate_bv = XSTATE_FPSSE;
|
||||
else
|
||||
xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
|
||||
}
|
||||
|
||||
if (use_fxsr()) {
|
||||
/*
|
||||
* mscsr reserved bits must be masked to zero for security
|
||||
* reasons.
|
||||
*/
|
||||
xsave->i387.mxcsr &= mxcsr_feature_mask;
|
||||
|
||||
convert_to_fxsr(tsk, ia32_env);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore the extended state if present. Otherwise, restore the FP/SSE state.
|
||||
*/
|
||||
static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
|
||||
{
|
||||
if (use_xsave()) {
|
||||
if ((unsigned long)buf % 64 || fx_only) {
|
||||
u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
|
||||
xrstor_state(init_xstate_buf, init_bv);
|
||||
return fxrstor_checking((__force void *) buf);
|
||||
} else {
|
||||
u64 init_bv = pcntxt_mask & ~xbv;
|
||||
if (unlikely(init_bv))
|
||||
xrstor_state(init_xstate_buf, init_bv);
|
||||
return xrestore_user(buf, xbv);
|
||||
}
|
||||
} else if (use_fxsr()) {
|
||||
return fxrstor_checking((__force void *) buf);
|
||||
} else
|
||||
return frstor_checking((__force void *) buf);
|
||||
}
|
||||
|
||||
int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
{
|
||||
int ia32_fxstate = (buf != buf_fx);
|
||||
struct task_struct *tsk = current;
|
||||
int state_size = xstate_size;
|
||||
u64 xstate_bv = 0;
|
||||
int fx_only = 0;
|
||||
|
||||
ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
|
||||
config_enabled(CONFIG_IA32_EMULATION));
|
||||
|
||||
if (!buf) {
|
||||
if (used_math())
|
||||
goto clear;
|
||||
drop_init_fpu(tsk);
|
||||
return 0;
|
||||
} else
|
||||
if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
|
||||
return -EACCES;
|
||||
|
||||
if (!used_math()) {
|
||||
err = init_fpu(tsk);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
user_fpu_begin();
|
||||
if (use_xsave())
|
||||
err = restore_user_xstate(buf);
|
||||
else
|
||||
err = fxrstor_checking((__force struct i387_fxsave_struct *)
|
||||
buf);
|
||||
if (unlikely(err)) {
|
||||
if (!access_ok(VERIFY_READ, buf, size))
|
||||
return -EACCES;
|
||||
|
||||
if (!used_math() && init_fpu(tsk))
|
||||
return -1;
|
||||
|
||||
if (!HAVE_HWFP) {
|
||||
return fpregs_soft_set(current, NULL,
|
||||
0, sizeof(struct user_i387_ia32_struct),
|
||||
NULL, buf) != 0;
|
||||
}
|
||||
|
||||
if (use_xsave()) {
|
||||
struct _fpx_sw_bytes fx_sw_user;
|
||||
if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
|
||||
/*
|
||||
* Couldn't find the extended state information in the
|
||||
* memory layout. Restore just the FP/SSE and init all
|
||||
* the other extended state.
|
||||
*/
|
||||
state_size = sizeof(struct i387_fxsave_struct);
|
||||
fx_only = 1;
|
||||
} else {
|
||||
state_size = fx_sw_user.xstate_size;
|
||||
xstate_bv = fx_sw_user.xstate_bv;
|
||||
}
|
||||
}
|
||||
|
||||
if (ia32_fxstate) {
|
||||
/*
|
||||
* Encountered an error while doing the restore from the
|
||||
* user buffer, clear the fpu state.
|
||||
* For 32-bit frames with fxstate, copy the user state to the
|
||||
* thread's fpu state, reconstruct fxstate from the fsave
|
||||
* header. Sanitize the copied state etc.
|
||||
*/
|
||||
clear:
|
||||
clear_fpu(tsk);
|
||||
clear_used_math();
|
||||
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
|
||||
struct user_i387_ia32_struct env;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* Drop the current fpu which clears used_math(). This ensures
|
||||
* that any context-switch during the copy of the new state,
|
||||
* avoids the intermediate state from getting restored/saved.
|
||||
* Thus avoiding the new restored state from getting corrupted.
|
||||
* We will be ready to restore/save the state only after
|
||||
* set_used_math() is again set.
|
||||
*/
|
||||
drop_fpu(tsk);
|
||||
|
||||
if (__copy_from_user(xsave, buf_fx, state_size) ||
|
||||
__copy_from_user(&env, buf, sizeof(env))) {
|
||||
err = -1;
|
||||
} else {
|
||||
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
|
||||
set_used_math();
|
||||
}
|
||||
|
||||
if (use_eager_fpu())
|
||||
math_state_restore();
|
||||
|
||||
return err;
|
||||
} else {
|
||||
/*
|
||||
* For 64-bit frames and 32-bit fsave frames, restore the user
|
||||
* state to the registers directly (with exceptions handled).
|
||||
*/
|
||||
user_fpu_begin();
|
||||
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
|
||||
drop_init_fpu(tsk);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Prepare the SW reserved portion of the fxsave memory layout, indicating
|
||||
|
@ -321,31 +428,22 @@ int restore_i387_xstate(void __user *buf)
|
|||
*/
|
||||
static void prepare_fx_sw_frame(void)
|
||||
{
|
||||
int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
|
||||
FP_XSTATE_MAGIC2_SIZE;
|
||||
int fsave_header_size = sizeof(struct i387_fsave_struct);
|
||||
int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
|
||||
|
||||
sig_xstate_size = sizeof(struct _fpstate) + size_extended;
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
|
||||
#endif
|
||||
|
||||
memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
|
||||
if (config_enabled(CONFIG_X86_32))
|
||||
size += fsave_header_size;
|
||||
|
||||
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
|
||||
fx_sw_reserved.extended_size = sig_xstate_size;
|
||||
fx_sw_reserved.extended_size = size;
|
||||
fx_sw_reserved.xstate_bv = pcntxt_mask;
|
||||
fx_sw_reserved.xstate_size = xstate_size;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
|
||||
sizeof(struct _fpx_sw_bytes));
|
||||
fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
unsigned int sig_xstate_size = sizeof(struct _fpstate);
|
||||
#endif
|
||||
if (config_enabled(CONFIG_IA32_EMULATION)) {
|
||||
fx_sw_reserved_ia32 = fx_sw_reserved;
|
||||
fx_sw_reserved_ia32.extended_size += fsave_header_size;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable the extended processor state save/restore feature
|
||||
|
@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
|
|||
/*
|
||||
* setup the xstate image representing the init state
|
||||
*/
|
||||
static void __init setup_xstate_init(void)
|
||||
static void __init setup_init_fpu_buf(void)
|
||||
{
|
||||
setup_xstate_features();
|
||||
|
||||
/*
|
||||
* Setup init_xstate_buf to represent the init state of
|
||||
* all the features managed by the xsave
|
||||
*/
|
||||
init_xstate_buf = alloc_bootmem_align(xstate_size,
|
||||
__alignof__(struct xsave_struct));
|
||||
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
|
||||
fx_finit(&init_xstate_buf->i387);
|
||||
|
||||
if (!cpu_has_xsave)
|
||||
return;
|
||||
|
||||
setup_xstate_features();
|
||||
|
||||
clts();
|
||||
/*
|
||||
* Init all the features state with header_bv being 0x0
|
||||
*/
|
||||
|
@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
|
|||
* of any feature which is not represented by all zero's.
|
||||
*/
|
||||
xsave_state(init_xstate_buf, -1);
|
||||
stts();
|
||||
}
|
||||
|
||||
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
|
||||
static int __init eager_fpu_setup(char *s)
|
||||
{
|
||||
if (!strcmp(s, "on"))
|
||||
eagerfpu = ENABLE;
|
||||
else if (!strcmp(s, "off"))
|
||||
eagerfpu = DISABLE;
|
||||
else if (!strcmp(s, "auto"))
|
||||
eagerfpu = AUTO;
|
||||
return 1;
|
||||
}
|
||||
__setup("eagerfpu=", eager_fpu_setup);
|
||||
|
||||
/*
|
||||
* Enable and initialize the xsave feature.
|
||||
*/
|
||||
|
@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)
|
|||
|
||||
update_regset_xstate_info(xstate_size, pcntxt_mask);
|
||||
prepare_fx_sw_frame();
|
||||
setup_init_fpu_buf();
|
||||
|
||||
setup_xstate_init();
|
||||
/* Auto enable eagerfpu for xsaveopt */
|
||||
if (cpu_has_xsaveopt && eagerfpu != DISABLE)
|
||||
eagerfpu = ENABLE;
|
||||
|
||||
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
|
||||
pcntxt_mask, xstate_size);
|
||||
|
@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
|
|||
next_func = xstate_enable;
|
||||
this_func();
|
||||
}
|
||||
|
||||
static inline void __init eager_fpu_init_bp(void)
|
||||
{
|
||||
current->thread.fpu.state =
|
||||
alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
|
||||
if (!init_xstate_buf)
|
||||
setup_init_fpu_buf();
|
||||
}
|
||||
|
||||
void __cpuinit eager_fpu_init(void)
|
||||
{
|
||||
static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
|
||||
|
||||
clear_used_math();
|
||||
current_thread_info()->status = 0;
|
||||
|
||||
if (eagerfpu == ENABLE)
|
||||
setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
|
||||
|
||||
if (!cpu_has_eager_fpu) {
|
||||
stts();
|
||||
return;
|
||||
}
|
||||
|
||||
if (boot_func) {
|
||||
boot_func();
|
||||
boot_func = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is same as math_state_restore(). But use_xsave() is
|
||||
* not yet patched to use math_state_restore().
|
||||
*/
|
||||
init_fpu(current);
|
||||
__thread_fpu_begin(current);
|
||||
if (cpu_has_xsave)
|
||||
xrstor_state(init_xstate_buf, -1);
|
||||
else
|
||||
fxrstor_checking(&init_xstate_buf->i387);
|
||||
}
|
||||
|
|
|
@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
|
|||
#ifdef CONFIG_X86_64
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
|
||||
#endif
|
||||
if (user_has_fpu())
|
||||
clts();
|
||||
/*
|
||||
* If the FPU is not active (through the host task or
|
||||
* the guest vcpu), then restore the cr0.TS bit.
|
||||
*/
|
||||
if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
|
||||
stts();
|
||||
load_gdt(&__get_cpu_var(host_gdt));
|
||||
}
|
||||
|
||||
|
@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void)
|
|||
unsigned long tmpl;
|
||||
struct desc_ptr dt;
|
||||
|
||||
vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */
|
||||
vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
|
||||
vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
|
||||
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
|
||||
|
||||
|
|
|
@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
kvm_put_guest_xcr0(vcpu);
|
||||
vcpu->guest_fpu_loaded = 1;
|
||||
unlazy_fpu(current);
|
||||
__kernel_fpu_begin();
|
||||
fpu_restore_checking(&vcpu->arch.guest_fpu);
|
||||
trace_kvm_fpu(1);
|
||||
}
|
||||
|
@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->guest_fpu_loaded = 0;
|
||||
fpu_save_init(&vcpu->arch.guest_fpu);
|
||||
__kernel_fpu_end();
|
||||
++vcpu->stat.fpu_reload;
|
||||
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
|
||||
trace_kvm_fpu(0);
|
||||
|
|
|
@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
|
|||
* we set it now, so we can trap and pass that trap to the Guest if it
|
||||
* uses the FPU.
|
||||
*/
|
||||
if (cpu->ts)
|
||||
unlazy_fpu(current);
|
||||
if (cpu->ts && user_has_fpu())
|
||||
stts();
|
||||
|
||||
/*
|
||||
* SYSENTER is an optimized way of doing system calls. We can't allow
|
||||
|
@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
|
|||
if (boot_cpu_has(X86_FEATURE_SEP))
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
|
||||
|
||||
/* Clear the host TS bit if it was set above. */
|
||||
if (cpu->ts && user_has_fpu())
|
||||
clts();
|
||||
|
||||
/*
|
||||
* If the Guest page faulted, then the cr2 register will tell us the
|
||||
* bad virtual address. We have to grab this now, because once we
|
||||
|
@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
|
|||
* a different CPU. So all the critical stuff should be done
|
||||
* before this.
|
||||
*/
|
||||
else if (cpu->regs->trapnum == 7)
|
||||
else if (cpu->regs->trapnum == 7 && !user_has_fpu())
|
||||
math_state_restore();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue