x86/paravirt: split sysret and sysexit
Don't conflate sysret and sysexit; they're different instructions with
different semantics, and may be in use at the same time (at least
within the same kernel, depending on whether it's an Intel or AMD
system).

sysexit - just returns to userspace; does no register restoration of
any kind, and must explicitly (but atomically) enable interrupts.

sysret - reloads flags from r11, so there's no need to explicitly
enable interrupts on 64-bit; responsible for restoring usermode %gs.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent e04e0a630d
commit d75cd22fdd

11 changed files with 36 additions and 25 deletions
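For orientation before the per-file hunks, here are the two native return paths this commit keeps apart, copied from the entry-code changes below with comments added to restate the changelog; this is a reading aid, not part of the patch.

# 32-bit sysenter return (Intel): sysexit itself restores no registers and
# does not re-enable interrupts, so the exit path has to do that explicitly.
native_irq_enable_sysexit:
	sti
	sysexit

# 64-bit syscall return: sysretq reloads rflags from %r11 (and rip from %rcx),
# so no explicit sti is needed; the path is still responsible for putting the
# user stack pointer back and for restoring the usermode %gs via swapgs.
native_usersp_sysret:
	movq %gs:pda_oldrsp, %rsp
	swapgs
	sysretq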
@@ -111,7 +111,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
@@ -58,7 +58,7 @@
  * for paravirtualization. The following will never clobber any registers:
  *   INTERRUPT_RETURN (aka. "iret")
  *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
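A note on the sti; sysexit pairing above (added here for context, not part of the patch): sti keeps interrupts masked for one more instruction, so nothing can be delivered between re-enabling interrupts and leaving the kernel, which is what the changelog means by explicitly but atomically enabling interrupts.

	sti		# interrupts stay masked for one more instruction (sti shadow)
	sysexit		# ...so the return to userspace happens before any interrupt lands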
@@ -59,7 +59,7 @@
 #endif
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
 	movq %gs:pda_oldrsp,%rsp
 	swapgs
 	sysretq
@@ -275,7 +275,7 @@ sysret_check:
 	CFI_REGISTER rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER rflags,r11*/
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	USERSP_SYSRET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+	.usersp_sysret = native_usersp_sysret,
+#endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
 
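As a usage sketch of the split hooks (illustrative only; the my_* callbacks below are hypothetical and not part of this commit), a guest backend now fills whichever hook matches its word size, mirroring the native wiring above:

/* Hypothetical backend wiring; only the return-path hooks are shown. */
static const struct pv_cpu_ops my_cpu_ops = {
	.iret = my_iret,
#ifdef CONFIG_X86_32
	.irq_enable_sysexit = my_sysexit,	/* "sti; sysexit"-style return */
#else
	.usersp_sysret = my_usersp_sysret,	/* swapgs + sysretq-style return */
#endif
	/* remaining ops elided */
};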
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, restore_fl);
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, usersp_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 				      insns, ip);
 	case PARAVIRT_PATCH(pv_cpu_ops.iret):
 		return patch_internal(VMI_CALL_IRET, len, insns, ip);
-	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
 		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 	default:
 		break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend. They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
@@ -1089,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = xen_sysexit,
+	.irq_enable_sysexit = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
@@ -112,13 +112,13 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET \
+#define USERSP_SYSRET \
 	movq %gs:pda_oldrsp, %rsp; \
 	swapgs; \
 	sysretq;
 #else
 #define INTERRUPT_RETURN	iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define GET_CR0_INTO_EAX	movl %cr0, %eax
 #endif
 
@@ -141,8 +141,9 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
 
-	/* These two are jmp to, not actually called. */
-	void (*irq_enable_syscall_ret)(void);
+	/* These three are jmp to, not actually called. */
+	void (*irq_enable_sysexit)(void);
+	void (*usersp_sysret)(void);
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1480,10 +1481,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
 		  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSCALL_RET \
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret), \
+#define ENABLE_INTERRUPTS_SYSEXIT \
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
 		  CLBR_NONE, \
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 
 
 #ifdef CONFIG_X86_32
@@ -1504,6 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		 movq %rax, %rcx; \
 		 xorq %rax, %rax;
 
+#define USERSP_SYSRET \
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret), \
+		  CLBR_NONE, \
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
 #endif
 
 #endif /* __ASSEMBLY__ */
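To tie the macro, DEF_NATIVE and PATCH_SITE pieces together, here is a rough sketch of what a USERSP_SYSRET site looks like before and after boot-time patching on native 64-bit hardware (assembled from the hunks above, not literal disassembly):

# As initially emitted, the macro drops a patchable indirect jump through
# the pv_cpu_ops table:
	jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret)

# On native hardware, native_patch() rewrites the site in place with the
# DEF_NATIVE sequence:
	movq %gs:pda_oldrsp, %rsp
	swapgs
	sysretq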