2008-07-28 12:53:57 -06:00
|
|
|
#include <linux/hardirq.h>
|
|
|
|
|
2009-08-20 01:59:09 -06:00
|
|
|
#include <asm/x86_init.h>
|
|
|
|
|
2008-07-28 12:53:57 -06:00
|
|
|
#include <xen/interface/xen.h>
|
|
|
|
#include <xen/interface/sched.h>
|
|
|
|
#include <xen/interface/vcpu.h>
|
|
|
|
|
|
|
|
#include <asm/xen/hypercall.h>
|
|
|
|
#include <asm/xen/hypervisor.h>
|
|
|
|
|
|
|
|
#include "xen-ops.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Force a proper event-channel callback from Xen after clearing the
|
|
|
|
* callback mask. We do this in a very simple manner, by making a call
|
|
|
|
* down into Xen. The pending flag will be checked by Xen on return.
|
|
|
|
*/
|
|
|
|
void xen_force_evtchn_callback(void)
|
|
|
|
{
|
|
|
|
(void)HYPERVISOR_xen_version(0, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned long xen_save_fl(void)
|
|
|
|
{
|
|
|
|
struct vcpu_info *vcpu;
|
|
|
|
unsigned long flags;
|
|
|
|
|
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (other architectures will fall
back to a default implementation)
The advantage is that for example to read a local percpu variable,
instead of this sequence:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
* added percpu_and() for completeness's sake
* made generic percpu ops atomic against preemption
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
2009-01-15 06:15:53 -07:00
|
|
|
vcpu = percpu_read(xen_vcpu);
|
2008-07-28 12:53:57 -06:00
|
|
|
|
|
|
|
/* flag has opposite sense of mask */
|
|
|
|
flags = !vcpu->evtchn_upcall_mask;
|
|
|
|
|
|
|
|
/* convert to IF type flag
|
|
|
|
-0 -> 0x00000000
|
|
|
|
-1 -> 0xffffffff
|
|
|
|
*/
|
|
|
|
return (-flags) & X86_EFLAGS_IF;
|
|
|
|
}
|
x86/paravirt: add register-saving thunks to reduce caller register pressure
Impact: Optimization
One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction. This includes the function
argument registers, and several others.
This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code. In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.
Standard calling convention is:
arguments return scratch
x86-32 eax edx ecx eax ?
x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11
The thunk preserves all argument and scratch registers. The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).
Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.
The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first. This is
particularly easy since their calls are handled specially anyway.
XXX Deal with VMI. What's their calling convention?
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-01-28 15:35:05 -07:00
|
|
|
/* Register-saving wrapper thunk for xen_save_fl; see PV_CALLEE_SAVE() in xen_irq_ops. */
PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
|
2008-07-28 12:53:57 -06:00
|
|
|
|
|
|
|
static void xen_restore_fl(unsigned long flags)
|
|
|
|
{
|
|
|
|
struct vcpu_info *vcpu;
|
|
|
|
|
|
|
|
/* convert from IF type flag */
|
|
|
|
flags = !(flags & X86_EFLAGS_IF);
|
|
|
|
|
|
|
|
/* There's a one instruction preempt window here. We need to
|
|
|
|
make sure we're don't switch CPUs between getting the vcpu
|
|
|
|
pointer and updating the mask. */
|
|
|
|
preempt_disable();
|
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (other architectures will fall
back to a default implementation)
The advantage is that for example to read a local percpu variable,
instead of this sequence:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
* added percpu_and() for completeness's sake
* made generic percpu ops atomic against preemption
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
2009-01-15 06:15:53 -07:00
|
|
|
vcpu = percpu_read(xen_vcpu);
|
2008-07-28 12:53:57 -06:00
|
|
|
vcpu->evtchn_upcall_mask = flags;
|
|
|
|
preempt_enable_no_resched();
|
|
|
|
|
|
|
|
/* Doesn't matter if we get preempted here, because any
|
|
|
|
pending event will get dealt with anyway. */
|
|
|
|
|
|
|
|
if (flags == 0) {
|
|
|
|
preempt_check_resched();
|
|
|
|
barrier(); /* unmask then check (avoid races) */
|
|
|
|
if (unlikely(vcpu->evtchn_upcall_pending))
|
|
|
|
xen_force_evtchn_callback();
|
|
|
|
}
|
|
|
|
}
|
x86/paravirt: add register-saving thunks to reduce caller register pressure
Impact: Optimization
One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction. This includes the function
argument registers, and several others.
This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code. In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.
Standard calling convention is:
arguments return scratch
x86-32 eax edx ecx eax ?
x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11
The thunk preserves all argument and scratch registers. The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).
Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.
The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first. This is
particularly easy since their calls are handled specially anyway.
XXX Deal with VMI. What's their calling convention?
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-01-28 15:35:05 -07:00
|
|
|
/* Register-saving wrapper thunk for xen_restore_fl; see PV_CALLEE_SAVE() in xen_irq_ops. */
PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
|
2008-07-28 12:53:57 -06:00
|
|
|
|
|
|
|
static void xen_irq_disable(void)
|
|
|
|
{
|
|
|
|
/* There's a one instruction preempt window here. We need to
|
|
|
|
make sure we're don't switch CPUs between getting the vcpu
|
|
|
|
pointer and updating the mask. */
|
|
|
|
preempt_disable();
|
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (other architectures will fall
back to a default implementation)
The advantage is that for example to read a local percpu variable,
instead of this sequence:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
* added percpu_and() for completeness's sake
* made generic percpu ops atomic against preemption
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
2009-01-15 06:15:53 -07:00
|
|
|
percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
|
2008-07-28 12:53:57 -06:00
|
|
|
preempt_enable_no_resched();
|
|
|
|
}
|
x86/paravirt: add register-saving thunks to reduce caller register pressure
Impact: Optimization
One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction. This includes the function
argument registers, and several others.
This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code. In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.
Standard calling convention is:
arguments return scratch
x86-32 eax edx ecx eax ?
x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11
The thunk preserves all argument and scratch registers. The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).
Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.
The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first. This is
particularly easy since their calls are handled specially anyway.
XXX Deal with VMI. What's their calling convention?
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-01-28 15:35:05 -07:00
|
|
|
/* Register-saving wrapper thunk for xen_irq_disable; see PV_CALLEE_SAVE() in xen_irq_ops. */
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
|
2008-07-28 12:53:57 -06:00
|
|
|
|
|
|
|
static void xen_irq_enable(void)
|
|
|
|
{
|
|
|
|
struct vcpu_info *vcpu;
|
|
|
|
|
|
|
|
/* We don't need to worry about being preempted here, since
|
|
|
|
either a) interrupts are disabled, so no preemption, or b)
|
|
|
|
the caller is confused and is trying to re-enable interrupts
|
|
|
|
on an indeterminate processor. */
|
|
|
|
|
percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new
generic percpu methods:
percpu_read()
percpu_write()
percpu_add()
percpu_sub()
percpu_and()
percpu_or()
percpu_xor()
and implements support for them on x86. (other architectures will fall
back to a default implementation)
The advantage is that for example to read a local percpu variable,
instead of this sequence:
return __get_cpu_var(var);
ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx
ffffffff8102ca32: 81
ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax
ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax
We can get a single instruction by using the optimized variants:
return percpu_read(var);
ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax
I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.
tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
* added percpu_and() for completeness's sake
* made generic percpu ops atomic against preemption
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
2009-01-15 06:15:53 -07:00
|
|
|
vcpu = percpu_read(xen_vcpu);
|
2008-07-28 12:53:57 -06:00
|
|
|
vcpu->evtchn_upcall_mask = 0;
|
|
|
|
|
|
|
|
/* Doesn't matter if we get preempted here, because any
|
|
|
|
pending event will get dealt with anyway. */
|
|
|
|
|
|
|
|
barrier(); /* unmask then check (avoid races) */
|
|
|
|
if (unlikely(vcpu->evtchn_upcall_pending))
|
|
|
|
xen_force_evtchn_callback();
|
|
|
|
}
|
x86/paravirt: add register-saving thunks to reduce caller register pressure
Impact: Optimization
One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction. This includes the function
argument registers, and several others.
This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code. In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.
Standard calling convention is:
arguments return scratch
x86-32 eax edx ecx eax ?
x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11
The thunk preserves all argument and scratch registers. The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).
Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.
The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first. This is
particularly easy since their calls are handled specially anyway.
XXX Deal with VMI. What's their calling convention?
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-01-28 15:35:05 -07:00
|
|
|
/* Register-saving wrapper thunk for xen_irq_enable; see PV_CALLEE_SAVE() in xen_irq_ops. */
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
|
2008-07-28 12:53:57 -06:00
|
|
|
|
|
|
|
static void xen_safe_halt(void)
|
|
|
|
{
|
|
|
|
/* Blocking includes an implicit local_irq_enable(). */
|
|
|
|
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void xen_halt(void)
|
|
|
|
{
|
|
|
|
if (irqs_disabled())
|
|
|
|
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
|
|
|
|
else
|
|
|
|
xen_safe_halt();
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct pv_irq_ops xen_irq_ops __initdata = {
|
x86/paravirt: add register-saving thunks to reduce caller register pressure
Impact: Optimization
One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction. This includes the function
argument registers, and several others.
This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code. In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.
Standard calling convention is:
arguments return scratch
x86-32 eax edx ecx eax ?
x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11
The thunk preserves all argument and scratch registers. The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).
Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.
The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first. This is
particularly easy since their calls are handled specially anyway.
XXX Deal with VMI. What's their calling convention?
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-01-28 15:35:05 -07:00
|
|
|
.save_fl = PV_CALLEE_SAVE(xen_save_fl),
|
|
|
|
.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
|
|
|
|
.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
|
|
|
|
.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
|
|
|
|
|
2008-07-28 12:53:57 -06:00
|
|
|
.safe_halt = xen_safe_halt,
|
|
|
|
.halt = xen_halt,
|
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
.adjust_exception_frame = xen_adjust_exception_frame,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Install the Xen interrupt operations: copy xen_irq_ops into the global
 * pv_irq_ops table and make xen_init_IRQ the x86 interrupt
 * initialisation hook.
 */
void __init xen_init_irq_ops(void)
{
	pv_irq_ops = xen_irq_ops;
	x86_init.irqs.intr_init = xen_init_IRQ;
}
|