x86: style cleanups for xen assemblies
Make the following style cleanups:

* drop unnecessary //#include from xen-asm_32.S
* compulsive adding of space after comma
* reformat multiline comments

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
69b745ff91
commit
130ace11a9
3 changed files with 219 additions and 204 deletions
|
@ -1,14 +1,14 @@
|
||||||
/*
|
/*
|
||||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
* Asm versions of Xen pv-ops, suitable for either direct use or
|
||||||
The inline versions are the same as the direct-use versions, with the
|
* inlining. The inline versions are the same as the direct-use
|
||||||
pre- and post-amble chopped off.
|
* versions, with the pre- and post-amble chopped off.
|
||||||
|
*
|
||||||
This code is encoded for size rather than absolute efficiency,
|
* This code is encoded for size rather than absolute efficiency, with
|
||||||
with a view to being able to inline as much as possible.
|
* a view to being able to inline as much as possible.
|
||||||
|
*
|
||||||
We only bother with direct forms (ie, vcpu in percpu data) of
|
* We only bother with direct forms (ie, vcpu in percpu data) of the
|
||||||
the operations here; the indirect forms are better handled in
|
* operations here; the indirect forms are better handled in C, since
|
||||||
C, since they're generally too large to inline anyway.
|
* they're generally too large to inline anyway.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
|
@ -18,17 +18,19 @@
|
||||||
#include "xen-asm.h"
|
#include "xen-asm.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Enable events. This clears the event mask and tests the pending
|
* Enable events. This clears the event mask and tests the pending
|
||||||
event status with one AND operation. If there are pending
|
* event status with one AND operation. If there are pending events,
|
||||||
events, then enter the hypervisor to get them handled.
|
* then enter the hypervisor to get them handled.
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_irq_enable_direct)
|
ENTRY(xen_irq_enable_direct)
|
||||||
/* Unmask events */
|
/* Unmask events */
|
||||||
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||||
|
|
||||||
/* Preempt here doesn't matter because that will deal with
|
/*
|
||||||
any pending interrupts. The pending check may end up being
|
* Preempt here doesn't matter because that will deal with any
|
||||||
run on the wrong CPU, but that doesn't hurt. */
|
* pending interrupts. The pending check may end up being run
|
||||||
|
* on the wrong CPU, but that doesn't hurt.
|
||||||
|
*/
|
||||||
|
|
||||||
/* Test for pending */
|
/* Test for pending */
|
||||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||||
|
@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Disabling events is simply a matter of making the event mask
|
* Disabling events is simply a matter of making the event mask
|
||||||
non-zero.
|
* non-zero.
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_irq_disable_direct)
|
ENTRY(xen_irq_disable_direct)
|
||||||
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||||
|
@ -54,13 +56,13 @@ ENDPATCH(xen_irq_disable_direct)
|
||||||
RELOC(xen_irq_disable_direct, 0)
|
RELOC(xen_irq_disable_direct, 0)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
(xen_)save_fl is used to get the current interrupt enable status.
|
* (xen_)save_fl is used to get the current interrupt enable status.
|
||||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
* Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||||
may be set in the return value. We take advantage of this by
|
* may be set in the return value. We take advantage of this by
|
||||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
* making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||||
in that byte are 0), but other bits in the return value are
|
* in that byte are 0), but other bits in the return value are
|
||||||
undefined. We need to toggle the state of the bit, because
|
* undefined. We need to toggle the state of the bit, because Xen and
|
||||||
Xen and x86 use opposite senses (mask vs enable).
|
* x86 use opposite senses (mask vs enable).
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_save_fl_direct)
|
ENTRY(xen_save_fl_direct)
|
||||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||||
|
@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
In principle the caller should be passing us a value returned
|
* In principle the caller should be passing us a value returned from
|
||||||
from xen_save_fl_direct, but for robustness' sake we test only
|
* xen_save_fl_direct, but for robustness' sake we test only the
|
||||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
* X86_EFLAGS_IF flag rather than the whole byte. After setting the
|
||||||
setting the interrupt mask state, it checks for unmasked
|
* interrupt mask state, it checks for unmasked pending events and
|
||||||
pending events and enters the hypervisor to get them delivered
|
* enters the hypervisor to get them delivered if so.
|
||||||
if so.
|
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_restore_fl_direct)
|
ENTRY(xen_restore_fl_direct)
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct)
|
||||||
testb $X86_EFLAGS_IF>>8, %ah
|
testb $X86_EFLAGS_IF>>8, %ah
|
||||||
#endif
|
#endif
|
||||||
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
|
||||||
/* Preempt here doesn't matter because that will deal with
|
/*
|
||||||
any pending interrupts. The pending check may end up being
|
* Preempt here doesn't matter because that will deal with any
|
||||||
run on the wrong CPU, but that doesn't hurt. */
|
* pending interrupts. The pending check may end up being run
|
||||||
|
* on the wrong CPU, but that doesn't hurt.
|
||||||
|
*/
|
||||||
|
|
||||||
/* check for unmasked and pending */
|
/* check for unmasked and pending */
|
||||||
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
|
||||||
|
@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct)
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Force an event check by making a hypercall,
|
* Force an event check by making a hypercall, but preserve regs
|
||||||
but preserve regs before making the call.
|
* before making the call.
|
||||||
*/
|
*/
|
||||||
check_events:
|
check_events:
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
|
@ -137,4 +140,3 @@ check_events:
|
||||||
pop %rax
|
pop %rax
|
||||||
#endif
|
#endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,16 @@
|
||||||
/*
|
/*
|
||||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
* Asm versions of Xen pv-ops, suitable for either direct use or
|
||||||
The inline versions are the same as the direct-use versions, with the
|
* inlining. The inline versions are the same as the direct-use
|
||||||
pre- and post-amble chopped off.
|
* versions, with the pre- and post-amble chopped off.
|
||||||
|
*
|
||||||
This code is encoded for size rather than absolute efficiency,
|
* This code is encoded for size rather than absolute efficiency, with
|
||||||
with a view to being able to inline as much as possible.
|
* a view to being able to inline as much as possible.
|
||||||
|
*
|
||||||
We only bother with direct forms (ie, vcpu in pda) of the operations
|
* We only bother with direct forms (ie, vcpu in pda) of the
|
||||||
here; the indirect forms are better handled in C, since they're
|
* operations here; the indirect forms are better handled in C, since
|
||||||
generally too large to inline anyway.
|
* they're generally too large to inline anyway.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//#include <asm/asm-offsets.h>
|
|
||||||
#include <asm/thread_info.h>
|
#include <asm/thread_info.h>
|
||||||
#include <asm/processor-flags.h>
|
#include <asm/processor-flags.h>
|
||||||
#include <asm/segment.h>
|
#include <asm/segment.h>
|
||||||
|
@ -21,8 +20,8 @@
|
||||||
#include "xen-asm.h"
|
#include "xen-asm.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Force an event check by making a hypercall,
|
* Force an event check by making a hypercall, but preserve regs
|
||||||
but preserve regs before making the call.
|
* before making the call.
|
||||||
*/
|
*/
|
||||||
check_events:
|
check_events:
|
||||||
push %eax
|
push %eax
|
||||||
|
@ -35,10 +34,10 @@ check_events:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
/*
|
/*
|
||||||
We can't use sysexit directly, because we're not running in ring0.
|
* We can't use sysexit directly, because we're not running in ring0.
|
||||||
But we can easily fake it up using iret. Assuming xen_sysexit
|
* But we can easily fake it up using iret. Assuming xen_sysexit is
|
||||||
is jumped to with a standard stack frame, we can just strip it
|
* jumped to with a standard stack frame, we can just strip it back to
|
||||||
back to a standard iret frame and use iret.
|
* a standard iret frame and use iret.
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_sysexit)
|
ENTRY(xen_sysexit)
|
||||||
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
|
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
|
||||||
|
@ -49,33 +48,31 @@ ENTRY(xen_sysexit)
|
||||||
ENDPROC(xen_sysexit)
|
ENDPROC(xen_sysexit)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
This is run where a normal iret would be run, with the same stack setup:
|
* This is run where a normal iret would be run, with the same stack setup:
|
||||||
8: eflags
|
* 8: eflags
|
||||||
4: cs
|
* 4: cs
|
||||||
esp-> 0: eip
|
* esp-> 0: eip
|
||||||
|
*
|
||||||
This attempts to make sure that any pending events are dealt
|
* This attempts to make sure that any pending events are dealt with
|
||||||
with on return to usermode, but there is a small window in
|
* on return to usermode, but there is a small window in which an
|
||||||
which an event can happen just before entering usermode. If
|
* event can happen just before entering usermode. If the nested
|
||||||
the nested interrupt ends up setting one of the TIF_WORK_MASK
|
* interrupt ends up setting one of the TIF_WORK_MASK pending work
|
||||||
pending work flags, they will not be tested again before
|
* flags, they will not be tested again before returning to
|
||||||
returning to usermode. This means that a process can end up
|
* usermode. This means that a process can end up with pending work,
|
||||||
with pending work, which will be unprocessed until the process
|
* which will be unprocessed until the process enters and leaves the
|
||||||
enters and leaves the kernel again, which could be an
|
* kernel again, which could be an unbounded amount of time. This
|
||||||
unbounded amount of time. This means that a pending signal or
|
* means that a pending signal or reschedule event could be
|
||||||
reschedule event could be indefinitely delayed.
|
* indefinitely delayed.
|
||||||
|
*
|
||||||
The fix is to notice a nested interrupt in the critical
|
* The fix is to notice a nested interrupt in the critical window, and
|
||||||
window, and if one occurs, then fold the nested interrupt into
|
* if one occurs, then fold the nested interrupt into the current
|
||||||
the current interrupt stack frame, and re-process it
|
* interrupt stack frame, and re-process it iteratively rather than
|
||||||
iteratively rather than recursively. This means that it will
|
* recursively. This means that it will exit via the normal path, and
|
||||||
exit via the normal path, and all pending work will be dealt
|
* all pending work will be dealt with appropriately.
|
||||||
with appropriately.
|
*
|
||||||
|
* Because the nested interrupt handler needs to deal with the current
|
||||||
Because the nested interrupt handler needs to deal with the
|
* stack state in whatever form it's in, we keep things simple by only
|
||||||
current stack state in whatever form it's in, we keep things
|
* using a single register which is pushed/popped on the stack.
|
||||||
simple by only using a single register which is pushed/popped
|
|
||||||
on the stack.
|
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_iret)
|
ENTRY(xen_iret)
|
||||||
/* test eflags for special cases */
|
/* test eflags for special cases */
|
||||||
|
@ -85,8 +82,10 @@ ENTRY(xen_iret)
|
||||||
push %eax
|
push %eax
|
||||||
ESP_OFFSET=4 # bytes pushed onto stack
|
ESP_OFFSET=4 # bytes pushed onto stack
|
||||||
|
|
||||||
/* Store vcpu_info pointer for easy access. Do it this
|
/*
|
||||||
way to avoid having to reload %fs */
|
* Store vcpu_info pointer for easy access. Do it this way to
|
||||||
|
* avoid having to reload %fs
|
||||||
|
*/
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
GET_THREAD_INFO(%eax)
|
GET_THREAD_INFO(%eax)
|
||||||
movl TI_cpu(%eax), %eax
|
movl TI_cpu(%eax), %eax
|
||||||
|
@ -99,30 +98,39 @@ ENTRY(xen_iret)
|
||||||
/* check IF state we're restoring */
|
/* check IF state we're restoring */
|
||||||
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
|
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
|
||||||
|
|
||||||
/* Maybe enable events. Once this happens we could get a
|
/*
|
||||||
recursive event, so the critical region starts immediately
|
* Maybe enable events. Once this happens we could get a
|
||||||
afterwards. However, if that happens we don't end up
|
* recursive event, so the critical region starts immediately
|
||||||
resuming the code, so we don't have to be worried about
|
* afterwards. However, if that happens we don't end up
|
||||||
being preempted to another CPU. */
|
* resuming the code, so we don't have to be worried about
|
||||||
|
* being preempted to another CPU.
|
||||||
|
*/
|
||||||
setz XEN_vcpu_info_mask(%eax)
|
setz XEN_vcpu_info_mask(%eax)
|
||||||
xen_iret_start_crit:
|
xen_iret_start_crit:
|
||||||
|
|
||||||
/* check for unmasked and pending */
|
/* check for unmasked and pending */
|
||||||
cmpw $0x0001, XEN_vcpu_info_pending(%eax)
|
cmpw $0x0001, XEN_vcpu_info_pending(%eax)
|
||||||
|
|
||||||
/* If there's something pending, mask events again so we
|
/*
|
||||||
can jump back into xen_hypervisor_callback */
|
* If there's something pending, mask events again so we can
|
||||||
|
* jump back into xen_hypervisor_callback
|
||||||
|
*/
|
||||||
sete XEN_vcpu_info_mask(%eax)
|
sete XEN_vcpu_info_mask(%eax)
|
||||||
|
|
||||||
popl %eax
|
popl %eax
|
||||||
|
|
||||||
/* From this point on the registers are restored and the stack
|
/*
|
||||||
updated, so we don't need to worry about it if we're preempted */
|
* From this point on the registers are restored and the stack
|
||||||
|
* updated, so we don't need to worry about it if we're
|
||||||
|
* preempted
|
||||||
|
*/
|
||||||
iret_restore_end:
|
iret_restore_end:
|
||||||
|
|
||||||
/* Jump to hypervisor_callback after fixing up the stack.
|
/*
|
||||||
Events are masked, so jumping out of the critical
|
* Jump to hypervisor_callback after fixing up the stack.
|
||||||
region is OK. */
|
* Events are masked, so jumping out of the critical region is
|
||||||
|
* OK.
|
||||||
|
*/
|
||||||
je xen_hypervisor_callback
|
je xen_hypervisor_callback
|
||||||
|
|
||||||
1: iret
|
1: iret
|
||||||
|
@ -139,55 +147,55 @@ hyper_iret:
|
||||||
.globl xen_iret_start_crit, xen_iret_end_crit
|
.globl xen_iret_start_crit, xen_iret_end_crit
|
||||||
|
|
||||||
/*
|
/*
|
||||||
This is called by xen_hypervisor_callback in entry.S when it sees
|
* This is called by xen_hypervisor_callback in entry.S when it sees
|
||||||
that the EIP at the time of interrupt was between xen_iret_start_crit
|
* that the EIP at the time of interrupt was between
|
||||||
and xen_iret_end_crit. We're passed the EIP in %eax so we can do
|
* xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
|
||||||
a more refined determination of what to do.
|
* %eax so we can do a more refined determination of what to do.
|
||||||
|
*
|
||||||
The stack format at this point is:
|
* The stack format at this point is:
|
||||||
----------------
|
* ----------------
|
||||||
ss : (ss/esp may be present if we came from usermode)
|
* ss : (ss/esp may be present if we came from usermode)
|
||||||
esp :
|
* esp :
|
||||||
eflags } outer exception info
|
* eflags } outer exception info
|
||||||
cs }
|
* cs }
|
||||||
eip }
|
* eip }
|
||||||
---------------- <- edi (copy dest)
|
* ---------------- <- edi (copy dest)
|
||||||
eax : outer eax if it hasn't been restored
|
* eax : outer eax if it hasn't been restored
|
||||||
----------------
|
* ----------------
|
||||||
eflags } nested exception info
|
* eflags } nested exception info
|
||||||
cs } (no ss/esp because we're nested
|
* cs } (no ss/esp because we're nested
|
||||||
eip } from the same ring)
|
* eip } from the same ring)
|
||||||
orig_eax }<- esi (copy src)
|
* orig_eax }<- esi (copy src)
|
||||||
- - - - - - - -
|
* - - - - - - - -
|
||||||
fs }
|
* fs }
|
||||||
es }
|
* es }
|
||||||
ds } SAVE_ALL state
|
* ds } SAVE_ALL state
|
||||||
eax }
|
* eax }
|
||||||
: :
|
* : :
|
||||||
ebx }<- esp
|
* ebx }<- esp
|
||||||
----------------
|
* ----------------
|
||||||
|
*
|
||||||
In order to deliver the nested exception properly, we need to shift
|
* In order to deliver the nested exception properly, we need to shift
|
||||||
everything from the return addr up to the error code so it
|
* everything from the return addr up to the error code so it sits
|
||||||
sits just under the outer exception info. This means that when we
|
* just under the outer exception info. This means that when we
|
||||||
handle the exception, we do it in the context of the outer exception
|
* handle the exception, we do it in the context of the outer
|
||||||
rather than starting a new one.
|
* exception rather than starting a new one.
|
||||||
|
*
|
||||||
The only caveat is that if the outer eax hasn't been
|
* The only caveat is that if the outer eax hasn't been restored yet
|
||||||
restored yet (ie, it's still on stack), we need to insert
|
* (ie, it's still on stack), we need to insert its value into the
|
||||||
its value into the SAVE_ALL state before going on, since
|
* SAVE_ALL state before going on, since it's usermode state which we
|
||||||
it's usermode state which we eventually need to restore.
|
* eventually need to restore.
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_iret_crit_fixup)
|
ENTRY(xen_iret_crit_fixup)
|
||||||
/*
|
/*
|
||||||
Paranoia: Make sure we're really coming from kernel space.
|
* Paranoia: Make sure we're really coming from kernel space.
|
||||||
One could imagine a case where userspace jumps into the
|
* One could imagine a case where userspace jumps into the
|
||||||
critical range address, but just before the CPU delivers a GP,
|
* critical range address, but just before the CPU delivers a
|
||||||
it decides to deliver an interrupt instead. Unlikely?
|
* GP, it decides to deliver an interrupt instead. Unlikely?
|
||||||
Definitely. Easy to avoid? Yes. The Intel documents
|
* Definitely. Easy to avoid? Yes. The Intel documents
|
||||||
explicitly say that the reported EIP for a bad jump is the
|
* explicitly say that the reported EIP for a bad jump is the
|
||||||
jump instruction itself, not the destination, but some virtual
|
* jump instruction itself, not the destination, but some
|
||||||
environments get this wrong.
|
* virtual environments get this wrong.
|
||||||
*/
|
*/
|
||||||
movl PT_CS(%esp), %ecx
|
movl PT_CS(%esp), %ecx
|
||||||
andl $SEGMENT_RPL_MASK, %ecx
|
andl $SEGMENT_RPL_MASK, %ecx
|
||||||
|
@ -197,8 +205,10 @@ ENTRY(xen_iret_crit_fixup)
|
||||||
lea PT_ORIG_EAX(%esp), %esi
|
lea PT_ORIG_EAX(%esp), %esi
|
||||||
lea PT_EFLAGS(%esp), %edi
|
lea PT_EFLAGS(%esp), %edi
|
||||||
|
|
||||||
/* If eip is before iret_restore_end then stack
|
/*
|
||||||
hasn't been restored yet. */
|
* If eip is before iret_restore_end then stack
|
||||||
|
* hasn't been restored yet.
|
||||||
|
*/
|
||||||
cmp $iret_restore_end, %eax
|
cmp $iret_restore_end, %eax
|
||||||
jae 1f
|
jae 1f
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
/*
|
/*
|
||||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
* Asm versions of Xen pv-ops, suitable for either direct use or
|
||||||
The inline versions are the same as the direct-use versions, with the
|
* inlining. The inline versions are the same as the direct-use
|
||||||
pre- and post-amble chopped off.
|
* versions, with the pre- and post-amble chopped off.
|
||||||
|
*
|
||||||
This code is encoded for size rather than absolute efficiency,
|
* This code is encoded for size rather than absolute efficiency, with
|
||||||
with a view to being able to inline as much as possible.
|
* a view to being able to inline as much as possible.
|
||||||
|
*
|
||||||
We only bother with direct forms (ie, vcpu in pda) of the operations
|
* We only bother with direct forms (ie, vcpu in pda) of the
|
||||||
here; the indirect forms are better handled in C, since they're
|
* operations here; the indirect forms are better handled in C, since
|
||||||
generally too large to inline anyway.
|
* they're generally too large to inline anyway.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <asm/errno.h>
|
#include <asm/errno.h>
|
||||||
|
@ -27,19 +27,19 @@ ENTRY(xen_adjust_exception_frame)
|
||||||
|
|
||||||
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
|
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
|
||||||
/*
|
/*
|
||||||
Xen64 iret frame:
|
* Xen64 iret frame:
|
||||||
|
*
|
||||||
ss
|
* ss
|
||||||
rsp
|
* rsp
|
||||||
rflags
|
* rflags
|
||||||
cs
|
* cs
|
||||||
rip <-- standard iret frame
|
* rip <-- standard iret frame
|
||||||
|
*
|
||||||
flags
|
* flags
|
||||||
|
*
|
||||||
rcx }
|
* rcx }
|
||||||
r11 }<-- pushed by hypercall page
|
* r11 }<-- pushed by hypercall page
|
||||||
rsp -> rax }
|
* rsp->rax }
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_iret)
|
ENTRY(xen_iret)
|
||||||
pushq $0
|
pushq $0
|
||||||
|
@ -48,8 +48,8 @@ ENDPATCH(xen_iret)
|
||||||
RELOC(xen_iret, 1b+1)
|
RELOC(xen_iret, 1b+1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
sysexit is not used for 64-bit processes, so it's
|
* sysexit is not used for 64-bit processes, so it's only ever used to
|
||||||
only ever used to return to 32-bit compat userspace.
|
* return to 32-bit compat userspace.
|
||||||
*/
|
*/
|
||||||
ENTRY(xen_sysexit)
|
ENTRY(xen_sysexit)
|
||||||
pushq $__USER32_DS
|
pushq $__USER32_DS
|
||||||
|
@ -64,8 +64,10 @@ ENDPATCH(xen_sysexit)
|
||||||
RELOC(xen_sysexit, 1b+1)
|
RELOC(xen_sysexit, 1b+1)
|
||||||
|
|
||||||
ENTRY(xen_sysret64)
|
ENTRY(xen_sysret64)
|
||||||
/* We're already on the usermode stack at this point, but still
|
/*
|
||||||
with the kernel gs, so we can easily switch back */
|
* We're already on the usermode stack at this point, but
|
||||||
|
* still with the kernel gs, so we can easily switch back
|
||||||
|
*/
|
||||||
movq %rsp, PER_CPU_VAR(old_rsp)
|
movq %rsp, PER_CPU_VAR(old_rsp)
|
||||||
movq PER_CPU_VAR(kernel_stack), %rsp
|
movq PER_CPU_VAR(kernel_stack), %rsp
|
||||||
|
|
||||||
|
@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64)
|
||||||
RELOC(xen_sysret64, 1b+1)
|
RELOC(xen_sysret64, 1b+1)
|
||||||
|
|
||||||
ENTRY(xen_sysret32)
|
ENTRY(xen_sysret32)
|
||||||
/* We're already on the usermode stack at this point, but still
|
/*
|
||||||
with the kernel gs, so we can easily switch back */
|
* We're already on the usermode stack at this point, but
|
||||||
|
* still with the kernel gs, so we can easily switch back
|
||||||
|
*/
|
||||||
movq %rsp, PER_CPU_VAR(old_rsp)
|
movq %rsp, PER_CPU_VAR(old_rsp)
|
||||||
movq PER_CPU_VAR(kernel_stack), %rsp
|
movq PER_CPU_VAR(kernel_stack), %rsp
|
||||||
|
|
||||||
|
@ -98,22 +102,21 @@ ENDPATCH(xen_sysret32)
|
||||||
RELOC(xen_sysret32, 1b+1)
|
RELOC(xen_sysret32, 1b+1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Xen handles syscall callbacks much like ordinary exceptions,
|
* Xen handles syscall callbacks much like ordinary exceptions, which
|
||||||
which means we have:
|
* means we have:
|
||||||
- kernel gs
|
* - kernel gs
|
||||||
- kernel rsp
|
* - kernel rsp
|
||||||
- an iret-like stack frame on the stack (including rcx and r11):
|
* - an iret-like stack frame on the stack (including rcx and r11):
|
||||||
ss
|
* ss
|
||||||
rsp
|
* rsp
|
||||||
rflags
|
* rflags
|
||||||
cs
|
* cs
|
||||||
rip
|
* rip
|
||||||
r11
|
* r11
|
||||||
rsp-> rcx
|
* rsp->rcx
|
||||||
|
*
|
||||||
In all the entrypoints, we undo all that to make it look
|
* In all the entrypoints, we undo all that to make it look like a
|
||||||
like a CPU-generated syscall/sysenter and jump to the normal
|
* CPU-generated syscall/sysenter and jump to the normal entrypoint.
|
||||||
entrypoint.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
.macro undo_xen_syscall
|
.macro undo_xen_syscall
|
||||||
|
|
Loading…
Reference in a new issue