x86-64: Use RIP-relative addressing for most per-CPU accesses
Observing that per-CPU data (in the SMP case) is reachable by exploiting 64-bit address wraparound (building on the default kernel load address being at 16 MB), the one-byte-shorter RIP-relative addressing form can be used for most per-CPU accesses. The one exception is the "stable" reads, where the use of the "P" operand modifier prevents the compiler from using RIP-relative addressing, but is unavoidable due to the use of the "p" constraint (side note: with gcc 4.9.x the intended effect of this is no longer achieved, see gcc bug 63637). Because of the dependency on the minimum kernel load address, arbitrarily low values for CONFIG_PHYSICAL_START are no longer possible. A link-time assertion is added which, when it triggers, points to the need to increase that value. Signed-off-by: Jan Beulich <jbeulich@suse.com> Link: http://lkml.kernel.org/r/5458A1780200007800044A9D@mail.emea.novell.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
6d24c5f72d
commit
97b67ae559
2 changed files with 46 additions and 15 deletions
|
@ -64,7 +64,7 @@
|
|||
#define __percpu_prefix ""
|
||||
#endif
|
||||
|
||||
#define __percpu_arg(x) __percpu_prefix "%P" #x
|
||||
#define __percpu_arg(x) __percpu_prefix "%" #x
|
||||
|
||||
/*
|
||||
* Initialized pointers to per-cpu variables needed for the boot
|
||||
|
@ -179,29 +179,58 @@ do { \
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define percpu_from_op(op, var, constraint) \
|
||||
#define percpu_from_op(op, var) \
|
||||
({ \
|
||||
typeof(var) pfo_ret__; \
|
||||
switch (sizeof(var)) { \
|
||||
case 1: \
|
||||
asm(op "b "__percpu_arg(1)",%0" \
|
||||
: "=q" (pfo_ret__) \
|
||||
: constraint); \
|
||||
: "m" (var)); \
|
||||
break; \
|
||||
case 2: \
|
||||
asm(op "w "__percpu_arg(1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: constraint); \
|
||||
: "m" (var)); \
|
||||
break; \
|
||||
case 4: \
|
||||
asm(op "l "__percpu_arg(1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: constraint); \
|
||||
: "m" (var)); \
|
||||
break; \
|
||||
case 8: \
|
||||
asm(op "q "__percpu_arg(1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: constraint); \
|
||||
: "m" (var)); \
|
||||
break; \
|
||||
default: __bad_percpu_size(); \
|
||||
} \
|
||||
pfo_ret__; \
|
||||
})
|
||||
|
||||
#define percpu_stable_op(op, var) \
|
||||
({ \
|
||||
typeof(var) pfo_ret__; \
|
||||
switch (sizeof(var)) { \
|
||||
case 1: \
|
||||
asm(op "b "__percpu_arg(P1)",%0" \
|
||||
: "=q" (pfo_ret__) \
|
||||
: "p" (&(var))); \
|
||||
break; \
|
||||
case 2: \
|
||||
asm(op "w "__percpu_arg(P1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: "p" (&(var))); \
|
||||
break; \
|
||||
case 4: \
|
||||
asm(op "l "__percpu_arg(P1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: "p" (&(var))); \
|
||||
break; \
|
||||
case 8: \
|
||||
asm(op "q "__percpu_arg(P1)",%0" \
|
||||
: "=r" (pfo_ret__) \
|
||||
: "p" (&(var))); \
|
||||
break; \
|
||||
default: __bad_percpu_size(); \
|
||||
} \
|
||||
|
@ -359,11 +388,11 @@ do { \
|
|||
* per-thread variables implemented as per-cpu variables and thus
|
||||
* stable for the duration of the respective task.
|
||||
*/
|
||||
#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
|
||||
#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
|
||||
|
||||
#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
|
||||
#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
|
||||
#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
|
||||
|
||||
#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
|
@ -381,9 +410,9 @@ do { \
|
|||
#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
|
||||
#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
|
||||
|
||||
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
|
||||
#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
|
||||
#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
|
||||
#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
|
@ -435,7 +464,7 @@ do { \
|
|||
* 32 bit must fall back to generic operations.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
|
||||
#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
|
||||
#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
|
||||
|
@ -444,7 +473,7 @@ do { \
|
|||
#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
||||
#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
||||
|
||||
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
||||
#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
|
||||
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
|
||||
#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
|
||||
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
|
||||
|
|
|
@ -186,6 +186,8 @@ SECTIONS
|
|||
* start another segment - init.
|
||||
*/
|
||||
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
|
||||
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
|
||||
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
|
||||
#endif
|
||||
|
||||
INIT_TEXT_SECTION(PAGE_SIZE)
|
||||
|
|
Loading…
Reference in a new issue