s390/percpu: make use of interlocked-access facility 1 instructions
Optimize this_cpu_* functions for 64 bit by making use of new instructions that came with the interlocked-access facility 1 (load-and-*) and the general-instructions-extension facility (asi, agsi). That way we get rid of the compare-and-swap loop in most cases. Code size reduction (defconfig, -march=z196): 11,555 bytes. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
0702fbf572
commit
f84cd97e5c
1 changed files with 96 additions and 21 deletions
|
@ -21,7 +21,11 @@
|
||||||
#define ARCH_NEEDS_WEAK_PER_CPU
|
#define ARCH_NEEDS_WEAK_PER_CPU
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define arch_this_cpu_to_op(pcp, val, op) \
|
/*
|
||||||
|
* We use a compare-and-swap loop since that uses less cpu cycles than
|
||||||
|
* disabling and enabling interrupts like the generic variant would do.
|
||||||
|
*/
|
||||||
|
#define arch_this_cpu_to_op_simple(pcp, val, op) \
|
||||||
({ \
|
({ \
|
||||||
typedef typeof(pcp) pcp_op_T__; \
|
typedef typeof(pcp) pcp_op_T__; \
|
||||||
pcp_op_T__ old__, new__, prev__; \
|
pcp_op_T__ old__, new__, prev__; \
|
||||||
|
@ -38,30 +42,101 @@
|
||||||
new__; \
|
new__; \
|
||||||
})
|
})
|
||||||
|
|
||||||
#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
|
#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
|
||||||
|
#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
|
||||||
|
#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
|
||||||
|
#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
|
||||||
|
#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
|
||||||
|
#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
|
||||||
|
|
||||||
#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
|
||||||
#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
|
||||||
#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
|
||||||
#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
|
|
||||||
|
|
||||||
#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &)
|
#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &)
|
#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &)
|
#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &)
|
#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
|
||||||
|
#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
|
||||||
|
#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
|
||||||
|
#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
|
||||||
|
#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
|
||||||
|
#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
|
||||||
|
#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, ^)
|
||||||
|
|
||||||
#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |)
|
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
|
||||||
#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |)
|
|
||||||
#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |)
|
|
||||||
#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |)
|
|
||||||
|
|
||||||
#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
|
#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
|
||||||
#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
|
{ \
|
||||||
#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
|
typedef typeof(pcp) pcp_op_T__; \
|
||||||
#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
|
pcp_op_T__ val__ = (val); \
|
||||||
|
pcp_op_T__ old__, *ptr__; \
|
||||||
|
preempt_disable(); \
|
||||||
|
ptr__ = __this_cpu_ptr(&(pcp)); \
|
||||||
|
if (__builtin_constant_p(val__) && \
|
||||||
|
((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
|
||||||
|
asm volatile( \
|
||||||
|
op2 " %[ptr__],%[val__]\n" \
|
||||||
|
: [ptr__] "+Q" (*ptr__) \
|
||||||
|
: [val__] "i" ((szcast)val__) \
|
||||||
|
: "cc"); \
|
||||||
|
} else { \
|
||||||
|
asm volatile( \
|
||||||
|
op1 " %[old__],%[val__],%[ptr__]\n" \
|
||||||
|
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
|
||||||
|
: [val__] "d" (val__) \
|
||||||
|
: "cc"); \
|
||||||
|
} \
|
||||||
|
preempt_enable(); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
|
||||||
|
#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
|
||||||
|
|
||||||
|
#define arch_this_cpu_add_return(pcp, val, op) \
|
||||||
|
({ \
|
||||||
|
typedef typeof(pcp) pcp_op_T__; \
|
||||||
|
pcp_op_T__ val__ = (val); \
|
||||||
|
pcp_op_T__ old__, *ptr__; \
|
||||||
|
preempt_disable(); \
|
||||||
|
ptr__ = __this_cpu_ptr(&(pcp)); \
|
||||||
|
asm volatile( \
|
||||||
|
op " %[old__],%[val__],%[ptr__]\n" \
|
||||||
|
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
|
||||||
|
: [val__] "d" (val__) \
|
||||||
|
: "cc"); \
|
||||||
|
preempt_enable(); \
|
||||||
|
old__ + val__; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
|
||||||
|
#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
|
||||||
|
|
||||||
|
#define arch_this_cpu_to_op(pcp, val, op) \
|
||||||
|
{ \
|
||||||
|
typedef typeof(pcp) pcp_op_T__; \
|
||||||
|
pcp_op_T__ val__ = (val); \
|
||||||
|
pcp_op_T__ old__, *ptr__; \
|
||||||
|
preempt_disable(); \
|
||||||
|
ptr__ = __this_cpu_ptr(&(pcp)); \
|
||||||
|
asm volatile( \
|
||||||
|
op " %[old__],%[val__],%[ptr__]\n" \
|
||||||
|
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
|
||||||
|
: [val__] "d" (val__) \
|
||||||
|
: "cc"); \
|
||||||
|
preempt_enable(); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
|
||||||
|
#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang")
|
||||||
|
#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
|
||||||
|
#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
|
||||||
|
#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lax")
|
||||||
|
#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laxg")
|
||||||
|
|
||||||
|
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
|
||||||
|
|
||||||
#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
|
#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
|
||||||
({ \
|
({ \
|
||||||
|
|
Loading…
Reference in a new issue