From 4ae803253e4649803dc6e972bb5e59fc107cf474 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 17 Sep 2013 09:48:44 +0200
Subject: [PATCH] s390/bitops: optimize set_bit() for constant values

Since zEC12 we have the interlocked-access facility 2, which allows the
ni/oi/xi instructions to be used to update a single byte in storage with
compare-and-swap semantics.
So change set_bit(), clear_bit() and change_bit() to generate such code
instead of a compare-and-swap loop (or using the load-and-* instruction
family), if possible.
This reduces the text segment by yet another 8KB (defconfig).

Alternatively the long displacement variants niy/oiy/xiy could have
been used, but the extended displacement field is usually not needed
and therefore would only increase the size of the text segment again.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/bitops.h | 36 ++++++++++++++++++++++++++++++++++
 arch/s390/kernel/head.S        |  2 +-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 6038349c8410..16df62dde094 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -151,6 +151,18 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
 	unsigned long *addr = __bitops_word(nr, ptr);
 	unsigned long mask;
 
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"oi	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (1 << (nr & 7))
+			: "cc");
+		return;
+	}
+#endif
 	mask = 1UL << (nr & (BITS_PER_LONG - 1));
 	__BITOPS_LOOP(addr, mask, __BITOPS_OR);
 }
@@ -160,6 +172,18 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
 	unsigned long *addr = __bitops_word(nr, ptr);
 	unsigned long mask;
 
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"ni	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (~(1 << (nr & 7)))
+			: "cc");
+		return;
+	}
+#endif
 	mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
 	__BITOPS_LOOP(addr, mask, __BITOPS_AND);
 }
@@ -169,6 +193,18 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
 	unsigned long *addr = __bitops_word(nr, ptr);
 	unsigned long mask;
 
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"xi	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (1 << (nr & 7))
+			: "cc");
+		return;
+	}
+#endif
 	mask = 1UL << (nr & (BITS_PER_LONG - 1));
 	__BITOPS_LOOP(addr, mask, __BITOPS_XOR);
 }
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index fd8db63dfc94..429afcc480cb 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -437,7 +437,7 @@ ENTRY(startup_kdump)
 
 #if defined(CONFIG_64BIT)
 #if defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100efe3, 0xf46ce000, 0x00400000
+	.long 3, 0xc100efe3, 0xf46ce800, 0x00400000
 #elif defined(CONFIG_MARCH_Z196)
 	.long 2, 0xc100efe3, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)