From a5d5f7daa744b34477c4a12728bde0a1694a1707 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 12 Jul 2012 23:57:35 +0100
Subject: [PATCH 01/10] ARM: 7465/1: Handle >4GB memory sizes in device tree
 and mem=size@start option

The memory regions which are passed to arm_add_memory() from
device tree blobs via early_init_dt_add_memory_arch() can
have sizes that are too large to fit in a 32-bit integer,
so switch to using a phys_addr_t to hold them, to avoid
silently dropping the top 32 bits of the size. Similarly, use
phys_addr_t in early_mem() so that mem=size@start command line
options specifying more than 4GB behave sensibly.
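
As an illustration (a stand-alone user-space sketch, not kernel code;
phys_addr_t and the 32-bit unsigned long are modelled here with
fixed-width types), truncating a 64-bit size through a 32-bit integer
silently drops everything above 4GB:

  #include <stdint.h>
  #include <stdio.h>

  typedef uint64_t phys_addr_t;	/* as with CONFIG_ARCH_PHYS_ADDR_T_64BIT */

  int main(void)
  {
  	phys_addr_t size = 0x200000000ULL;	/* an 8GB bank from the DTB */
  	uint32_t lost = (uint32_t)size;		/* a 32-bit 'unsigned long' */

  	/* prints size=0x200000000 truncated=0 */
  	printf("size=%#llx truncated=%#x\n",
  	       (unsigned long long)size, lost);
  	return 0;
  }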

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/setup.h | 4 ++--
 arch/arm/kernel/setup.c      | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 23ebc0c82a39..24d284a1bfc7 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
 struct membank {
 	phys_addr_t start;
-	unsigned long size;
+	phys_addr_t size;
 	unsigned int highmem;
 };
 
@@ -217,7 +217,7 @@ extern struct meminfo meminfo;
 #define bank_phys_end(bank)	((bank)->start + (bank)->size)
 #define bank_phys_size(bank)	(bank)->size
 
-extern int arm_add_memory(phys_addr_t start, unsigned long size);
+extern int arm_add_memory(phys_addr_t start, phys_addr_t size);
 extern void early_print(const char *str, ...);
 extern void dump_machine_table(void);
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e15d83bb4ea3..a81dcecc7343 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -508,7 +508,7 @@ void __init dump_machine_table(void)
 		/* can't use cpu_relax() here as it may require MMU setup */;
 }
 
-int __init arm_add_memory(phys_addr_t start, unsigned long size)
+int __init arm_add_memory(phys_addr_t start, phys_addr_t size)
 {
 	struct membank *bank = &meminfo.bank[meminfo.nr_banks];
 
@@ -538,7 +538,7 @@ int __init arm_add_memory(phys_addr_t start, unsigned long size)
 	}
 #endif
 
-	bank->size = size & PAGE_MASK;
+	bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1);
 
 	/*
 	 * Check whether this memory region has non-zero size or
@@ -558,7 +558,7 @@ int __init arm_add_memory(phys_addr_t start, unsigned long size)
 static int __init early_mem(char *p)
 {
 	static int usermem __initdata = 0;
-	unsigned long size;
+	phys_addr_t size;
 	phys_addr_t start;
 	char *endp;
 

From 98bd8b96b26db3399a48202318dca4aaa2515355 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Fri, 13 Jul 2012 08:19:34 +0100
Subject: [PATCH 02/10] ARM: 7466/1: disable interrupt before spinning
 endlessly

The CPU will spin endlessly at the end of the machine_halt and
machine_restart calls.  However, if CONFIG_LOCKUP_DETECTOR is enabled,
this leads to a soft-lockup warning after about 20 seconds, as the
system timer is still alive.

Disable interrupts before spinning endlessly, so that the lockup
warning is never seen.
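
The halt path then has this shape (simplified sketch, not the exact
diff below):

  	local_irq_disable();	/* mask IRQs: the timer tick stops, so
  				 * the soft-lockup watchdog stays quiet */
  	while (1)
  		;		/* spin forever */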

Cc: <stable@vger.kernel.org>
Reported-by: Marek Vasut <marex@denx.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 19c95ea65b2f..693b744fd572 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -247,6 +247,7 @@ void machine_shutdown(void)
 void machine_halt(void)
 {
 	machine_shutdown();
+	local_irq_disable();
 	while (1);
 }
 
@@ -268,6 +269,7 @@ void machine_restart(char *cmd)
 
 	/* Whoops - the platform was unable to reboot. Tell the user! */
 	printk("Reboot failed -- System halted\n");
+	local_irq_disable();
 	while (1);
 }
 

From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 13 Jul 2012 19:15:40 +0100
Subject: [PATCH 03/10] ARM: 7467/1: mutex: use generic xchg-based
 implementation for ARMv6+

The open-coded mutex implementation for ARMv6+ cores suffers from a
severe lack of barriers, so in the uncontended case we don't actually
protect any accesses performed during the critical section.

Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec
code but optimised to remove a branch instruction, as the mutex fastpath
was previously inlined. Now that this is executed out-of-line, we can
reuse the atomic access code for the locking (in fact, we use the xchg
code as this produces shorter critical sections).

This patch uses the generic xchg-based implementation for mutexes on
ARMv6+, which introduces barriers to the lock/unlock operations and also
has the benefit of removing a fair amount of inline assembly code.
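
For reference, the generic lock fastpath has roughly this shape
(paraphrased from asm-generic/mutex-xchg.h of this era, not a verbatim
copy):

  static inline void
  __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
  {
  	/* atomic_xchg() implies full barrier semantics, which is what
  	 * the open-coded ARMv6+ version was missing */
  	if (unlikely(atomic_xchg(count, 0) != 1))
  		fail_fn(count);		/* contended: take the slowpath */
  }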

Cc: <stable@vger.kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nicolas Pitre <nico@linaro.org>
Reported-by: Shan Kang <kangshan0910@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/mutex.h | 119 ++---------------------------------
 1 file changed, 4 insertions(+), 115 deletions(-)

diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h
index 93226cf23ae0..b1479fd04a95 100644
--- a/arch/arm/include/asm/mutex.h
+++ b/arch/arm/include/asm/mutex.h
@@ -7,121 +7,10 @@
  */
 #ifndef _ASM_MUTEX_H
 #define _ASM_MUTEX_H
-
-#if __LINUX_ARM_ARCH__ < 6
-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
-# include <asm-generic/mutex-xchg.h>
-#else
-
 /*
- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
- * atomic decrement (it is not a reliable atomic decrement but it satisfies
- * the defined semantics for our purpose, while being smaller and faster
- * than a real atomic decrement or atomic swap.  The idea is to attempt
- * decrementing the lock value only once.  If once decremented it isn't zero,
- * or if its store-back fails due to a dispute on the exclusive store, we
- * simply bail out immediately through the slow path where the lock will be
- * reattempted until it succeeds.
+ * On pre-ARMv6 hardware this results in a swp-based implementation,
+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
+ * accesses instead.
  */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		__res = fail_fn(count);
-	return __res;
-}
-
-/*
- * Same trick is used for the unlock fast path. However the original value,
- * rather than the result, is used to test for success in order to have
- * better generated assembly.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"ldrex	%0, [%3]	\n\t"
-		"add	%1, %0, #1	\n\t"
-		"strex	%2, %1, [%3]	"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__orig |= __ex_flag;
-	if (unlikely(__orig != 0))
-		fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock()	1
-
-/*
- * For __mutex_fastpath_trylock we use another construct which could be
- * described as a "single value cmpxchg".
- *
- * This provides the needed trylock semantics like cmpxchg would, but it is
- * lighter and less generic than a true cmpxchg implementation.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"1: ldrex	%0, [%3]	\n\t"
-		"subs		%1, %0, #1	\n\t"
-		"strexeq	%2, %1, [%3]	\n\t"
-		"movlt		%0, #0		\n\t"
-		"cmpeq		%2, #0		\n\t"
-		"bgt		1b		"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&count->counter)
-		: "cc", "memory" );
-
-	return __orig;
-}
-
-#endif
+#include <asm-generic/mutex-xchg.h>
 #endif

From 4c36595ec87115f2f876f7d4fdec8ca284a42b9c Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Wed, 18 Jul 2012 19:15:25 +0100
Subject: [PATCH 04/10] ARM: 7468/1: ftrace: Trace function entry before
 updating index

Commit 722b3c74695377d11d18a52f3da08114d37f3f37 modified x86 ftrace to
avoid tracing all functions called from irqs when function graph was
used with a filter.  Port the same fix to ARM.

Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/ftrace.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index df0bf0c8cb79..34e56647dcee 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -179,20 +179,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	old = *parent;
 	*parent = return_hooker;
 
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		*parent = old;
+		return;
+	}
+
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
 				       frame_pointer);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
 	}
-
-	trace.func = self_addr;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
-		*parent = old;
-	}
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE

From a84b895a2348f0dbff31b71ddf954f70a6cde368 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Fri, 20 Jul 2012 02:03:43 +0100
Subject: [PATCH 05/10] ARM: 7476/1: vfp: only clear vfp state for current cpu
 in vfp_pm_suspend

vfp_pm_suspend runs on each cpu, so clear the hardware state pointer
only for the current cpu.  This prevents a possible crash if one cpu
clears the hw state pointer while another cpu has already checked that
it is valid.

Cc: stable@vger.kernel.org
Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vfp/vfpmodule.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 586961929e96..9428006728f2 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -460,7 +460,7 @@ static int vfp_pm_suspend(void)
 	}
 
 	/* clear any information we had about last context state */
-	memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state));
+	vfp_current_hw_state[ti->cpu] = NULL;
 
 	return 0;
 }

From 24b35521b8ddf088531258f06f681bb7b227bf47 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Fri, 20 Jul 2012 02:03:42 +0100
Subject: [PATCH 06/10] ARM: 7477/1: vfp: Always save VFP state in
 vfp_pm_suspend on UP

vfp_pm_suspend should save the VFP state in suspend after
any lazy context switch.  If it only saves when the VFP is enabled,
the state can get lost when, on a UP system:
  Thread 1 uses the VFP
  Context switch occurs to thread 2, VFP is disabled but the
     VFP context is not saved
  Thread 2 initiates suspend
  vfp_pm_suspend is called with the VFP disabled, and the unsaved
     VFP context of Thread 1 in the registers

Modify vfp_pm_suspend to save the VFP context whenever
vfp_current_hw_state is not NULL.

Includes a fix from Ido Yariv <ido@wizery.com>, who pointed out that on
SMP systems the state pointer can be left pointing at a freed task
struct if a task exited on another cpu; this is avoided by guarding the
new if clause with #ifndef CONFIG_SMP.

Cc: Barry Song <bs14@csr.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ido Yariv <ido@wizery.com>
Cc: Daniel Drake <dsd@laptop.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vfp/vfpmodule.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 9428006728f2..fb849d044bde 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -457,6 +457,12 @@ static int vfp_pm_suspend(void)
 
 		/* disable, just in case */
 		fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
+	} else if (vfp_current_hw_state[ti->cpu]) {
+#ifndef CONFIG_SMP
+		fmxr(FPEXC, fpexc | FPEXC_EN);
+		vfp_save_state(vfp_current_hw_state[ti->cpu], fpexc);
+		fmxr(FPEXC, fpexc);
+#endif
 	}
 
 	/* clear any information we had about last context state */

From 5a783cbc48367cfc7b65afc75430953dfe60098f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 20 Jul 2012 18:24:55 +0100
Subject: [PATCH 07/10] ARM: 7478/1: errata: extend workaround for erratum
 #720789

Commit cdf357f1 ("ARM: 6299/1: errata: TLBIASIDIS and TLBIMVAIS
operations can broadcast a faulty ASID") replaced by-ASID TLB flushing
operations with all-ASID variants to work around A9 erratum #720789.

This patch extends the workaround to include the tlb_range operations,
which were overlooked by the original patch.

Cc: <stable@vger.kernel.org>
Tested-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/tlb-v7.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 845f461f8ec1..c2021139cb56 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -38,11 +38,19 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	dsb
 	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
 	mov	r1, r1, lsr #PAGE_SHIFT
+#ifdef CONFIG_ARM_ERRATA_720789
+	mov	r3, #0
+#else
 	asid	r3, r3				@ mask ASID
+#endif
 	orr	r0, r3, r0, lsl #PAGE_SHIFT	@ Create initial MVA
 	mov	r1, r1, lsl #PAGE_SHIFT
 1:
+#ifdef CONFIG_ARM_ERRATA_720789
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 3)	@ TLB invalidate U MVA all ASID (shareable)
+#else
 	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+#endif
 	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
 
 	add	r0, r0, #PAGE_SZ
@@ -67,7 +75,11 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	mov	r0, r0, lsl #PAGE_SHIFT
 	mov	r1, r1, lsl #PAGE_SHIFT
 1:
+#ifdef CONFIG_ARM_ERRATA_720789
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 3)	@ TLB invalidate U MVA all ASID (shareable)
+#else
 	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+#endif
 	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1

From c5dff4ffd327088d85035bec535b7d0c9ea03151 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@dowhile0.org>
Date: Sat, 28 Jul 2012 15:19:55 +0100
Subject: [PATCH 08/10] ARM: 7480/1: only call smp_send_stop() on SMP

On reboot or poweroff (machine_shutdown()), a call to smp_send_stop()
is made (to stop the other CPUs) when CONFIG_SMP=y.

arch/arm/kernel/process.c:

void machine_shutdown(void)
{
 #ifdef CONFIG_SMP
       smp_send_stop();
 #endif
}

smp_send_stop() calls the function pointer smp_cross_call(), which is set
in the smp_init_cpus() function for OMAP processors.

arch/arm/mach-omap2/omap-smp.c:

void __init smp_init_cpus(void)
{
...
	set_smp_cross_call(gic_raise_softirq);
...
}

But the ARM setup_arch() function only calls smp_init_cpus()
if CONFIG_SMP=y && is_smp().

arm/kernel/setup.c:

void __init setup_arch(char **cmdline_p)
{
...
 #ifdef CONFIG_SMP
	if (is_smp())
		smp_init_cpus();
 #endif
...
}

Newer OMAP CPUs are SMP machines, so omap2plus_defconfig sets
CONFIG_SMP=y.  Unfortunately, on an OMAP UP machine is_smp() returns
false, so smp_init_cpus() is never called and the smp_cross_call()
function pointer remains NULL.

If the machine is rebooted or powered off, smp_send_stop() will be
called (since CONFIG_SMP=y), leading to the following error:

[   42.815551] Restarting system.
[   42.819030] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[   42.827667] pgd = d7a74000
[   42.830566] [00000000] *pgd=96ce7831, *pte=00000000, *ppte=00000000
[   42.837249] Internal error: Oops: 80000007 [#1] SMP ARM
[   42.842773] Modules linked in:
[   42.846008] CPU: 0    Not tainted  (3.5.0-rc3-next-20120622-00002-g62e87ba-dirty #44)
[   42.854278] PC is at 0x0
[   42.856994] LR is at smp_send_stop+0x4c/0xe4
[   42.861511] pc : [<00000000>]    lr : [<c00183a4>]    psr: 60000013
[   42.861511] sp : d6c85e70  ip : 00000000  fp : 00000000
[   42.873626] r10: 00000000  r9 : d6c84000  r8 : 00000002
[   42.879150] r7 : c07235a0  r6 : c06dd2d0  r5 : 000f4241  r4 : d6c85e74
[   42.886047] r3 : 00000000  r2 : 00000000  r1 : 00000006  r0 : d6c85e74
[   42.892944] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[   42.900482] Control: 10c5387d  Table: 97a74019  DAC: 00000015
[   42.906555] Process reboot (pid: 1166, stack limit = 0xd6c842f8)
[   42.912902] Stack: (0xd6c85e70 to 0xd6c86000)
[   42.917510] 5e60:                                     c07235a0 00000000 00000000 d6c84000
[   42.926177] 5e80: 01234567 c00143d0 4321fedc c00511bc d6c85ebc 00000168 00000460 00000000
[   42.934814] 5ea0: c1017950 a0000013 c1017900 d8014390 d7ec3858 c0498e48 c1017950 00000000
[   42.943481] 5ec0: d6ddde10 d6c85f78 00000003 00000000 d6ddde10 d6c84000 00000000 00000000
[   42.952117] 5ee0: 00000002 00000000 00000000 c0088c88 00000002 00000000 00000000 c00f4b90
[   42.960784] 5f00: 00000000 d6c85ebc d8014390 d7e311c8 60000013 00000103 00000002 d6c84000
[   42.969421] 5f20: c00f3274 d6e00a00 00000001 60000013 d6c84000 00000000 00000000 c00895d4
[   42.978057] 5f40: 00000002 d8007c80 d781f000 c00f6150 d8010cc0 c00f3274 d781f000 d6c84000
[   42.986694] 5f60: c0013020 d6e00a00 00000001 20000010 0001257c ef000000 00000000 c00895d4
[   42.995361] 5f80: 00000002 00000001 00000003 00000000 00000001 00000003 00000000 00000058
[   43.003997] 5fa0: c00130c8 c0012f00 00000001 00000003 fee1dead 28121969 01234567 00000002
[   43.012634] 5fc0: 00000001 00000003 00000000 00000058 00012584 0001257c 00000001 00000000
[   43.021270] 5fe0: 000124bc bec5cc6c 00008f9c 4a2f7c40 20000010 fee1dead 00000000 00000000
[   43.029968] [<c00183a4>] (smp_send_stop+0x4c/0xe4) from [<c00143d0>] (machine_restart+0xc/0x4c)
[   43.039154] [<c00143d0>] (machine_restart+0xc/0x4c) from [<c00511bc>] (sys_reboot+0x144/0x1f0)
[   43.048278] [<c00511bc>] (sys_reboot+0x144/0x1f0) from [<c0012f00>] (ret_fast_syscall+0x0/0x3c)
[   43.057464] Code: bad PC value
[   43.060760] ---[ end trace c3988d1dd0b8f0fb ]---

Add a check so that smp_cross_call() is only called when there is more
than one CPU online.

Cc: <stable@vger.kernel.org>
Signed-off-by: Javier Martinez Canillas <javier@dowhile0.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index aea74f5bc34a..ebd8ad274d76 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -563,7 +563,8 @@ void smp_send_stop(void)
 
 	cpumask_copy(&mask, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), &mask);
-	smp_cross_call(&mask, IPI_CPU_STOP);
+	if (!cpumask_empty(&mask))
+		smp_cross_call(&mask, IPI_CPU_STOP);
 
 	/* Wait up to one second for other CPUs to stop */
 	timeout = USEC_PER_SEC;

From 15ac49b65024f55c4371a53214879a9c77c4fbf9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 30 Jul 2012 19:42:10 +0100
Subject: [PATCH 09/10] ARM: Fix undefined instruction exception handling

While trying to get a v3.5 kernel booted on the cubox, I noticed that
VFP does not work correctly with VFP bounce handling.  This is because
of confusion over 16-bit vs 32-bit instructions and where the PC is
supposed to point.

The rule is that FP handlers are entered with regs->ARM_pc pointing at
the _next_ instruction to be executed.  However, if the exception is
not handled, regs->ARM_pc points at the faulting instruction.

This is easy for ARM mode, because we know that the next instruction and
previous instructions are separated by four bytes.  This is not true of
Thumb2 though.

Since all FP instructions are 32-bit in Thumb2, this keeps things easy:
we just need to select the appropriate adjustment.  Do this by moving
the adjustment out of do_undefinstr() into the assembly code, as only
the assembly code knows whether it's dealing with a 32-bit or 16-bit
instruction.
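
In C terms, the fixup performed by the new __und_fault path amounts to
(hypothetical rendering of the assembly, for clarity only):

  	/* regs->ARM_pc points at the next instruction; subtract the
  	 * width of the faulting one: 4 for ARM and 32-bit Thumb-2,
  	 * 2 for a 16-bit Thumb instruction. */
  	regs->ARM_pc -= width;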

Cc: <stable@vger.kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-armv.S | 111 +++++++++++++++++++++++------------
 arch/arm/kernel/traps.c      |   8 ---
 arch/arm/vfp/entry.S         |  16 ++---
 arch/arm/vfp/vfphw.S         |  19 +++---
 4 files changed, 92 insertions(+), 62 deletions(-)

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0d1851ca6eb9..0f82098c9bfe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -244,6 +244,19 @@ svc_preempt:
 	b	1b
 #endif
 
+__und_fault:
+	@ Correct the PC such that it is pointing at the instruction
+	@ which caused the fault.  If the faulting instruction was ARM
+	@ the PC will be pointing at the next instruction, and have to
+	@ subtract 4.  Otherwise, it is Thumb, and the PC will be
+	@ pointing at the second half of the Thumb instruction.  We
+	@ have to subtract 2.
+	ldr	r2, [r0, #S_PC]
+	sub	r2, r2, r1
+	str	r2, [r0, #S_PC]
+	b	do_undefinstr
+ENDPROC(__und_fault)
+
 	.align	5
 __und_svc:
 #ifdef CONFIG_KPROBES
@@ -261,25 +274,32 @@ __und_svc:
 	@
 	@  r0 - instruction
 	@
-#ifndef	CONFIG_THUMB2_KERNEL
+#ifndef CONFIG_THUMB2_KERNEL
 	ldr	r0, [r4, #-4]
 #else
+	mov	r1, #2
 	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
 	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
-	ldrhhs	r9, [r4]			@ bottom 16 bits
-	orrhs	r0, r9, r0, lsl #16
+	blo	__und_svc_fault
+	ldrh	r9, [r4]			@ bottom 16 bits
+	add	r4, r4, #2
+	str	r4, [sp, #S_PC]
+	orr	r0, r9, r0, lsl #16
 #endif
-	adr	r9, BSYM(1f)
+	adr	r9, BSYM(__und_svc_finish)
 	mov	r2, r4
 	bl	call_fpe
 
+	mov	r1, #4				@ PC correction to apply
+__und_svc_fault:
 	mov	r0, sp				@ struct pt_regs *regs
-	bl	do_undefinstr
+	bl	__und_fault
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
-1:	disable_irq_notrace
+__und_svc_finish:
+	disable_irq_notrace
 
 	@
 	@ restore SPSR and restart the instruction
@@ -423,25 +443,33 @@ __und_usr:
 	mov	r2, r4
 	mov	r3, r5
 
+	@ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
+	@      faulting instruction depending on Thumb mode.
+	@ r3 = regs->ARM_cpsr
 	@
-	@ fall through to the emulation code, which returns using r9 if
-	@ it has emulated the instruction, or the more conventional lr
-	@ if we are to treat this as a real undefined instruction
-	@
-	@  r0 - instruction
+	@ The emulation code returns using r9 if it has emulated the
+	@ instruction, or the more conventional lr if we are to treat
+	@ this as a real undefined instruction
 	@
 	adr	r9, BSYM(ret_from_exception)
-	adr	lr, BSYM(__und_usr_unknown)
+
 	tst	r3, #PSR_T_BIT			@ Thumb mode?
-	itet	eq				@ explicit IT needed for the 1f label
-	subeq	r4, r2, #4			@ ARM instr at LR - 4
-	subne	r4, r2, #2			@ Thumb instr at LR - 2
-1:	ldreqt	r0, [r4]
+	bne	__und_usr_thumb
+	sub	r4, r2, #4			@ ARM instr at LR - 4
+1:	ldrt	r0, [r4]
 #ifdef CONFIG_CPU_ENDIAN_BE8
-	reveq	r0, r0				@ little endian instruction
+	rev	r0, r0				@ little endian instruction
 #endif
-	beq	call_fpe
+	@ r0 = 32-bit ARM instruction which caused the exception
+	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
+	@ r4 = PC value for the faulting instruction
+	@ lr = 32-bit undefined instruction function
+	adr	lr, BSYM(__und_usr_fault_32)
+	b	call_fpe
+
+__und_usr_thumb:
 	@ Thumb instruction
+	sub	r4, r2, #2			@ First half of thumb instr at LR - 2
 #if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
 /*
  * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
@@ -455,7 +483,7 @@ __und_usr:
 	ldr	r5, .LCcpu_architecture
 	ldr	r5, [r5]
 	cmp	r5, #CPU_ARCH_ARMv7
-	blo	__und_usr_unknown
+	blo	__und_usr_fault_16		@ 16bit undefined instruction
 /*
  * The following code won't get run unless the running CPU really is v7, so
  * coding round the lack of ldrht on older arches is pointless.  Temporarily
@@ -463,15 +491,18 @@ __und_usr:
  */
 	.arch	armv6t2
 #endif
-2:
- ARM(	ldrht	r5, [r4], #2	)
- THUMB(	ldrht	r5, [r4]	)
- THUMB(	add	r4, r4, #2	)
+2:	ldrht	r5, [r4]
 	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
-	blo	__und_usr_unknown
-3:	ldrht	r0, [r4]
+	blo	__und_usr_fault_16		@ 16bit undefined instruction
+3:	ldrht	r0, [r2]
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
+	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
 	orr	r0, r0, r5, lsl #16
+	adr	lr, BSYM(__und_usr_fault_32)
+	@ r0 = the two 16-bit Thumb instructions which caused the exception
+	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
+	@ r4 = PC value for the first 16-bit Thumb instruction
+	@ lr = 32bit undefined instruction function
 
 #if __LINUX_ARM_ARCH__ < 7
 /* If the target arch was overridden, change it back: */
@@ -482,17 +513,13 @@ __und_usr:
 #endif
 #endif /* __LINUX_ARM_ARCH__ < 7 */
 #else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
-	b	__und_usr_unknown
+	b	__und_usr_fault_16
 #endif
- UNWIND(.fnend		)
+ UNWIND(.fnend)
 ENDPROC(__und_usr)
 
-	@
-	@ fallthrough to call_fpe
-	@
-
 /*
- * The out of line fixup for the ldrt above.
+ * The out of line fixup for the ldrt instructions above.
  */
 	.pushsection .fixup, "ax"
 	.align	2
@@ -524,11 +551,12 @@ ENDPROC(__und_usr)
  * NEON handler code.
  *
  * Emulators may wish to make use of the following registers:
- *  r0  = instruction opcode.
- *  r2  = PC+4
+ *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
+ *  r2  = PC value to resume execution after successful emulation
  *  r9  = normal "successful" return address
- *  r10 = this threads thread_info structure.
+ *  r10 = this threads thread_info structure
  *  lr  = unrecognised instruction return address
+ * IRQs disabled, FIQs enabled.
  */
 	@
 	@ Fall-through from Thumb-2 __und_usr
@@ -659,12 +687,17 @@ ENTRY(no_fp)
 	mov	pc, lr
 ENDPROC(no_fp)
 
-__und_usr_unknown:
-	enable_irq
+__und_usr_fault_32:
+	mov	r1, #4
+	b	1f
+__und_usr_fault_16:
+	mov	r1, #2
+1:	enable_irq
 	mov	r0, sp
 	adr	lr, BSYM(ret_from_exception)
-	b	do_undefinstr
-ENDPROC(__und_usr_unknown)
+	b	__und_fault
+ENDPROC(__und_usr_fault_32)
+ENDPROC(__und_usr_fault_16)
 
 	.align	5
 __pabt_usr:
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3647170e9a16..c7cae6b9a4d9 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -370,18 +370,10 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
 
 asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 {
-	unsigned int correction = thumb_mode(regs) ? 2 : 4;
 	unsigned int instr;
 	siginfo_t info;
 	void __user *pc;
 
-	/*
-	 * According to the ARM ARM, PC is 2 or 4 bytes ahead,
-	 * depending whether we're in Thumb mode or not.
-	 * Correct this offset.
-	 */
-	regs->ARM_pc -= correction;
-
 	pc = (void __user *)instruction_pointer(regs);
 
 	if (processor_mode(regs) == SVC_MODE) {
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 4fa9903b83cf..cc926c985981 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -7,18 +7,20 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Basic entry code, called from the kernel's undefined instruction trap.
- *  r0  = faulted instruction
- *  r5  = faulted PC+4
- *  r9  = successful return
- *  r10 = thread_info structure
- *  lr  = failure return
  */
 #include <asm/thread_info.h>
 #include <asm/vfpmacros.h>
 #include "../kernel/entry-header.S"
 
+@ VFP entry point.
+@
+@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
+@  r2  = PC value to resume execution after successful emulation
+@  r9  = normal "successful" return address
+@  r10 = this threads thread_info structure
+@  lr  = unrecognised instruction return address
+@  IRQs disabled.
+@
 ENTRY(do_vfp)
 #ifdef CONFIG_PREEMPT
 	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 2d30c7f6edd3..3a0efaad6090 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -61,13 +61,13 @@
 
 @ VFP hardware support entry point.
 @
-@  r0  = faulted instruction
-@  r2  = faulted PC+4
-@  r9  = successful return
+@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
+@  r2  = PC value to resume execution after successful emulation
+@  r9  = normal "successful" return address
 @  r10 = vfp_state union
 @  r11 = CPU number
-@  lr  = failure return
-
+@  lr  = unrecognised instruction return address
+@  IRQs enabled.
 ENTRY(vfp_support_entry)
 	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10
 
@@ -161,9 +161,12 @@ vfp_hw_state_valid:
 					@ exception before retrying branch
 					@ out before setting an FPEXC that
 					@ stops us reading stuff
-	VFPFMXR	FPEXC, r1		@ restore FPEXC last
-	sub	r2, r2, #4
-	str	r2, [sp, #S_PC]		@ retry the instruction
+	VFPFMXR	FPEXC, r1		@ Restore FPEXC last
+	sub	r2, r2, #4		@ Retry current instruction - if Thumb
+	str	r2, [sp, #S_PC]		@ mode it's two 16-bit instructions,
+					@ else it's one 32-bit instruction, so
+					@ always subtract 4 from the following
+					@ instruction address.
 #ifdef CONFIG_PREEMPT
 	get_thread_info	r10
 	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count

From b74253f78400f9a4b42da84bb1de7540b88ce7c4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 23 Jul 2012 14:18:13 +0100
Subject: [PATCH 10/10] ARM: 7479/1: mm: avoid NULL dereference when flushing
 gate_vma with VIVT caches

The vivt_flush_cache_{range,page} functions check that the mm_struct
of the VMA being flushed has been active on the current CPU before
performing the cache maintenance.

The gate_vma has a NULL mm_struct pointer and, as such, will cause a
kernel fault if we try to flush it with the above operations. This
happens during ELF core dumps, which include the gate_vma as it may be
useful for debugging purposes.

This patch adds checks to the VIVT cache flushing functions so that VMAs
with a NULL mm_struct are flushed unconditionally (the vectors page may
be dirty if we use it to store the current TLS pointer).
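
The case being guarded against looks roughly like this (sketch; the
real gate_vma initialisation lives in arch/arm/kernel/process.c and
differs in detail):

  	/* no user address space owns the vectors page, so the
  	 * mm_cpumask(vma->vm_mm) lookup below would dereference NULL */
  	static struct vm_area_struct gate_vma = {
  		.vm_start = 0xffff0000,		/* vectors page */
  		.vm_end   = 0xffff0000 + PAGE_SIZE,
  		.vm_mm    = NULL,
  	};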

Cc: <stable@vger.kernel.org> # 3.4+
Reported-by: Gilles Chanteperdrix <gilles.chanteperdrix@xenomai.org>
Tested-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 004c1bc95d2b..e4448e16046d 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm)
 static inline void
 vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
 		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
 					vma->vm_flags);
 }
@@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 static inline void
 vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 		unsigned long addr = user_addr & PAGE_MASK;
 		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 	}