From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 12 Jun 2008 13:56:40 +0200
Subject: [PATCH 001/142] MM: virtual address debug

Add some (configurable) expensive sanity checking to catch wrong address
translations on x86.

- create linux/mmdebug.h file to be able include this file in
  asm headers to not get unsolvable loops in header files
- __phys_addr on x86_32 became a function in ioremap.c since
  PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constasts are undefined
  if declared in page_32.h
- add __phys_addr_const for initializing doublefault_tss.__cr3

Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2.

Contains Andi's enable numa virtual address debug patch.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/doublefault_32.c |  2 +-
 arch/x86/mm/ioremap.c            | 33 ++++++++++++++++++++++++--------
 include/asm-x86/mmzone_64.h      |  2 +-
 include/asm-x86/page_32.h        |  3 ++-
 include/linux/mm.h               |  7 +------
 include/linux/mmdebug.h          | 18 +++++++++++++++++
 lib/Kconfig.debug                |  9 +++++++++
 mm/vmalloc.c                     |  5 +++++
 8 files changed, 62 insertions(+), 17 deletions(-)
 create mode 100644 include/linux/mmdebug.h

diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index a47798b59f07..395acb12b0d1 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
 		.ds		= __USER_DS,
 		.fs		= __KERNEL_PERCPU,
 
-		.__cr3		= __pa(swapper_pg_dir)
+		.__cr3		= __phys_addr_const((unsigned long)swapper_pg_dir)
 	}
 };
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2b2bb3f9b683..a78ffef62a2b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -23,19 +23,27 @@
 
 #ifdef CONFIG_X86_64
 
-unsigned long __phys_addr(unsigned long x)
-{
-	if (x >= __START_KERNEL_map)
-		return x - __START_KERNEL_map + phys_base;
-	return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-
 static inline int phys_addr_valid(unsigned long addr)
 {
 	return addr < (1UL << boot_cpu_data.x86_phys_bits);
 }
 
+unsigned long __phys_addr(unsigned long x)
+{
+	if (x >= __START_KERNEL_map) {
+		x -= __START_KERNEL_map;
+		VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
+		x += phys_base;
+	} else {
+		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
+		x -= PAGE_OFFSET;
+		VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
+					!phys_addr_valid(x));
+	}
+	return x;
+}
+EXPORT_SYMBOL(__phys_addr);
+
 #else
 
 static inline int phys_addr_valid(unsigned long addr)
@@ -43,6 +51,15 @@ static inline int phys_addr_valid(unsigned long addr)
 	return 1;
 }
 
+unsigned long __phys_addr(unsigned long x)
+{
+	/* VMALLOC_* aren't constants; not available at the boot time */
+	VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING &&
+					is_vmalloc_addr((void *)x)));
+	return x - PAGE_OFFSET;
+}
+EXPORT_SYMBOL(__phys_addr);
+
 #endif
 
 int page_is_ram(unsigned long pagenr)
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 594bd0dc1d08..facde3e5314f 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -7,7 +7,7 @@
 
 #ifdef CONFIG_NUMA
 
-#define VIRTUAL_BUG_ON(x)
+#include <linux/mmdebug.h>
 
 #include <asm/smp.h>
 
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 424e82f8ae27..9159bfb9dcf9 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -64,7 +64,8 @@ typedef struct page *pgtable_t;
 #endif
 
 #ifndef __ASSEMBLY__
-#define __phys_addr(x)		((x) - PAGE_OFFSET)
+#define __phys_addr_const(x)	((x) - PAGE_OFFSET)
+extern unsigned long __phys_addr(unsigned long);
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
 #ifdef CONFIG_FLATMEM
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 586a943cab01..3414a8813e97 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -7,6 +7,7 @@
 
 #include <linux/gfp.h>
 #include <linux/list.h>
+#include <linux/mmdebug.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
@@ -210,12 +211,6 @@ struct inode;
  */
 #include <linux/page-flags.h>
 
-#ifdef CONFIG_DEBUG_VM
-#define VM_BUG_ON(cond) BUG_ON(cond)
-#else
-#define VM_BUG_ON(condition) do { } while(0)
-#endif
-
 /*
  * Methods to modify the page usage count.
  *
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
new file mode 100644
index 000000000000..860ed1a71bbe
--- /dev/null
+++ b/include/linux/mmdebug.h
@@ -0,0 +1,18 @@
+#ifndef LINUX_MM_DEBUG_H
+#define LINUX_MM_DEBUG_H 1
+
+#include <linux/autoconf.h>
+
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(cond) do { } while(0)
+#endif
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VIRTUAL_BUG_ON(cond) do { } while(0)
+#endif
+
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..9d9dc0ddf13a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -469,6 +469,15 @@ config DEBUG_VM
 
 	  If unsure, say N.
 
+config DEBUG_VIRTUAL
+	bool "Debug VM translations"
+	depends on DEBUG_KERNEL && X86
+	help
+	  Enable some costly sanity checks in virtual to page code. This can
+	  catch mistakes with virt_to_page() and friends.
+
+	  If unsure, say N.
+
 config DEBUG_WRITECOUNT
 	bool "Debug filesystem writers count"
 	depends on DEBUG_KERNEL
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6e45b0f3d125..dc41e9c8ca6f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -180,6 +180,11 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
+	/* XXX we might need to change this if we add VIRTUAL_BUG_ON for
+	 * architectures that do not vmalloc module space */
+	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
+			!is_module_address(addr));
+
 	if (!pgd_none(*pgd)) {
 		pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud)) {

From a1bf9631be7332ce0641e299ddafad2d8223100f Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 12 Jun 2008 13:56:40 +0200
Subject: [PATCH 002/142] x86, MM: virtual address debug, v2

I've removed the test from phys_to_nid and made a function from __phys_addr
only when the debugging is enabled (on x86_32).

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: tglx@linutronix.de
Cc: hpa@zytor.com
Cc: Mike Travis <travis@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: <x86@kernel.org>
Cc: linux-mm@kvack.org
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c       | 2 ++
 include/asm-x86/mmzone_64.h | 1 -
 include/asm-x86/page_32.h   | 4 ++++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a78ffef62a2b..9dd3cb905971 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -51,6 +51,7 @@ static inline int phys_addr_valid(unsigned long addr)
 	return 1;
 }
 
+#ifdef CONFIG_DEBUG_VIRTUAL
 unsigned long __phys_addr(unsigned long x)
 {
 	/* VMALLOC_* aren't constants; not available at the boot time */
@@ -59,6 +60,7 @@ unsigned long __phys_addr(unsigned long x)
 	return x - PAGE_OFFSET;
 }
 EXPORT_SYMBOL(__phys_addr);
+#endif
 
 #endif
 
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index facde3e5314f..5e3a6cbddb49 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -29,7 +29,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 {
 	unsigned nid;
 	VIRTUAL_BUG_ON(!memnodemap);
-	VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize);
 	nid = memnodemap[addr >> memnode_shift];
 	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
 	return nid;
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 9159bfb9dcf9..71a2e424e584 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -65,7 +65,11 @@ typedef struct page *pgtable_t;
 
 #ifndef __ASSEMBLY__
 #define __phys_addr_const(x)	((x) - PAGE_OFFSET)
+#ifdef CONFIG_DEBUG_VIRTUAL
 extern unsigned long __phys_addr(unsigned long);
+#else
+#define __phys_addr(x)		((x) - PAGE_OFFSET)
+#endif
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
 #ifdef CONFIG_FLATMEM

From 7aa413def76146f7b3784228556d9e4bc562eab3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 19 Jun 2008 13:28:11 +0200
Subject: [PATCH 003/142] x86, MM: virtual address debug, cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mmdebug.h | 4 ++--
 mm/vmalloc.c            | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 860ed1a71bbe..8a5509877192 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -6,13 +6,13 @@
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #else
-#define VM_BUG_ON(cond) do { } while(0)
+#define VM_BUG_ON(cond) do { } while (0)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 #define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
 #else
-#define VIRTUAL_BUG_ON(cond) do { } while(0)
+#define VIRTUAL_BUG_ON(cond) do { } while (0)
 #endif
 
 #endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index dc41e9c8ca6f..830a5580c5d7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -180,8 +180,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
-	/* XXX we might need to change this if we add VIRTUAL_BUG_ON for
-	 * architectures that do not vmalloc module space */
+	/*
+	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
+	 * architectures that do not vmalloc module space
+	 */
 	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
 			!is_module_address(addr));
 

From 38ffbe66d59051fd9cfcfc8545f164700e2fa3bc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 23 Jul 2008 14:21:18 -0700
Subject: [PATCH 004/142] x86/paravirt/xen: properly fill out the ldt ops

LTP testing showed that Xen does not properly implement
sys_modify_ldt().  This patch does the final little bits needed to
make the ldt work properly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ldt.c      |  9 ++++++++-
 arch/x86/kernel/paravirt.c |  4 ++++
 arch/x86/xen/enlighten.c   | 23 +++++++++++++++++++++++
 include/asm-x86/desc.h     | 10 +++++++++-
 include/asm-x86/paravirt.h | 13 +++++++++++++
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3f..4895e0634d22 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
 	       (mincount - oldsize) * LDT_ENTRY_SIZE);
 
+	paravirt_alloc_ldt(newldt, mincount);
+
 #ifdef CONFIG_X86_64
 	/* CHECKME: Do we really need this ? */
 	wmb();
@@ -75,6 +77,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 #endif
 	}
 	if (oldsize) {
+		paravirt_free_ldt(oldldt, oldsize);
 		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(oldldt);
 		else
@@ -86,10 +89,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
 	int err = alloc_ldt(new, old->size, 0);
+	int i;
 
 	if (err < 0)
 		return err;
-	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+	for(i = 0; i < old->size; i++)
+		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
 
@@ -126,6 +132,7 @@ void destroy_context(struct mm_struct *mm)
 		if (mm == current->active_mm)
 			clear_LDT();
 #endif
+		paravirt_free_ldt(mm->context.ldt, mm->context.size);
 		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(mm->context.ldt);
 		else
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..d8f2277be5a0 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -348,6 +348,10 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_ldt_entry = native_write_ldt_entry,
 	.write_gdt_entry = native_write_gdt_entry,
 	.write_idt_entry = native_write_idt_entry,
+
+	.alloc_ldt = paravirt_nop,
+	.free_ldt = paravirt_nop,
+
 	.load_sp0 = native_load_sp0,
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..06219e60e9c8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -325,6 +325,26 @@ static unsigned long xen_store_tr(void)
 	return 0;
 }
 
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+	void *v = ldt;
+	int i;
+
+	for(i = 0; i < pages; i += PAGE_SIZE)
+		make_lowmem_page_readonly(v + i);
+}
+
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+	void *v = ldt;
+	int i;
+
+	for(i = 0; i < pages; i += PAGE_SIZE)
+		make_lowmem_page_readwrite(v + i);
+}
+
 static void xen_set_ldt(const void *addr, unsigned entries)
 {
 	struct mmuext_op *op;
@@ -1220,6 +1240,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_gs_index = xen_load_gs_index,
 #endif
 
+	.alloc_ldt = xen_alloc_ldt,
+	.free_ldt = xen_free_ldt,
+
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
 	.store_tr = xen_store_tr,
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc70590..24a524f5e1a2 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -97,7 +97,15 @@ static inline int desc_empty(const void *ptr)
 	native_write_gdt_entry(dt, entry, desc, type)
 #define write_idt_entry(dt, entry, g)		\
 	native_write_idt_entry(dt, entry, g)
-#endif
+
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+#endif	/* CONFIG_PARAVIRT */
 
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
 					  const gate_desc *gate)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93f12d6..db9b0647b346 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -124,6 +124,9 @@ struct pv_cpu_ops {
 				int entrynum, const void *desc, int size);
 	void (*write_idt_entry)(gate_desc *,
 				int entrynum, const gate_desc *gate);
+	void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+	void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);
@@ -824,6 +827,16 @@ do {							\
 	(aux) = __aux;					\
 } while (0)
 
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
+}
+
 static inline void load_TR_desc(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);

From d5de8841355a48f7f634a04507185eaf1f9755e3 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 23 Jul 2008 13:28:58 -0700
Subject: [PATCH 005/142] x86: split spinlock implementations out into their
 own files

ftrace requires certain low-level code, like spinlocks and timestamps,
to be compiled without -pg in order to avoid infinite recursion.  This
patch splits out the core paravirt spinlocks and the Xen spinlocks
into separate files which can be compiled without -pg.

Also do xen/time.c while we're about it.  As a result, we can now use
ftrace within a Xen domain.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile             |   4 +-
 arch/x86/kernel/paravirt-spinlocks.c |  31 +++++
 arch/x86/kernel/paravirt.c           |  23 ----
 arch/x86/xen/Makefile                |   8 +-
 arch/x86/xen/smp.c                   | 167 ------------------------
 arch/x86/xen/spinlock.c              | 183 +++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h               |   3 +
 7 files changed, 226 insertions(+), 193 deletions(-)
 create mode 100644 arch/x86/kernel/paravirt-spinlocks.c
 create mode 100644 arch/x86/xen/spinlock.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..d679cb2c79b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 endif
 
 #
@@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
new file mode 100644
index 000000000000..38d7f7f1dbc9
--- /dev/null
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -0,0 +1,31 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/paravirt.h>
+
+struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
+	.spin_is_locked = __ticket_spin_is_locked,
+	.spin_is_contended = __ticket_spin_is_contended,
+
+	.spin_lock = __ticket_spin_lock,
+	.spin_trylock = __ticket_spin_trylock,
+	.spin_unlock = __ticket_spin_unlock,
+#endif
+};
+EXPORT_SYMBOL_GPL(pv_lock_ops);
+
+void __init paravirt_use_bytelocks(void)
+{
+#ifdef CONFIG_SMP
+	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+	pv_lock_ops.spin_lock = __byte_spin_lock;
+	pv_lock_ops.spin_trylock = __byte_spin_trylock;
+	pv_lock_ops.spin_unlock = __byte_spin_unlock;
+#endif
+}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..0d71de9ff56d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
-	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
-	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
-	pv_lock_ops.spin_lock = __byte_spin_lock;
-	pv_lock_ops.spin_trylock = __byte_spin_trylock;
-	pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -461,18 +450,6 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.set_fixmap = native_set_fixmap,
 };
 
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
-#endif
-};
-EXPORT_SYMBOL_GPL(pv_lock_ops);
-
 EXPORT_SYMBOL_GPL(pv_time_ops);
 EXPORT_SYMBOL    (pv_cpu_ops);
 EXPORT_SYMBOL    (pv_mmu_ops);
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 59c1e539aed2..5bfee243cf9a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,10 @@
+ifdef CONFIG_FTRACE
+# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_spinlock.o = -pg
+CFLAGS_REMOVE_time.o = -pg
+endif
+
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
 			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
-obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_SMP)	+= smp.o spinlock.o
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d8faf79a0a1d..baca7f2fbd8a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,7 +15,6 @@
  * This does not handle HOTPLUG_CPU yet.
  */
 #include <linux/sched.h>
-#include <linux/kernel_stat.h>
 #include <linux/err.h>
 #include <linux/smp.h>
 
@@ -36,8 +35,6 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-static void __cpuinit xen_init_lock_cpu(int cpu);
-
 cpumask_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
@@ -419,170 +416,6 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	unsigned short spinners;	/* count of waiting cpus */
-};
-
-static int xen_spin_is_locked(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-static inline void spinning_lock(struct xen_spinlock *xl)
-{
-	__get_cpu_var(lock_spinners) = xl;
-	wmb();			/* set lock of interest before count */
-	asm(LOCK_PREFIX " incw %0"
-	    : "+m" (xl->spinners) : : "memory");
-}
-
-static inline void unspinning_lock(struct xen_spinlock *xl)
-{
-	asm(LOCK_PREFIX " decw %0"
-	    : "+m" (xl->spinners) : : "memory");
-	wmb();			/* decrement count before clearing lock */
-	__get_cpu_var(lock_spinners) = NULL;
-}
-
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	int irq = __get_cpu_var(lock_kicker_irq);
-	int ret;
-
-	/* If kicker interrupts not initialized yet, just spin */
-	if (irq == -1)
-		return 0;
-
-	/* announce we're spinning */
-	spinning_lock(xl);
-
-	/* clear pending */
-	xen_clear_irq_pending(irq);
-
-	/* check again make sure it didn't become free while
-	   we weren't looking  */
-	ret = xen_spin_trylock(lock);
-	if (ret)
-		goto out;
-
-	/* block until irq becomes pending */
-	xen_poll_irq(irq);
-	kstat_this_cpu.irqs[irq]++;
-
-out:
-	unspinning_lock(xl);
-	return ret;
-}
-
-static void xen_spin_lock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	int timeout;
-	u8 oldval;
-
-	do {
-		timeout = 1 << 10;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-			break;
-		}
-	}
-}
-
-static void xen_spin_unlock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/* make sure unlock happens before kick */
-	barrier();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-
-static __cpuinit void xen_init_lock_cpu(int cpu)
-{
-	int irq;
-	const char *name;
-
-	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
-	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
-				     cpu,
-				     xen_reschedule_interrupt,
-				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
-				     name,
-				     NULL);
-
-	if (irq >= 0) {
-		disable_irq(irq); /* make sure it's never delivered */
-		per_cpu(lock_kicker_irq, cpu) = irq;
-	}
-
-	printk("cpu %d spinlock event irq %d\n", cpu, irq);
-}
-
-static void __init xen_init_spinlocks(void)
-{
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
-}
-
 static const struct smp_ops xen_smp_ops __initdata = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
new file mode 100644
index 000000000000..8dc4d31da67f
--- /dev/null
+++ b/arch/x86/xen/spinlock.c
@@ -0,0 +1,183 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/kernel_stat.h>
+#include <linux/spinlock.h>
+
+#include <asm/paravirt.h>
+
+#include <xen/interface/xen.h>
+#include <xen/events.h>
+
+#include "xen-ops.h"
+
+struct xen_spinlock {
+	unsigned char lock;		/* 0 -> free; 1 -> locked */
+	unsigned short spinners;	/* count of waiting cpus */
+};
+
+static int xen_spin_is_locked(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	return xl->lock != 0;
+}
+
+static int xen_spin_is_contended(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	/* Not strictly true; this is only the count of contended
+	   lock-takers entering the slow path. */
+	return xl->spinners != 0;
+}
+
+static int xen_spin_trylock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	u8 old = 1;
+
+	asm("xchgb %b0,%1"
+	    : "+q" (old), "+m" (xl->lock) : : "memory");
+
+	return old == 0;
+}
+
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+
+static inline void spinning_lock(struct xen_spinlock *xl)
+{
+	__get_cpu_var(lock_spinners) = xl;
+	wmb();			/* set lock of interest before count */
+	asm(LOCK_PREFIX " incw %0"
+	    : "+m" (xl->spinners) : : "memory");
+}
+
+static inline void unspinning_lock(struct xen_spinlock *xl)
+{
+	asm(LOCK_PREFIX " decw %0"
+	    : "+m" (xl->spinners) : : "memory");
+	wmb();			/* decrement count before clearing lock */
+	__get_cpu_var(lock_spinners) = NULL;
+}
+
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	int irq = __get_cpu_var(lock_kicker_irq);
+	int ret;
+
+	/* If kicker interrupts not initialized yet, just spin */
+	if (irq == -1)
+		return 0;
+
+	/* announce we're spinning */
+	spinning_lock(xl);
+
+	/* clear pending */
+	xen_clear_irq_pending(irq);
+
+	/* check again make sure it didn't become free while
+	   we weren't looking  */
+	ret = xen_spin_trylock(lock);
+	if (ret)
+		goto out;
+
+	/* block until irq becomes pending */
+	xen_poll_irq(irq);
+	kstat_this_cpu.irqs[irq]++;
+
+out:
+	unspinning_lock(xl);
+	return ret;
+}
+
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	int timeout;
+	u8 oldval;
+
+	do {
+		timeout = 1 << 10;
+
+		asm("1: xchgb %1,%0\n"
+		    "   testb %1,%1\n"
+		    "   jz 3f\n"
+		    "2: rep;nop\n"
+		    "   cmpb $0,%0\n"
+		    "   je 1b\n"
+		    "   dec %2\n"
+		    "   jnz 2b\n"
+		    "3:\n"
+		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
+		    : "1" (1)
+		    : "memory");
+
+	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+}
+
+static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		/* XXX should mix up next cpu selection */
+		if (per_cpu(lock_spinners, cpu) == xl) {
+			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
+		}
+	}
+}
+
+static void xen_spin_unlock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	smp_wmb();		/* make sure no writes get moved after unlock */
+	xl->lock = 0;		/* release lock */
+
+	/* make sure unlock happens before kick */
+	barrier();
+
+	if (unlikely(xl->spinners))
+		xen_spin_unlock_slow(xl);
+}
+
+static irqreturn_t dummy_handler(int irq, void *dev_id)
+{
+	BUG();
+	return IRQ_HANDLED;
+}
+
+void __cpuinit xen_init_lock_cpu(int cpu)
+{
+	int irq;
+	const char *name;
+
+	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+				     cpu,
+				     dummy_handler,
+				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				     name,
+				     NULL);
+
+	if (irq >= 0) {
+		disable_irq(irq); /* make sure it's never delivered */
+		per_cpu(lock_kicker_irq, cpu) = irq;
+	}
+
+	printk("cpu %d spinlock event irq %d\n", cpu, irq);
+}
+
+void __init xen_init_spinlocks(void)
+{
+	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
+	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
+	pv_lock_ops.spin_lock = xen_spin_lock;
+	pv_lock_ops.spin_trylock = xen_spin_trylock;
+	pv_lock_ops.spin_unlock = xen_spin_unlock;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index dd3c23152a2e..8847fb34f17e 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -50,6 +50,9 @@ void __init xen_setup_vcpu_info_placement(void);
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
 
+void __init xen_init_spinlocks(void);
+__cpuinit void xen_init_lock_cpu(int cpu);
+
 extern cpumask_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}

From 0af36739af81f152cc24a0fdfa0754ef657afe3d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 24 Jul 2008 17:27:57 -0700
Subject: [PATCH 006/142] usb: move ehci reg def

prepare x86: usb debug port early console

move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Arjan van de Ven" <arjan@infradead.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Greg KH" <greg@kroah.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/usb/host/ehci.h      | 138 +-----------------------------
 include/linux/usb/ehci_def.h | 160 +++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 137 deletions(-)
 create mode 100644 include/linux/usb/ehci_def.h

diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 5799298364fb..b697a13364ec 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -210,143 +210,7 @@ timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action)
 
 /*-------------------------------------------------------------------------*/
 
-/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
-
-/* Section 2.2 Host Controller Capability Registers */
-struct ehci_caps {
-	/* these fields are specified as 8 and 16 bit registers,
-	 * but some hosts can't perform 8 or 16 bit PCI accesses.
-	 */
-	u32		hc_capbase;
-#define HC_LENGTH(p)		(((p)>>00)&0x00ff)	/* bits 7:0 */
-#define HC_VERSION(p)		(((p)>>16)&0xffff)	/* bits 31:16 */
-	u32		hcs_params;     /* HCSPARAMS - offset 0x4 */
-#define HCS_DEBUG_PORT(p)	(((p)>>20)&0xf)	/* bits 23:20, debug port? */
-#define HCS_INDICATOR(p)	((p)&(1 << 16))	/* true: has port indicators */
-#define HCS_N_CC(p)		(((p)>>12)&0xf)	/* bits 15:12, #companion HCs */
-#define HCS_N_PCC(p)		(((p)>>8)&0xf)	/* bits 11:8, ports per CC */
-#define HCS_PORTROUTED(p)	((p)&(1 << 7))	/* true: port routing */
-#define HCS_PPC(p)		((p)&(1 << 4))	/* true: port power control */
-#define HCS_N_PORTS(p)		(((p)>>0)&0xf)	/* bits 3:0, ports on HC */
-
-	u32		hcc_params;      /* HCCPARAMS - offset 0x8 */
-#define HCC_EXT_CAPS(p)		(((p)>>8)&0xff)	/* for pci extended caps */
-#define HCC_ISOC_CACHE(p)       ((p)&(1 << 7))  /* true: can cache isoc frame */
-#define HCC_ISOC_THRES(p)       (((p)>>4)&0x7)  /* bits 6:4, uframes cached */
-#define HCC_CANPARK(p)		((p)&(1 << 2))  /* true: can park on async qh */
-#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1))  /* true: periodic_size changes*/
-#define HCC_64BIT_ADDR(p)       ((p)&(1))       /* true: can use 64-bit addr */
-	u8		portroute [8];	 /* nibbles for routing - offset 0xC */
-} __attribute__ ((packed));
-
-
-/* Section 2.3 Host Controller Operational Registers */
-struct ehci_regs {
-
-	/* USBCMD: offset 0x00 */
-	u32		command;
-/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
-#define CMD_PARK	(1<<11)		/* enable "park" on async qh */
-#define CMD_PARK_CNT(c)	(((c)>>8)&3)	/* how many transfers to park for */
-#define CMD_LRESET	(1<<7)		/* partial reset (no ports, etc) */
-#define CMD_IAAD	(1<<6)		/* "doorbell" interrupt async advance */
-#define CMD_ASE		(1<<5)		/* async schedule enable */
-#define CMD_PSE		(1<<4)		/* periodic schedule enable */
-/* 3:2 is periodic frame list size */
-#define CMD_RESET	(1<<1)		/* reset HC not bus */
-#define CMD_RUN		(1<<0)		/* start/stop HC */
-
-	/* USBSTS: offset 0x04 */
-	u32		status;
-#define STS_ASS		(1<<15)		/* Async Schedule Status */
-#define STS_PSS		(1<<14)		/* Periodic Schedule Status */
-#define STS_RECL	(1<<13)		/* Reclamation */
-#define STS_HALT	(1<<12)		/* Not running (any reason) */
-/* some bits reserved */
-	/* these STS_* flags are also intr_enable bits (USBINTR) */
-#define STS_IAA		(1<<5)		/* Interrupted on async advance */
-#define STS_FATAL	(1<<4)		/* such as some PCI access errors */
-#define STS_FLR		(1<<3)		/* frame list rolled over */
-#define STS_PCD		(1<<2)		/* port change detect */
-#define STS_ERR		(1<<1)		/* "error" completion (overflow, ...) */
-#define STS_INT		(1<<0)		/* "normal" completion (short, ...) */
-
-	/* USBINTR: offset 0x08 */
-	u32		intr_enable;
-
-	/* FRINDEX: offset 0x0C */
-	u32		frame_index;	/* current microframe number */
-	/* CTRLDSSEGMENT: offset 0x10 */
-	u32		segment;	/* address bits 63:32 if needed */
-	/* PERIODICLISTBASE: offset 0x14 */
-	u32		frame_list;	/* points to periodic list */
-	/* ASYNCLISTADDR: offset 0x18 */
-	u32		async_next;	/* address of next async queue head */
-
-	u32		reserved [9];
-
-	/* CONFIGFLAG: offset 0x40 */
-	u32		configured_flag;
-#define FLAG_CF		(1<<0)		/* true: we'll support "high speed" */
-
-	/* PORTSC: offset 0x44 */
-	u32		port_status [0];	/* up to N_PORTS */
-/* 31:23 reserved */
-#define PORT_WKOC_E	(1<<22)		/* wake on overcurrent (enable) */
-#define PORT_WKDISC_E	(1<<21)		/* wake on disconnect (enable) */
-#define PORT_WKCONN_E	(1<<20)		/* wake on connect (enable) */
-/* 19:16 for port testing */
-#define PORT_LED_OFF	(0<<14)
-#define PORT_LED_AMBER	(1<<14)
-#define PORT_LED_GREEN	(2<<14)
-#define PORT_LED_MASK	(3<<14)
-#define PORT_OWNER	(1<<13)		/* true: companion hc owns this port */
-#define PORT_POWER	(1<<12)		/* true: has power (see PPC) */
-#define PORT_USB11(x) (((x)&(3<<10))==(1<<10))	/* USB 1.1 device */
-/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
-/* 9 reserved */
-#define PORT_RESET	(1<<8)		/* reset port */
-#define PORT_SUSPEND	(1<<7)		/* suspend port */
-#define PORT_RESUME	(1<<6)		/* resume it */
-#define PORT_OCC	(1<<5)		/* over current change */
-#define PORT_OC		(1<<4)		/* over current active */
-#define PORT_PEC	(1<<3)		/* port enable change */
-#define PORT_PE		(1<<2)		/* port enable */
-#define PORT_CSC	(1<<1)		/* connect status change */
-#define PORT_CONNECT	(1<<0)		/* device connected */
-#define PORT_RWC_BITS   (PORT_CSC | PORT_PEC | PORT_OCC)
-} __attribute__ ((packed));
-
-#define USBMODE		0x68		/* USB Device mode */
-#define USBMODE_SDIS	(1<<3)		/* Stream disable */
-#define USBMODE_BE	(1<<2)		/* BE/LE endianness select */
-#define USBMODE_CM_HC	(3<<0)		/* host controller mode */
-#define USBMODE_CM_IDLE	(0<<0)		/* idle state */
-
-/* Appendix C, Debug port ... intended for use with special "debug devices"
- * that can help if there's no serial console.  (nonstandard enumeration.)
- */
-struct ehci_dbg_port {
-	u32	control;
-#define DBGP_OWNER	(1<<30)
-#define DBGP_ENABLED	(1<<28)
-#define DBGP_DONE	(1<<16)
-#define DBGP_INUSE	(1<<10)
-#define DBGP_ERRCODE(x)	(((x)>>7)&0x07)
-#	define DBGP_ERR_BAD	1
-#	define DBGP_ERR_SIGNAL	2
-#define DBGP_ERROR	(1<<6)
-#define DBGP_GO		(1<<5)
-#define DBGP_OUT	(1<<4)
-#define DBGP_LEN(x)	(((x)>>0)&0x0f)
-	u32	pids;
-#define DBGP_PID_GET(x)		(((x)>>16)&0xff)
-#define DBGP_PID_SET(data,tok)	(((data)<<8)|(tok))
-	u32	data03;
-	u32	data47;
-	u32	address;
-#define DBGP_EPADDR(dev,ep)	(((dev)<<8)|(ep))
-} __attribute__ ((packed));
+#include <linux/usb/ehci_def.h>
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
new file mode 100644
index 000000000000..5b88e36c9103
--- /dev/null
+++ b/include/linux/usb/ehci_def.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2001-2002 by David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __LINUX_USB_EHCI_DEF_H
+#define __LINUX_USB_EHCI_DEF_H
+
+/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
+
+/* Section 2.2 Host Controller Capability Registers */
+struct ehci_caps {
+	/* these fields are specified as 8 and 16 bit registers,
+	 * but some hosts can't perform 8 or 16 bit PCI accesses.
+	 */
+	u32		hc_capbase;
+#define HC_LENGTH(p)		(((p)>>00)&0x00ff)	/* bits 7:0 */
+#define HC_VERSION(p)		(((p)>>16)&0xffff)	/* bits 31:16 */
+	u32		hcs_params;     /* HCSPARAMS - offset 0x4 */
+#define HCS_DEBUG_PORT(p)	(((p)>>20)&0xf)	/* bits 23:20, debug port? */
+#define HCS_INDICATOR(p)	((p)&(1 << 16))	/* true: has port indicators */
+#define HCS_N_CC(p)		(((p)>>12)&0xf)	/* bits 15:12, #companion HCs */
+#define HCS_N_PCC(p)		(((p)>>8)&0xf)	/* bits 11:8, ports per CC */
+#define HCS_PORTROUTED(p)	((p)&(1 << 7))	/* true: port routing */
+#define HCS_PPC(p)		((p)&(1 << 4))	/* true: port power control */
+#define HCS_N_PORTS(p)		(((p)>>0)&0xf)	/* bits 3:0, ports on HC */
+
+	u32		hcc_params;      /* HCCPARAMS - offset 0x8 */
+#define HCC_EXT_CAPS(p)		(((p)>>8)&0xff)	/* for pci extended caps */
+#define HCC_ISOC_CACHE(p)       ((p)&(1 << 7))  /* true: can cache isoc frame */
+#define HCC_ISOC_THRES(p)       (((p)>>4)&0x7)  /* bits 6:4, uframes cached */
+#define HCC_CANPARK(p)		((p)&(1 << 2))  /* true: can park on async qh */
+#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1))  /* true: periodic_size changes*/
+#define HCC_64BIT_ADDR(p)       ((p)&(1))       /* true: can use 64-bit addr */
+	u8		portroute [8];	 /* nibbles for routing - offset 0xC */
+} __attribute__ ((packed));
+
+
+/* Section 2.3 Host Controller Operational Registers */
+struct ehci_regs {
+
+	/* USBCMD: offset 0x00 */
+	u32		command;
+/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
+#define CMD_PARK	(1<<11)		/* enable "park" on async qh */
+#define CMD_PARK_CNT(c)	(((c)>>8)&3)	/* how many transfers to park for */
+#define CMD_LRESET	(1<<7)		/* partial reset (no ports, etc) */
+#define CMD_IAAD	(1<<6)		/* "doorbell" interrupt async advance */
+#define CMD_ASE		(1<<5)		/* async schedule enable */
+#define CMD_PSE		(1<<4)		/* periodic schedule enable */
+/* 3:2 is periodic frame list size */
+#define CMD_RESET	(1<<1)		/* reset HC not bus */
+#define CMD_RUN		(1<<0)		/* start/stop HC */
+
+	/* USBSTS: offset 0x04 */
+	u32		status;
+#define STS_ASS		(1<<15)		/* Async Schedule Status */
+#define STS_PSS		(1<<14)		/* Periodic Schedule Status */
+#define STS_RECL	(1<<13)		/* Reclamation */
+#define STS_HALT	(1<<12)		/* Not running (any reason) */
+/* some bits reserved */
+	/* these STS_* flags are also intr_enable bits (USBINTR) */
+#define STS_IAA		(1<<5)		/* Interrupted on async advance */
+#define STS_FATAL	(1<<4)		/* such as some PCI access errors */
+#define STS_FLR		(1<<3)		/* frame list rolled over */
+#define STS_PCD		(1<<2)		/* port change detect */
+#define STS_ERR		(1<<1)		/* "error" completion (overflow, ...) */
+#define STS_INT		(1<<0)		/* "normal" completion (short, ...) */
+
+	/* USBINTR: offset 0x08 */
+	u32		intr_enable;
+
+	/* FRINDEX: offset 0x0C */
+	u32		frame_index;	/* current microframe number */
+	/* CTRLDSSEGMENT: offset 0x10 */
+	u32		segment;	/* address bits 63:32 if needed */
+	/* PERIODICLISTBASE: offset 0x14 */
+	u32		frame_list;	/* points to periodic list */
+	/* ASYNCLISTADDR: offset 0x18 */
+	u32		async_next;	/* address of next async queue head */
+
+	u32		reserved [9];
+
+	/* CONFIGFLAG: offset 0x40 */
+	u32		configured_flag;
+#define FLAG_CF		(1<<0)		/* true: we'll support "high speed" */
+
+	/* PORTSC: offset 0x44 */
+	u32		port_status [0];	/* up to N_PORTS */
+/* 31:23 reserved */
+#define PORT_WKOC_E	(1<<22)		/* wake on overcurrent (enable) */
+#define PORT_WKDISC_E	(1<<21)		/* wake on disconnect (enable) */
+#define PORT_WKCONN_E	(1<<20)		/* wake on connect (enable) */
+/* 19:16 for port testing */
+#define PORT_LED_OFF	(0<<14)
+#define PORT_LED_AMBER	(1<<14)
+#define PORT_LED_GREEN	(2<<14)
+#define PORT_LED_MASK	(3<<14)
+#define PORT_OWNER	(1<<13)		/* true: companion hc owns this port */
+#define PORT_POWER	(1<<12)		/* true: has power (see PPC) */
+#define PORT_USB11(x) (((x)&(3<<10)) == (1<<10))	/* USB 1.1 device */
+/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
+/* 9 reserved */
+#define PORT_RESET	(1<<8)		/* reset port */
+#define PORT_SUSPEND	(1<<7)		/* suspend port */
+#define PORT_RESUME	(1<<6)		/* resume it */
+#define PORT_OCC	(1<<5)		/* over current change */
+#define PORT_OC		(1<<4)		/* over current active */
+#define PORT_PEC	(1<<3)		/* port enable change */
+#define PORT_PE		(1<<2)		/* port enable */
+#define PORT_CSC	(1<<1)		/* connect status change */
+#define PORT_CONNECT	(1<<0)		/* device connected */
+#define PORT_RWC_BITS   (PORT_CSC | PORT_PEC | PORT_OCC)
+} __attribute__ ((packed));
+
+#define USBMODE		0x68		/* USB Device mode */
+#define USBMODE_SDIS	(1<<3)		/* Stream disable */
+#define USBMODE_BE	(1<<2)		/* BE/LE endianness select */
+#define USBMODE_CM_HC	(3<<0)		/* host controller mode */
+#define USBMODE_CM_IDLE	(0<<0)		/* idle state */
+
+/* Appendix C, Debug port ... intended for use with special "debug devices"
+ * that can help if there's no serial console.  (nonstandard enumeration.)
+ */
+struct ehci_dbg_port {
+	u32	control;
+#define DBGP_OWNER	(1<<30)
+#define DBGP_ENABLED	(1<<28)
+#define DBGP_DONE	(1<<16)
+#define DBGP_INUSE	(1<<10)
+#define DBGP_ERRCODE(x)	(((x)>>7)&0x07)
+#	define DBGP_ERR_BAD	1
+#	define DBGP_ERR_SIGNAL	2
+#define DBGP_ERROR	(1<<6)
+#define DBGP_GO		(1<<5)
+#define DBGP_OUT	(1<<4)
+#define DBGP_LEN(x)	(((x)>>0)&0x0f)
+	u32	pids;
+#define DBGP_PID_GET(x)		(((x)>>16)&0xff)
+#define DBGP_PID_SET(data, tok)	(((data)<<8)|(tok))
+	u32	data03;
+	u32	data47;
+	u32	address;
+#define DBGP_EPADDR(dev, ep)	(((dev)<<8)|(ep))
+} __attribute__ ((packed));
+
+#endif /* __LINUX_USB_EHCI_DEF_H */

From 5c05917e7fe313a187ad6ebb94c1c6cf42862a0b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 24 Jul 2008 17:29:40 -0700
Subject: [PATCH 007/142] x86: usb debug port early console, v4

based on work from Eric, and add some timeout so don't dead loop when debug
device is not installed

v2: fix checkpatch warning
v3: move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h
    also add CONFIG_EARLY_PRINTK_DBGP to disable it by default
v4: address comments from Ingo, seperate ehci reg def moving to another patch
    also add auto detect port that connect to debug device for Nvidia
    southbridge

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Arjan van de Ven" <arjan@infradead.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Greg KH" <greg@kroah.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |   3 +-
 arch/x86/Kconfig.debug              |  13 +
 arch/x86/kernel/early_printk.c      | 762 +++++++++++++++++++++++++++-
 3 files changed, 774 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e7bea3e85304..92ddd4afe174 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -657,11 +657,12 @@ and is between 256 and 4096 characters. It is defined in the file
 	earlyprintk=	[X86-32,X86-64,SH,BLACKFIN]
 			earlyprintk=vga
 			earlyprintk=serial[,ttySn[,baudrate]]
+			earlyprintk=dbgp
 
 			Append ",keep" to not disable it when the real console
 			takes over.
 
-			Only vga or serial at a time, not both.
+			Only vga or serial or usb debug port at a time.
 
 			Currently only ttyS0 and ttyS1 are supported.
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 092f019e033a..93422d2ecf61 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,6 +43,19 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
+config EARLY_PRINTK_DBGP
+	bool "Early printk via EHCI debug port"
+	default n
+	depends on EARLY_PRINTK
+	help
+	  Write kernel log output directly into the EHCI debug port.
+
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized. For normal operation
+	  it is not recommended because it looks ugly and doesn't cooperate
+	  with klogd/syslogd or the X server. You should normally N here,
+	  unless you want to debug such a crash. You need usb debug device.
+
 config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index ff9e7350da54..28155acf706e 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -3,11 +3,19 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/screen_info.h>
+#include <linux/usb/ch9.h>
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/errno.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/fcntl.h>
 #include <asm/setup.h>
 #include <xen/hvc-console.h>
+#include <asm/pci-direct.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <linux/usb/ehci_def.h>
 
 /* Simple VGA output */
 #define VGABASE		(__ISA_IO_base + 0xb8000)
@@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8;  /* ttyS0 */
 static int early_serial_putc(unsigned char ch)
 {
 	unsigned timeout = 0xffff;
+
 	while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
 		cpu_relax();
 	outb(ch, early_serial_base + TXR);
@@ -151,6 +160,721 @@ static struct console early_serial_console = {
 	.index =	-1,
 };
 
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+
+static struct ehci_caps __iomem *ehci_caps;
+static struct ehci_regs __iomem *ehci_regs;
+static struct ehci_dbg_port __iomem *ehci_debug;
+static unsigned int dbgp_endpoint_out;
+
+struct ehci_dev {
+	u32 bus;
+	u32 slot;
+	u32 func;
+};
+
+static struct ehci_dev ehci_dev;
+
+#define USB_DEBUG_DEVNUM 127
+
+#define DBGP_DATA_TOGGLE	0x8800
+
+static inline u32 dbgp_pid_update(u32 x, u32 tok)
+{
+	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
+}
+
+static inline u32 dbgp_len_update(u32 x, u32 len)
+{
+	return (x & ~0x0f) | (len & 0x0f);
+}
+
+/*
+ * USB Packet IDs (PIDs)
+ */
+
+/* token */
+#define USB_PID_OUT		0xe1
+#define USB_PID_IN		0x69
+#define USB_PID_SOF		0xa5
+#define USB_PID_SETUP		0x2d
+/* handshake */
+#define USB_PID_ACK		0xd2
+#define USB_PID_NAK		0x5a
+#define USB_PID_STALL		0x1e
+#define USB_PID_NYET		0x96
+/* data */
+#define USB_PID_DATA0		0xc3
+#define USB_PID_DATA1		0x4b
+#define USB_PID_DATA2		0x87
+#define USB_PID_MDATA		0x0f
+/* Special */
+#define USB_PID_PREAMBLE	0x3c
+#define USB_PID_ERR		0x3c
+#define USB_PID_SPLIT		0x78
+#define USB_PID_PING		0xb4
+#define USB_PID_UNDEF_0		0xf0
+
+#define USB_PID_DATA_TOGGLE	0x88
+#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
+
+#define PCI_CAP_ID_EHCI_DEBUG	0xa
+
+#define HUB_ROOT_RESET_TIME	50	/* times are in msec */
+#define HUB_SHORT_RESET_TIME	10
+#define HUB_LONG_RESET_TIME	200
+#define HUB_RESET_TIMEOUT	500
+
+#define DBGP_MAX_PACKET		8
+
+static int dbgp_wait_until_complete(void)
+{
+	u32 ctrl;
+	int loop = 0x100000;
+
+	do {
+		ctrl = readl(&ehci_debug->control);
+		/* Stop when the transaction is finished */
+		if (ctrl & DBGP_DONE)
+			break;
+	} while (--loop > 0);
+
+	if (!loop)
+		return -1;
+
+	/*
+	 * Now that we have observed the completed transaction,
+	 * clear the done bit.
+	 */
+	writel(ctrl | DBGP_DONE, &ehci_debug->control);
+	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
+}
+
+static void dbgp_mdelay(int ms)
+{
+	int i;
+
+	while (ms--) {
+		for (i = 0; i < 1000; i++)
+			outb(0x1, 0x80);
+	}
+}
+
+static void dbgp_breath(void)
+{
+	/* Sleep to give the debug port a chance to breathe */
+}
+
+static int dbgp_wait_until_done(unsigned ctrl)
+{
+	u32 pids, lpid;
+	int ret;
+	int loop = 3;
+
+retry:
+	writel(ctrl | DBGP_GO, &ehci_debug->control);
+	ret = dbgp_wait_until_complete();
+	pids = readl(&ehci_debug->pids);
+	lpid = DBGP_PID_GET(pids);
+
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * If the port is getting full or it has dropped data
+	 * start pacing ourselves, not necessary but it's friendly.
+	 */
+	if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
+		dbgp_breath();
+
+	/* If I get a NACK reissue the transmission */
+	if (lpid == USB_PID_NAK) {
+		if (--loop > 0)
+			goto retry;
+	}
+
+	return ret;
+}
+
+static void dbgp_set_data(const void *buf, int size)
+{
+	const unsigned char *bytes = buf;
+	u32 lo, hi;
+	int i;
+
+	lo = hi = 0;
+	for (i = 0; i < 4 && i < size; i++)
+		lo |= bytes[i] << (8*i);
+	for (; i < 8 && i < size; i++)
+		hi |= bytes[i] << (8*(i - 4));
+	writel(lo, &ehci_debug->data03);
+	writel(hi, &ehci_debug->data47);
+}
+
+static void dbgp_get_data(void *buf, int size)
+{
+	unsigned char *bytes = buf;
+	u32 lo, hi;
+	int i;
+
+	lo = readl(&ehci_debug->data03);
+	hi = readl(&ehci_debug->data47);
+	for (i = 0; i < 4 && i < size; i++)
+		bytes[i] = (lo >> (8*i)) & 0xff;
+	for (; i < 8 && i < size; i++)
+		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
+}
+
+static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
+			 const char *bytes, int size)
+{
+	u32 pids, addr, ctrl;
+	int ret;
+
+	if (size > DBGP_MAX_PACKET)
+		return -1;
+
+	addr = DBGP_EPADDR(devnum, endpoint);
+
+	pids = readl(&ehci_debug->pids);
+	pids = dbgp_pid_update(pids, USB_PID_OUT);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, size);
+	ctrl |= DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	dbgp_set_data(bytes, size);
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	return ret;
+}
+
+static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
+				 int size)
+{
+	u32 pids, addr, ctrl;
+	int ret;
+
+	if (size > DBGP_MAX_PACKET)
+		return -1;
+
+	addr = DBGP_EPADDR(devnum, endpoint);
+
+	pids = readl(&ehci_debug->pids);
+	pids = dbgp_pid_update(pids, USB_PID_IN);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, size);
+	ctrl &= ~DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	if (size > ret)
+		size = ret;
+	dbgp_get_data(data, size);
+	return ret;
+}
+
+static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
+	int value, int index, void *data, int size)
+{
+	u32 pids, addr, ctrl;
+	struct usb_ctrlrequest req;
+	int read;
+	int ret;
+
+	read = (requesttype & USB_DIR_IN) != 0;
+	if (size > (read ? DBGP_MAX_PACKET:0))
+		return -1;
+
+	/* Compute the control message */
+	req.bRequestType = requesttype;
+	req.bRequest = request;
+	req.wValue = value;
+	req.wIndex = index;
+	req.wLength = size;
+
+	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
+	addr = DBGP_EPADDR(devnum, 0);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, sizeof(req));
+	ctrl |= DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	/* Send the setup message */
+	dbgp_set_data(&req, sizeof(req));
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	/* Read the result */
+	return dbgp_bulk_read(devnum, 0, data, size);
+}
+
+
+/* Find a PCI capability */
+static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
+{
+	u8 pos;
+	int bytes;
+
+	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+		PCI_STATUS_CAP_LIST))
+		return 0;
+
+	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
+		u8 id;
+
+		pos &= ~3;
+		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+
+		pos = read_pci_config_byte(num, slot, func,
+						 pos+PCI_CAP_LIST_NEXT);
+	}
+	return 0;
+}
+
+static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
+{
+	u32 class;
+
+	class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
+	if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
+		return 0;
+
+	return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
+}
+
+static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
+{
+	u32 bus, slot, func;
+
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				unsigned cap;
+
+				cap = __find_dbgp(bus, slot, func);
+
+				if (!cap)
+					continue;
+				if (ehci_num-- != 0)
+					continue;
+				*rbus = bus;
+				*rslot = slot;
+				*rfunc = func;
+				return cap;
+			}
+		}
+	}
+	return 0;
+}
+
+static int ehci_reset_port(int port)
+{
+	u32 portsc;
+	u32 delay_time, delay;
+	int loop;
+
+	/* Reset the usb debug port */
+	portsc = readl(&ehci_regs->port_status[port - 1]);
+	portsc &= ~PORT_PE;
+	portsc |= PORT_RESET;
+	writel(portsc, &ehci_regs->port_status[port - 1]);
+
+	delay = HUB_ROOT_RESET_TIME;
+	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
+	     delay_time += delay) {
+		dbgp_mdelay(delay);
+
+		portsc = readl(&ehci_regs->port_status[port - 1]);
+		if (portsc & PORT_RESET) {
+			/* force reset to complete */
+			loop = 2;
+			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
+				&ehci_regs->port_status[port - 1]);
+			do {
+				portsc = readl(&ehci_regs->port_status[port-1]);
+			} while ((portsc & PORT_RESET) && (--loop > 0));
+		}
+
+		/* Device went away? */
+		if (!(portsc & PORT_CONNECT))
+			return -ENOTCONN;
+
+		/* bomb out completely if something weird happend */
+		if ((portsc & PORT_CSC))
+			return -EINVAL;
+
+		/* If we've finished resetting, then break out of the loop */
+		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
+			return 0;
+	}
+	return -EBUSY;
+}
+
+static int ehci_wait_for_port(int port)
+{
+	u32 status;
+	int ret, reps;
+
+	for (reps = 0; reps < 3; reps++) {
+		dbgp_mdelay(100);
+		status = readl(&ehci_regs->status);
+		if (status & STS_PCD) {
+			ret = ehci_reset_port(port);
+			if (ret == 0)
+				return 0;
+		}
+	}
+	return -ENOTCONN;
+}
+
+#ifdef DBGP_DEBUG
+# define dbgp_printk early_printk
+#else
+static inline void dbgp_printk(const char *fmt, ...) { }
+#endif
+
+typedef void (*set_debug_port_t)(int port);
+
+static void default_set_debug_port(int port)
+{
+}
+
+static set_debug_port_t set_debug_port = default_set_debug_port;
+
+static void nvidia_set_debug_port(int port)
+{
+	u32 dword;
+	dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+				 0x74);
+	dword &= ~(0x0f<<12);
+	dword |= ((port & 0x0f)<<12);
+	write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
+				 dword);
+	dbgp_printk("set debug port to %d\n", port);
+}
+
+static void __init detect_set_debug_port(void)
+{
+	u32 vendorid;
+
+	vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+		 0x00);
+
+	if ((vendorid & 0xffff) == 0x10de) {
+		dbgp_printk("using nvidia set_debug_port\n");
+		set_debug_port = nvidia_set_debug_port;
+	}
+}
+
+static int __init ehci_setup(void)
+{
+	struct usb_debug_descriptor dbgp_desc;
+	u32 cmd, ctrl, status, portsc, hcs_params;
+	u32 debug_port, new_debug_port = 0, n_ports;
+	u32  devnum;
+	int ret, i;
+	int loop;
+	int port_map_tried;
+	int playtimes = 3;
+
+try_next_time:
+	port_map_tried = 0;
+
+try_next_port:
+
+	hcs_params = readl(&ehci_caps->hcs_params);
+	debug_port = HCS_DEBUG_PORT(hcs_params);
+	n_ports    = HCS_N_PORTS(hcs_params);
+
+	dbgp_printk("debug_port: %d\n", debug_port);
+	dbgp_printk("n_ports:    %d\n", n_ports);
+
+	for (i = 1; i <= n_ports; i++) {
+		portsc = readl(&ehci_regs->port_status[i-1]);
+		dbgp_printk("portstatus%d: %08x\n", i, portsc);
+	}
+
+	if (port_map_tried && (new_debug_port != debug_port)) {
+		if (--playtimes) {
+			set_debug_port(new_debug_port);
+			goto try_next_time;
+		}
+		return -1;
+	}
+
+	loop = 10;
+	/* Reset the EHCI controller */
+	cmd = readl(&ehci_regs->command);
+	cmd |= CMD_RESET;
+	writel(cmd, &ehci_regs->command);
+	do {
+		cmd = readl(&ehci_regs->command);
+	} while ((cmd & CMD_RESET) && (--loop > 0));
+
+	if (!loop) {
+		dbgp_printk("can not reset ehci\n");
+		return -1;
+	}
+	dbgp_printk("ehci reset done\n");
+
+	/* Claim ownership, but do not enable yet */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_OWNER;
+	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
+	writel(ctrl, &ehci_debug->control);
+
+	/* Start the ehci running */
+	cmd = readl(&ehci_regs->command);
+	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
+	cmd |= CMD_RUN;
+	writel(cmd, &ehci_regs->command);
+
+	/* Ensure everything is routed to the EHCI */
+	writel(FLAG_CF, &ehci_regs->configured_flag);
+
+	/* Wait until the controller is no longer halted */
+	loop = 10;
+	do {
+		status = readl(&ehci_regs->status);
+	} while ((status & STS_HALT) && (--loop > 0));
+
+	if (!loop) {
+		dbgp_printk("ehci can be started\n");
+		return -1;
+	}
+	dbgp_printk("ehci started\n");
+
+	/* Wait for a device to show up in the debug port */
+	ret = ehci_wait_for_port(debug_port);
+	if (ret < 0) {
+		dbgp_printk("No device found in debug port\n");
+		goto next_debug_port;
+	}
+	dbgp_printk("ehci wait for port done\n");
+
+	/* Enable the debug port */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_CLAIM;
+	writel(ctrl, &ehci_debug->control);
+	ctrl = readl(&ehci_debug->control);
+	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
+		dbgp_printk("No device in debug port\n");
+		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
+		goto err;
+	}
+	dbgp_printk("debug ported enabled\n");
+
+	/* Completely transfer the debug device to the debug controller */
+	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
+	portsc &= ~PORT_PE;
+	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
+
+	dbgp_mdelay(100);
+
+	/* Find the debug device and make it device number 127 */
+	for (devnum = 0; devnum <= 127; devnum++) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
+			&dbgp_desc, sizeof(dbgp_desc));
+		if (ret > 0)
+			break;
+	}
+	if (devnum > 127) {
+		dbgp_printk("Could not find attached debug device\n");
+		goto err;
+	}
+	if (ret < 0) {
+		dbgp_printk("Attached device is not a debug device\n");
+		goto err;
+	}
+	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
+
+	/* Move the device to 127 if it isn't already there */
+	if (devnum != USB_DEBUG_DEVNUM) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
+		if (ret < 0) {
+			dbgp_printk("Could not move attached device to %d\n",
+				USB_DEBUG_DEVNUM);
+			goto err;
+		}
+		devnum = USB_DEBUG_DEVNUM;
+		dbgp_printk("debug device renamed to 127\n");
+	}
+
+	/* Enable the debug interface */
+	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
+	if (ret < 0) {
+		dbgp_printk(" Could not enable the debug device\n");
+		goto err;
+	}
+	dbgp_printk("debug interface enabled\n");
+
+	/* Perform a small write to get the even/odd data state in sync
+	 */
+	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
+	if (ret < 0) {
+		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
+		goto err;
+	}
+	dbgp_printk("small write doned\n");
+
+	return 0;
+err:
+	/* Things didn't work so remove my claim */
+	ctrl = readl(&ehci_debug->control);
+	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
+	writel(ctrl, &ehci_debug->control);
+	return -1;
+
+next_debug_port:
+	port_map_tried |= (1<<(debug_port - 1));
+	new_debug_port = ((debug_port-1+1)%n_ports) + 1;
+	if (port_map_tried != ((1<<n_ports) - 1)) {
+		set_debug_port(new_debug_port);
+		goto try_next_port;
+	}
+	if (--playtimes) {
+		set_debug_port(new_debug_port);
+		goto try_next_time;
+	}
+
+	return -1;
+}
+
+static int __init early_dbgp_init(char *s)
+{
+	u32 debug_port, bar, offset;
+	u32 bus, slot, func, cap;
+	void __iomem *ehci_bar;
+	u32 dbgp_num;
+	u32 bar_val;
+	char *e;
+	int ret;
+	u8 byte;
+
+	if (!early_pci_allowed())
+		return -1;
+
+	dbgp_num = 0;
+	if (*s)
+		dbgp_num = simple_strtoul(s, &e, 10);
+	dbgp_printk("dbgp_num: %d\n", dbgp_num);
+
+	cap = find_dbgp(dbgp_num, &bus, &slot, &func);
+	if (!cap)
+		return -1;
+
+	dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
+			 func);
+
+	debug_port = read_pci_config(bus, slot, func, cap);
+	bar = (debug_port >> 29) & 0x7;
+	bar = (bar * 4) + 0xc;
+	offset = (debug_port >> 16) & 0xfff;
+	dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
+	if (bar != PCI_BASE_ADDRESS_0) {
+		dbgp_printk("only debug ports on bar 1 handled.\n");
+
+		return -1;
+	}
+
+	bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+	dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
+	if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
+		dbgp_printk("only simple 32bit mmio bars supported\n");
+
+		return -1;
+	}
+
+	/* double check if the mem space is enabled */
+	byte = read_pci_config_byte(bus, slot, func, 0x04);
+	if (!(byte & 0x2)) {
+		byte  |= 0x02;
+		write_pci_config_byte(bus, slot, func, 0x04, byte);
+		dbgp_printk("mmio for ehci enabled\n");
+	}
+
+	/*
+	 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
+	 * than enough.  1K is the biggest I have seen.
+	 */
+	set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
+	ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
+	ehci_bar += bar_val & ~PAGE_MASK;
+	dbgp_printk("ehci_bar: %p\n", ehci_bar);
+
+	ehci_caps  = ehci_bar;
+	ehci_regs  = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
+	ehci_debug = ehci_bar + offset;
+	ehci_dev.bus = bus;
+	ehci_dev.slot = slot;
+	ehci_dev.func = func;
+
+	detect_set_debug_port();
+
+	ret = ehci_setup();
+	if (ret < 0) {
+		dbgp_printk("ehci_setup failed\n");
+		ehci_debug = 0;
+
+		return -1;
+	}
+
+	return 0;
+}
+
+static void early_dbgp_write(struct console *con, const char *str, u32 n)
+{
+	int chunk, ret;
+
+	if (!ehci_debug)
+		return;
+	while (n > 0) {
+		chunk = n;
+		if (chunk > DBGP_MAX_PACKET)
+			chunk = DBGP_MAX_PACKET;
+		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
+			dbgp_endpoint_out, str, chunk);
+		str += chunk;
+		n -= chunk;
+	}
+}
+
+static struct console early_dbgp_console = {
+	.name =		"earlydbg",
+	.write =	early_dbgp_write,
+	.flags =	CON_PRINTBUFFER,
+	.index =	-1,
+};
+#endif
+
 /* Console interface to a host file on AMD's SimNow! */
 
 static int simnow_fd;
@@ -165,6 +889,7 @@ enum {
 static noinline long simnow(long cmd, long a, long b, long c)
 {
 	long ret;
+
 	asm volatile("cpuid" :
 		     "=a" (ret) :
 		     "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
@@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
 static void __init simnow_init(char *str)
 {
 	char *fn = "klog";
+
 	if (*str == '=')
 		fn = ++str;
 	/* error ignored */
@@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_end(ap);
 }
 
-static int __initdata keep_early;
 
 static int __init setup_early_printk(char *buf)
 {
+	int keep_early;
+
 	if (!buf)
 		return 0;
 
@@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf)
 		return 0;
 	early_console_initialized = 1;
 
-	if (strstr(buf, "keep"))
-		keep_early = 1;
+	keep_early = (strstr(buf, "keep") != NULL);
 
 	if (!strncmp(buf, "serial", 6)) {
 		early_serial_init(buf + 6);
@@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf)
 		simnow_init(buf + 6);
 		early_console = &simnow_console;
 		keep_early = 1;
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+	} else if (!strncmp(buf, "dbgp", 4)) {
+		if (early_dbgp_init(buf+4) < 0)
+			return 0;
+		early_console = &early_dbgp_console;
+		/*
+		 * usb subsys will reset ehci controller, so don't keep
+		 * that early console
+		 */
+		keep_early = 0;
+#endif
 #ifdef CONFIG_HVC_XEN
 	} else if (!strncmp(buf, "xen", 3)) {
 		early_console = &xenboot_console;
@@ -251,4 +988,23 @@ static int __init setup_early_printk(char *buf)
 	register_console(early_console);
 	return 0;
 }
+
+void __init enable_debug_console(char *buf)
+{
+#ifdef DBGP_DEBUG
+	struct console *old_early_console = NULL;
+
+	if (early_console_initialized && early_console) {
+		old_early_console = early_console;
+		unregister_console(early_console);
+		early_console_initialized = 0;
+	}
+
+	setup_early_printk(buf);
+
+	if (early_console == old_early_console && old_early_console)
+		register_console(old_early_console);
+#endif
+}
+
 early_param("earlyprintk", setup_early_printk);

From 9749986a878e91182ff027ff0010ab8e3211031a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 26 Jul 2008 17:28:11 +0200
Subject: [PATCH 008/142] x86: usb debug port early console, fix

fix:

 arch/x86/kernel/built-in.o: In function `nvidia_set_debug_port':
 early_printk.c:(.text+0xf8b1): undefined reference to `read_pci_config'
 early_printk.c:(.text+0xf8dc): undefined reference to `write_pci_config'
 arch/x86/kernel/built-in.o: In function `setup_early_printk':
 early_printk.c:(.init.text+0x5487): undefined reference to `early_pci_allowed'
 early_printk.c:(.init.text+0x54cb): undefined reference to `read_pci_config'
 early_printk.c:(.init.text+0x54ec): undefined reference to `read_pci_config_16'
 [...]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 93422d2ecf61..2a3dfbd5e677 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -46,7 +46,7 @@ config EARLY_PRINTK
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
 	default n
-	depends on EARLY_PRINTK
+	depends on EARLY_PRINTK && PCI
 	help
 	  Write kernel log output directly into the EHCI debug port.
 

From b56afe1d41653fb07ab1b5af5ccc12001c4dd5a0 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Thu, 24 Jul 2008 12:15:45 -0300
Subject: [PATCH 009/142] x86, xen: Use native_pte_flags instead of
 native_pte_val for .pte_flags

Using native_pte_val triggers the BUG_ON() in the paravirt_ops
version of pte_flags().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 06219e60e9c8..e2767c28dac7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1347,7 +1347,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
 	.pte_val = xen_pte_val,
-	.pte_flags = native_pte_val,
+	.pte_flags = native_pte_flags,
 	.pgd_val = xen_pgd_val,
 
 	.make_pte = xen_make_pte,

From 64f53a0492b4bc11868307990bb8f7c1e0764f89 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 27 Jul 2008 08:42:32 -0700
Subject: [PATCH 010/142] x86: fix initialization of 'l' bit in ldt descriptors

Make sure that fill_ldt() initializes the 'l' bit in the descriptor.
It always sets it to 0, ignoring 'lm' in user_desc, preserving original
x86_64 behaviour.

Previously it was leaving 'l' uninitialized.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Glauber de Oliveira Costa <gcosta@redhat.com>
---
 include/asm-x86/desc.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 24a524f5e1a2..06f786f4b4fb 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc,
 	desc->d = info->seg_32bit;
 	desc->g = info->limit_in_pages;
 	desc->base2 = (info->base_addr & 0xff000000) >> 24;
+	/*
+	 * Don't allow setting of the lm bit. It is useless anyway
+	 * because 64bit system calls require __USER_CS:
+	 */
+	desc->l = 0;
 }
 
 extern struct desc_ptr idt_descr;

From a05d2ebab28011c2f3f520833f4bfdd2fd1b9c02 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 27 Jul 2008 08:45:02 -0700
Subject: [PATCH 011/142] xen: fix allocation and use of large ldts

When the ldt gets to more than 1 page in size, the kernel uses vmalloc
to allocate it.  This means that:

 - when making the ldt RO, we must update the pages in both the vmalloc
   mapping and the linear mapping to make sure there are no RW aliases.

 - we need to use arbitrary_virt_to_machine to compute the machine addr
   for each update

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 51 ++++++++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e2767c28dac7..b011e4a5dbbe 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -325,24 +325,55 @@ static unsigned long xen_store_tr(void)
 	return 0;
 }
 
+/*
+ * If 'v' is a vmalloc mapping, then find the linear mapping of the
+ * page (if any) and also set its protections to match:
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
+{
+	int level;
+	pte_t *ptep;
+	pte_t pte;
+	unsigned long pfn;
+	struct page *page;
+
+	ptep = lookup_address((unsigned long)v, &level);
+	BUG_ON(ptep == NULL);
+
+	pfn = pte_pfn(*ptep);
+	page = pfn_to_page(pfn);
+
+	pte = pfn_pte(pfn, prot);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
+		BUG();
+
+	if (!PageHighMem(page)) {
+		void *av = __va(PFN_PHYS(pfn));
+
+		if (av != v)
+			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
+				BUG();
+	} else
+		kmap_flush_unused();
+}
+
 static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 {
-	unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE);
-	void *v = ldt;
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 	int i;
 
-	for(i = 0; i < pages; i += PAGE_SIZE)
-		make_lowmem_page_readonly(v + i);
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 }
 
 static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
 {
-	unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE);
-	void *v = ldt;
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 	int i;
 
-	for(i = 0; i < pages; i += PAGE_SIZE)
-		make_lowmem_page_readwrite(v + i);
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL);
 }
 
 static void xen_set_ldt(const void *addr, unsigned entries)
@@ -446,7 +477,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
 {
 	unsigned long lp = (unsigned long)&dt[entrynum];
-	xmaddr_t mach_lp = virt_to_machine(lp);
+	xmaddr_t mach_lp = arbitrary_virt_to_machine(lp);
 	u64 entry = *(u64 *)ptr;
 
 	preempt_disable();
@@ -579,7 +610,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 }
 
 static void xen_load_sp0(struct tss_struct *tss,
-			  struct thread_struct *thread)
+			 struct thread_struct *thread)
 {
 	struct multicall_space mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);

From d974ae379a2fbe8948f01eabbc6b19c0a80f09bc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 24 Jul 2008 16:27:46 -0700
Subject: [PATCH 012/142] generic, memparse(): constify argument

memparse()'s first argument can be const, so it should be.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 2 +-
 lib/cmdline.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fdbbf72ca2eb..7889c2f9b75d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -176,7 +176,7 @@ extern int vsscanf(const char *, const char *, va_list)
 
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(char *ptr, char **retptr);
+extern unsigned long long memparse(const char *ptr, char **retptr);
 
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 5ba8a942a478..f5f3ad8b62ff 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints)
  *	megabyte, or one gigabyte, respectively.
  */
 
-unsigned long long memparse(char *ptr, char **retptr)
+unsigned long long memparse(const char *ptr, char **retptr)
 {
 	char *endptr;	/* local pointer to end of parsed string */
 

From 167e6cf62979df03513898966f9227847d192327 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 24 Jul 2008 16:27:52 -0700
Subject: [PATCH 013/142] xen-balloon: fix up sysfs issues

1. Set the class so it doesn't clash with the normal memory class
2. Fix up the sysfs show functions to match the new prototype
3. Clean up use of memparse

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: "viets@work.de" <viets@work.de>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/balloon.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d4427cb86979..cdec2d843e4c 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -60,7 +60,7 @@
 
 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
 
-#define BALLOON_CLASS_NAME "memory"
+#define BALLOON_CLASS_NAME "xen_memory"
 
 struct balloon_stats {
 	/* We aim for 'current allocation' == 'target allocation'. */
@@ -588,12 +588,13 @@ static void balloon_release_driver_page(struct page *page)
 }
 
 
-#define BALLOON_SHOW(name, format, args...)			\
-	static ssize_t show_##name(struct sys_device *dev,	\
-				   char *buf)			\
-	{							\
-		return sprintf(buf, format, ##args);		\
-	}							\
+#define BALLOON_SHOW(name, format, args...)				\
+	static ssize_t show_##name(struct sys_device *dev,		\
+				   struct sysdev_attribute *attr,	\
+				   char *buf)				\
+	{								\
+		return sprintf(buf, format, ##args);			\
+	}								\
 	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
 
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
@@ -604,7 +605,8 @@ BALLOON_SHOW(hard_limit_kb,
 	     (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 
-static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
+			      char *buf)
 {
 	return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
 }
@@ -614,19 +616,14 @@ static ssize_t store_target_kb(struct sys_device *dev,
 			       const char *buf,
 			       size_t count)
 {
-	char memstring[64], *endchar;
+	char *endchar;
 	unsigned long long target_bytes;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (count <= 1)
-		return -EBADMSG; /* runt */
-	if (count > sizeof(memstring))
-		return -EFBIG;   /* too long */
-	strcpy(memstring, buf);
+	target_bytes = memparse(buf, &endchar);
 
-	target_bytes = memparse(memstring, &endchar);
 	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 
 	return count;

From fde28e8f49ed86e771667a3fe81fcc25c93ede8e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 24 Jul 2008 16:28:00 -0700
Subject: [PATCH 014/142] xen-balloon: clean up unused functions

Remove some unused functions:

balloon_update_driver_allowance
balloon_release_driver_page
	only used on the (obsolete, removed) flip path in netfront
alloc_empty_pages_and_pagevec
free_empty_pages_and_pagevec
	only used in backend drivers; can be reintroduced when needed

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/balloon.c | 147 +-----------------------------------------
 1 file changed, 3 insertions(+), 144 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index cdec2d843e4c..fff987b10e0f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -226,9 +226,8 @@ static int increase_reservation(unsigned long nr_pages)
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
-	reservation.nr_extents   = nr_pages;
-	rc = HYPERVISOR_memory_op(
-		XENMEM_populate_physmap, &reservation);
+	reservation.nr_extents = nr_pages;
+	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 	if (rc < nr_pages) {
 		if (rc > 0) {
 			int ret;
@@ -236,7 +235,7 @@ static int increase_reservation(unsigned long nr_pages)
 			/* We hit the Xen hard limit: reprobe. */
 			reservation.nr_extents = rc;
 			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-					&reservation);
+						   &reservation);
 			BUG_ON(ret != rc);
 		}
 		if (rc >= 0)
@@ -464,130 +463,6 @@ static void balloon_exit(void)
 
 module_exit(balloon_exit);
 
-static void balloon_update_driver_allowance(long delta)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	balloon_stats.driver_pages += delta;
-	spin_unlock_irqrestore(&balloon_lock, flags);
-}
-
-static int dealloc_pte_fn(
-	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-{
-	unsigned long mfn = pte_mfn(*pte);
-	int ret;
-	struct xen_memory_reservation reservation = {
-		.nr_extents   = 1,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	set_xen_guest_handle(reservation.extent_start, &mfn);
-	set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
-	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	BUG_ON(ret != 1);
-	return 0;
-}
-
-static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
-{
-	unsigned long vaddr, flags;
-	struct page *page, **pagevec;
-	int i, ret;
-
-	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
-	if (pagevec == NULL)
-		return NULL;
-
-	for (i = 0; i < nr_pages; i++) {
-		page = pagevec[i] = alloc_page(GFP_KERNEL);
-		if (page == NULL)
-			goto err;
-
-		vaddr = (unsigned long)page_address(page);
-
-		scrub_page(page);
-
-		spin_lock_irqsave(&balloon_lock, flags);
-
-		if (xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long gmfn = page_to_pfn(page);
-			struct xen_memory_reservation reservation = {
-				.nr_extents   = 1,
-				.extent_order = 0,
-				.domid        = DOMID_SELF
-			};
-			set_xen_guest_handle(reservation.extent_start, &gmfn);
-			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-						   &reservation);
-			if (ret == 1)
-				ret = 0; /* success */
-		} else {
-			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
-						  dealloc_pte_fn, NULL);
-		}
-
-		if (ret != 0) {
-			spin_unlock_irqrestore(&balloon_lock, flags);
-			__free_page(page);
-			goto err;
-		}
-
-		totalram_pages = --balloon_stats.current_pages;
-
-		spin_unlock_irqrestore(&balloon_lock, flags);
-	}
-
- out:
-	schedule_work(&balloon_worker);
-	flush_tlb_all();
-	return pagevec;
-
- err:
-	spin_lock_irqsave(&balloon_lock, flags);
-	while (--i >= 0)
-		balloon_append(pagevec[i]);
-	spin_unlock_irqrestore(&balloon_lock, flags);
-	kfree(pagevec);
-	pagevec = NULL;
-	goto out;
-}
-
-static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
-{
-	unsigned long flags;
-	int i;
-
-	if (pagevec == NULL)
-		return;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	for (i = 0; i < nr_pages; i++) {
-		BUG_ON(page_count(pagevec[i]) != 1);
-		balloon_append(pagevec[i]);
-	}
-	spin_unlock_irqrestore(&balloon_lock, flags);
-
-	kfree(pagevec);
-
-	schedule_work(&balloon_worker);
-}
-
-static void balloon_release_driver_page(struct page *page)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	balloon_append(page);
-	balloon_stats.driver_pages--;
-	spin_unlock_irqrestore(&balloon_lock, flags);
-
-	schedule_work(&balloon_worker);
-}
-
-
 #define BALLOON_SHOW(name, format, args...)				\
 	static ssize_t show_##name(struct sys_device *dev,		\
 				   struct sysdev_attribute *attr,	\
@@ -691,20 +566,4 @@ static int register_balloon(struct sys_device *sysdev)
 	return error;
 }
 
-static void unregister_balloon(struct sys_device *sysdev)
-{
-	int i;
-
-	sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
-	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
-		sysdev_remove_file(sysdev, balloon_attrs[i]);
-	sysdev_unregister(sysdev);
-	sysdev_class_unregister(&balloon_sysdev_class);
-}
-
-static void balloon_sysfs_exit(void)
-{
-	unregister_balloon(&balloon_sysdev);
-}
-
 MODULE_LICENSE("GPL");

From d89961e2dc87b6e30b8e3f60bd2af5cd92cf4643 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 24 Jul 2008 13:48:58 -0700
Subject: [PATCH 015/142] xen: suppress known wrmsrs

In general, Xen doesn't support wrmsr from an unprivileged domain; it
just ends up ignoring the instruction and printing a message on the
console.

Given that there are sets of MSRs we know the kernel will try to write
to, but we don't care, just eat them in xen_write_msr to cut down on
console noise.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b011e4a5dbbe..b795470ec069 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -854,6 +854,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 			ret = -EFAULT;
 		break;
 #endif
+
+	case MSR_STAR:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+	case MSR_SYSCALL_MASK:
+	case MSR_IA32_SYSENTER_CS:
+	case MSR_IA32_SYSENTER_ESP:
+	case MSR_IA32_SYSENTER_EIP:
+		/* Fast syscall setup is all done in hypercalls, so
+		   these are all ignored.  Stub them out here to stop
+		   Xen console noise. */
+		break;
+
 	default:
 		ret = native_write_msr_safe(msr, low, high);
 	}

From e7f5b309c9bd6142f395c4a36123ebac4bcdc1b0 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:11 +0200
Subject: [PATCH 016/142] x86: AMD microcode patch loading support v2

Add an entry to the MAINTAINERS file for AMD CPU microcode
patch loading support.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 MAINTAINERS | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 03c5d6ccb9f8..28f867d99a8c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -384,6 +384,11 @@ M:	joerg.roedel@amd.com
 L:	iommu@lists.linux-foundation.org
 S:	Supported
 
+AMD MICROCODE UPDATE SUPPORT
+P:      Peter Oruba
+M:      peter.oruba@amd.com
+S:      Supported
+
 AMS (Apple Motion Sensor) DRIVER
 P:	Stelian Pop
 M:	stelian@popies.net

From 9a56a0f80b52cb41c5e0add47c7ce0bb2ef25eb0 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:13 +0200
Subject: [PATCH 017/142] x86: moved Intel microcode patch loader declarations
 to seperate header file

Intel specific microcode declarations have been moved to a seperate header file.
There are no code changes to the code itself and no side effects to other parts.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c |  1 +
 include/asm-x86/microcode.h | 34 ++++++++++++++++++++++++++++++++++
 include/asm-x86/processor.h | 35 -----------------------------------
 3 files changed, 35 insertions(+), 35 deletions(-)
 create mode 100644 include/asm-x86/microcode.h

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590e..0d654bd32928 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -93,6 +93,7 @@
 #include <asm/msr.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
+#include <asm/microcode.h>
 
 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
new file mode 100644
index 000000000000..5a0556855154
--- /dev/null
+++ b/include/asm-x86/microcode.h
@@ -0,0 +1,34 @@
+struct microcode_header {
+	unsigned int            hdrver;
+	unsigned int            rev;
+	unsigned int            date;
+	unsigned int            sig;
+	unsigned int            cksum;
+	unsigned int            ldrver;
+	unsigned int            pf;
+	unsigned int            datasize;
+	unsigned int            totalsize;
+	unsigned int            reserved[3];
+};
+
+struct microcode {
+	struct microcode_header hdr;
+	unsigned int            bits[0];
+};
+
+typedef struct microcode          microcode_t;
+typedef struct microcode_header   microcode_header_t;
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+	unsigned int            sig;
+	unsigned int            pf;
+	unsigned int            cksum;
+};
+
+struct extended_sigtable {
+	unsigned int            count;
+	unsigned int            cksum;
+	unsigned int            reserved[3];
+	struct extended_signature sigs[0];
+};
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 5f58da401b43..58a76f69ee31 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -561,41 +561,6 @@ static inline void clear_in_cr4(unsigned long mask)
 	write_cr4(cr4);
 }
 
-struct microcode_header {
-	unsigned int		hdrver;
-	unsigned int		rev;
-	unsigned int		date;
-	unsigned int		sig;
-	unsigned int		cksum;
-	unsigned int		ldrver;
-	unsigned int		pf;
-	unsigned int		datasize;
-	unsigned int		totalsize;
-	unsigned int		reserved[3];
-};
-
-struct microcode {
-	struct microcode_header	hdr;
-	unsigned int		bits[0];
-};
-
-typedef struct microcode	microcode_t;
-typedef struct microcode_header	microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int		sig;
-	unsigned int		pf;
-	unsigned int		cksum;
-};
-
-struct extended_sigtable {
-	unsigned int		count;
-	unsigned int		cksum;
-	unsigned int		reserved[3];
-	struct extended_signature sigs[0];
-};
-
 typedef struct {
 	unsigned long		seg;
 } mm_segment_t;

From 8e61028dfdc6b8ca996abfe8f9baef6792ea2904 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:14 +0200
Subject: [PATCH 018/142] x86: typedef removal

Removed typedefs. No functional changes to the code.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 24 ++++++++++++------------
 include/asm-x86/microcode.h |  3 ---
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 0d654bd32928..74e6a77bf190 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -102,17 +102,17 @@ MODULE_LICENSE("GPL");
 #define MICROCODE_VERSION 	"1.14a"
 
 #define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
-#define MC_HEADER_SIZE		(sizeof (microcode_header_t))  	  /* 48 bytes */
+#define MC_HEADER_SIZE		(sizeof (struct microcode_header))  	  /* 48 bytes */
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
 #define EXT_HEADER_SIZE		(sizeof (struct extended_sigtable)) /* 20 bytes */
 #define EXT_SIGNATURE_SIZE	(sizeof (struct extended_signature)) /* 12 bytes */
 #define DWSIZE			(sizeof (u32))
 #define get_totalsize(mc) \
-	(((microcode_t *)mc)->hdr.totalsize ? \
-	 ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
+	(((struct microcode *)mc)->hdr.totalsize ? \
+	 ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
 #define get_datasize(mc) \
-	(((microcode_t *)mc)->hdr.datasize ? \
-	 ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+	(((struct microcode *)mc)->hdr.datasize ? \
+	 ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
 
 #define sigmatch(s1, s2, p1, p2) \
 	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
@@ -130,7 +130,7 @@ static struct ucode_cpu_info {
 	unsigned int sig;
 	unsigned int pf;
 	unsigned int rev;
-	microcode_t *mc;
+	struct microcode *mc;
 } ucode_cpu_info[NR_CPUS];
 
 static void collect_cpu_info(int cpu_num)
@@ -171,7 +171,7 @@ static void collect_cpu_info(int cpu_num)
 }
 
 static inline int microcode_update_match(int cpu_num,
-	microcode_header_t *mc_header, int sig, int pf)
+	struct microcode_header *mc_header, int sig, int pf)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 
@@ -183,7 +183,7 @@ static inline int microcode_update_match(int cpu_num,
 
 static int microcode_sanity_check(void *mc)
 {
-	microcode_header_t *mc_header = mc;
+	struct microcode_header *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
 	struct extended_signature *ext_sig;
 	unsigned long total_size, data_size, ext_table_size;
@@ -268,7 +268,7 @@ static int microcode_sanity_check(void *mc)
 static int get_maching_microcode(void *mc, int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	microcode_header_t *mc_header = mc;
+	struct microcode_header *mc_header = mc;
 	struct extended_sigtable *ext_header;
 	unsigned long total_size = get_totalsize(mc_header);
 	int ext_sigcount, i;
@@ -355,7 +355,7 @@ static unsigned int user_buffer_size;	/* it's size */
 
 static long get_next_ucode(void **mc, long offset)
 {
-	microcode_header_t mc_header;
+	struct microcode_header mc_header;
 	unsigned long total_size;
 
 	/* No more data */
@@ -497,13 +497,13 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
 	unsigned long size, long offset)
 {
-	microcode_header_t *mc_header;
+	struct microcode_header *mc_header;
 	unsigned long total_size;
 
 	/* No more data */
 	if (offset >= size)
 		return 0;
-	mc_header = (microcode_header_t *)(buf + offset);
+	mc_header = (struct microcode_header *)(buf + offset);
 	total_size = get_totalsize(mc_header);
 
 	if (offset + total_size > size) {
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 5a0556855154..1519ef0674bb 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -16,9 +16,6 @@ struct microcode {
 	unsigned int            bits[0];
 };
 
-typedef struct microcode          microcode_t;
-typedef struct microcode_header   microcode_header_t;
-
 /* microcode format is extended from prescott processors */
 struct extended_signature {
 	unsigned int            sig;

From c3b71bcec0380836caac9b524fa1585b469b7456 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:15 +0200
Subject: [PATCH 019/142] x86: move per CPU microcode structure declaration to
 header file

This structure will be later used by other modules as well and
needs therfore to be moved out to a header file.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 8 +-------
 include/asm-x86/microcode.h | 8 ++++++++
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 74e6a77bf190..4e7b2f65fed6 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -125,13 +125,7 @@ static DEFINE_SPINLOCK(microcode_update_lock);
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);
 
-static struct ucode_cpu_info {
-	int valid;
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int rev;
-	struct microcode *mc;
-} ucode_cpu_info[NR_CPUS];
+static struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 
 static void collect_cpu_info(int cpu_num)
 {
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 1519ef0674bb..d34a1fc1b173 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -29,3 +29,11 @@ struct extended_sigtable {
 	unsigned int            reserved[3];
 	struct extended_signature sigs[0];
 };
+
+struct ucode_cpu_info {
+	int valid;
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int rev;
+	struct microcode *mc;
+};

From 1abae31096007cc993f67ae2576fe8f812270ad6 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:16 +0200
Subject: [PATCH 020/142] x86: move microcode.c to microcode_intel.c

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile                           | 2 +-
 arch/x86/kernel/{microcode.c => microcode_intel.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename arch/x86/kernel/{microcode.c => microcode_intel.c} (100%)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..a9be2a0dde80 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -51,7 +51,7 @@ obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_MICROCODE)		+= microcode_intel.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode_intel.c
similarity index 100%
rename from arch/x86/kernel/microcode.c
rename to arch/x86/kernel/microcode_intel.c

From 3e135d887c973b525d43fbb67dfc5972694882f6 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:17 +0200
Subject: [PATCH 021/142] x86: code split to two parts

Split off existing code into two seperate files. One file holds general
code, the other file vendor specific parts.

No functional changes, only refactoring.

Temporarily Introduced a new module name 'ucode' for result,
due to already taken name 'microcode'.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile          |   3 +-
 arch/x86/kernel/microcode.c       | 463 ++++++++++++++++++++++++++++++
 arch/x86/kernel/microcode_intel.c | 387 ++-----------------------
 3 files changed, 488 insertions(+), 365 deletions(-)
 create mode 100644 arch/x86/kernel/microcode.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a9be2a0dde80..abb32aed7f4b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -51,7 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= microcode_intel.o
+obj-$(CONFIG_MICROCODE)		+= ucode.o
+ucode-objs                      := microcode.o microcode_intel.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
new file mode 100644
index 000000000000..c1047d7f7ede
--- /dev/null
+++ b/arch/x86/kernel/microcode.c
@@ -0,0 +1,463 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *		      2006	Shaohua Li <shaohua.li@intel.com>
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ *	Software Developer's Manual
+ *	Order Number 253668 or free download from:
+ *
+ *	http://developer.intel.com/design/pentium4/manuals/253668.htm
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	1.0	16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Initial release.
+ *	1.01	18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added read() support + cleanups.
+ *	1.02	21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Added 'device trimming' support. open(O_WRONLY) zeroes
+ *		and frees the saved copy of applied microcode.
+ *	1.03	29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ *		Made to use devfs (/dev/cpu/microcode) + cleanups.
+ *	1.04	06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Added misc device support (now uses both devfs and misc).
+ *		Added MICROCODE_IOCFREE ioctl to clear memory.
+ *	1.05	09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ *		Messages for error cases (non Intel & no suitable microcode).
+ *	1.06	03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->release(). Removed exclusive open and status bitmap.
+ *		Added microcode_rwsem to serialize read()/write()/ioctl().
+ *		Removed global kernel lock usage.
+ *	1.07	07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ *		Write 0 to 0x8B msr and then cpuid before reading revision,
+ *		so that it works even if there were no update done by the
+ *		BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ *		to be 0 on my machine which is why it worked even when I
+ *		disabled update by the BIOS)
+ *		Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ *	1.08	11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ *			     Tigran Aivazian <tigran@veritas.com>
+ *		Intel Pentium 4 processor support and bugfixes.
+ *	1.09	30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ *		Bugfix for HT (Hyper-Threading) enabled processors
+ *		whereby processor resources are shared by all logical processors
+ *		in a single CPU package.
+ *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ *		Tigran Aivazian <tigran@veritas.com>,
+ *		Serialize updates as required on HT processors due to speculative
+ *		nature of implementation.
+ *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ *		Fix the panic when writing zero-length microcode chunk.
+ *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
+ *		Jun Nakajima <jun.nakajima@intel.com>
+ *		Support for the microcode updates in the new format.
+ *	1.13	10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ *		Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ *		because we no longer hold a copy of applied microcode
+ *		in kernel memory.
+ *	1.14	25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ *		Fix sigmatch() macro to handle old CPUs with pf == 0.
+ *		Thanks to Stuart Swales for pointing out this bug.
+ */
+
+//#define DEBUG /* pr_debug */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+#define MICROCODE_VERSION 	"1.14a"
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+DEFINE_MUTEX(microcode_mutex);
+
+struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+
+extern long get_next_ucode(void **mc, long offset);
+extern int microcode_sanity_check(void *mc);
+extern int get_matching_microcode(void *mc, int cpu);
+extern void collect_cpu_info(int cpu_num);
+extern int cpu_request_microcode(int cpu);
+extern void microcode_fini_cpu(int cpu);
+extern void apply_microcode(int cpu);
+extern int apply_microcode_check_cpu(int cpu);
+
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+void __user *user_buffer;	/* user area microcode data buffer */
+unsigned int user_buffer_size;	/* it's size */
+
+static int do_microcode_update (void)
+{
+	long cursor = 0;
+	int error = 0;
+	void *new_mc = NULL;
+	int cpu;
+	cpumask_t old;
+	cpumask_of_cpu_ptr_declare(newmask);
+
+	old = current->cpus_allowed;
+
+	while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
+		error = microcode_sanity_check(new_mc);
+		if (error)
+			goto out;
+		/*
+		 * It's possible the data file has multiple matching ucode,
+		 * lets keep searching till the latest version
+		 */
+		for_each_online_cpu(cpu) {
+			struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+			if (!uci->valid)
+				continue;
+			cpumask_of_cpu_ptr_next(newmask, cpu);
+			set_cpus_allowed_ptr(current, newmask);
+			error = get_maching_microcode(new_mc, cpu);
+			if (error < 0)
+				goto out;
+			if (error == 1)
+				apply_microcode(cpu);
+		}
+		vfree(new_mc);
+	}
+out:
+	if (cursor > 0)
+		vfree(new_mc);
+	if (cursor < 0)
+		error = cursor;
+	set_cpus_allowed_ptr(current, &old);
+	return error;
+}
+
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+	cycle_kernel_lock();
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+{
+	ssize_t ret;
+
+	if ((len >> PAGE_SHIFT) > num_physpages) {
+		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
+		return -EINVAL;
+	}
+
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	user_buffer = (void __user *) buf;
+	user_buffer_size = (int) len;
+
+	ret = do_microcode_update();
+	if (!ret)
+		ret = (ssize_t)len;
+
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+	return ret;
+}
+
+static const struct file_operations microcode_fops = {
+	.owner		= THIS_MODULE,
+	.write		= microcode_write,
+	.open		= microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+	.minor		= MICROCODE_MINOR,
+	.name		= "microcode",
+	.fops		= &microcode_fops,
+};
+
+static int __init microcode_dev_init (void)
+{
+	int error;
+
+	error = misc_register(&microcode_dev);
+	if (error) {
+		printk(KERN_ERR
+			"microcode: can't misc_register on minor=%d\n",
+			MICROCODE_MINOR);
+		return error;
+	}
+
+	return 0;
+}
+
+static void microcode_dev_exit (void)
+{
+	misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while(0)
+#endif
+
+/* fake device for request_firmware */
+struct platform_device *microcode_pdev;
+
+static void microcode_init_cpu(int cpu, int resume)
+{
+	cpumask_t old;
+	cpumask_of_cpu_ptr(newmask, cpu);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	old = current->cpus_allowed;
+
+	set_cpus_allowed_ptr(current, newmask);
+	mutex_lock(&microcode_mutex);
+	collect_cpu_info(cpu);
+	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
+		cpu_request_microcode(cpu);
+	mutex_unlock(&microcode_mutex);
+	set_cpus_allowed_ptr(current, &old);
+}
+
+static ssize_t reload_store(struct sys_device *dev,
+			    struct sysdev_attribute *attr,
+			    const char *buf, size_t sz)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+	char *end;
+	unsigned long val = simple_strtoul(buf, &end, 0);
+	int err = 0;
+	int cpu = dev->id;
+
+	if (end == buf)
+		return -EINVAL;
+	if (val == 1) {
+		cpumask_t old;
+		cpumask_of_cpu_ptr(newmask, cpu);
+
+		old = current->cpus_allowed;
+
+		get_online_cpus();
+		set_cpus_allowed_ptr(current, newmask);
+
+		mutex_lock(&microcode_mutex);
+		if (uci->valid)
+			err = cpu_request_microcode(cpu);
+		mutex_unlock(&microcode_mutex);
+		put_online_cpus();
+		set_cpus_allowed_ptr(current, &old);
+	}
+	if (err)
+		return err;
+	return sz;
+}
+
+static ssize_t version_show(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->rev);
+}
+
+static ssize_t pf_show(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->pf);
+}
+
+static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
+static SYSDEV_ATTR(version, 0400, version_show, NULL);
+static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
+
+static struct attribute *mc_default_attrs[] = {
+	&attr_reload.attr,
+	&attr_version.attr,
+	&attr_processor_flags.attr,
+	NULL
+};
+
+static struct attribute_group mc_attr_group = {
+	.attrs = mc_default_attrs,
+	.name = "microcode",
+};
+
+static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
+{
+	int err, cpu = sys_dev->id;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	pr_debug("microcode: CPU%d added\n", cpu);
+	memset(uci, 0, sizeof(*uci));
+
+	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
+	if (err)
+		return err;
+
+	microcode_init_cpu(cpu, resume);
+
+	return 0;
+}
+
+static int mc_sysdev_add(struct sys_device *sys_dev)
+{
+	return __mc_sysdev_add(sys_dev, 0);
+}
+
+static int mc_sysdev_remove(struct sys_device *sys_dev)
+{
+	int cpu = sys_dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	pr_debug("microcode: CPU%d removed\n", cpu);
+	microcode_fini_cpu(cpu);
+	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+	return 0;
+}
+
+static int mc_sysdev_resume(struct sys_device *dev)
+{
+	int cpu = dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+	pr_debug("microcode: CPU%d resumed\n", cpu);
+	/* only CPU 0 will apply ucode here */
+	apply_microcode(0);
+	return 0;
+}
+
+static struct sysdev_driver mc_sysdev_driver = {
+	.add = mc_sysdev_add,
+	.remove = mc_sysdev_remove,
+	.resume = mc_sysdev_resume,
+};
+
+static __cpuinit int
+mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_UP_CANCELED_FROZEN:
+		/* The CPU refused to come up during a system resume */
+		microcode_fini_cpu(cpu);
+		break;
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		mc_sysdev_add(sys_dev);
+		break;
+	case CPU_ONLINE_FROZEN:
+		/* System-wide resume is in progress, try to apply microcode */
+		if (apply_microcode_check_cpu(cpu)) {
+			/* The application of microcode failed */
+			microcode_fini_cpu(cpu);
+			__mc_sysdev_add(sys_dev, 1);
+			break;
+		}
+	case CPU_DOWN_FAILED_FROZEN:
+		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
+			printk(KERN_ERR "microcode: Failed to create the sysfs "
+				"group for CPU%d\n", cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		mc_sysdev_remove(sys_dev);
+		break;
+	case CPU_DOWN_PREPARE_FROZEN:
+		/* Suspend is in progress, only remove the interface */
+		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata mc_cpu_notifier = {
+	.notifier_call = mc_cpu_callback,
+};
+
+static int __init microcode_init (void)
+{
+	int error;
+
+	printk(KERN_INFO
+		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
+
+	error = microcode_dev_init();
+	if (error)
+		return error;
+	microcode_pdev = platform_device_register_simple("microcode", -1,
+							 NULL, 0);
+	if (IS_ERR(microcode_pdev)) {
+		microcode_dev_exit();
+		return PTR_ERR(microcode_pdev);
+	}
+
+	get_online_cpus();
+	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+	put_online_cpus();
+	if (error) {
+		microcode_dev_exit();
+		platform_device_unregister(microcode_pdev);
+		return error;
+	}
+
+	register_hotcpu_notifier(&mc_cpu_notifier);
+	return 0;
+}
+
+static void __exit microcode_exit (void)
+{
+	microcode_dev_exit();
+
+	unregister_hotcpu_notifier(&mc_cpu_notifier);
+
+	get_online_cpus();
+	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+	put_online_cpus();
+
+	platform_device_unregister(microcode_pdev);
+}
+
+module_init(microcode_init)
+module_exit(microcode_exit)
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 4e7b2f65fed6..eded0a154ea8 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -95,18 +95,16 @@
 #include <asm/processor.h>
 #include <asm/microcode.h>
 
-MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
+MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
-#define MICROCODE_VERSION 	"1.14a"
-
 #define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
-#define MC_HEADER_SIZE		(sizeof (struct microcode_header))  	  /* 48 bytes */
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header))  	  /* 48 bytes */
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-#define EXT_HEADER_SIZE		(sizeof (struct extended_sigtable)) /* 20 bytes */
-#define EXT_SIGNATURE_SIZE	(sizeof (struct extended_signature)) /* 12 bytes */
-#define DWSIZE			(sizeof (u32))
+#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable)) /* 20 bytes */
+#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature)) /* 12 bytes */
+#define DWSIZE			(sizeof(u32))
 #define get_totalsize(mc) \
 	(((struct microcode *)mc)->hdr.totalsize ? \
 	 ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
@@ -123,11 +121,11 @@ MODULE_LICENSE("GPL");
 static DEFINE_SPINLOCK(microcode_update_lock);
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DEFINE_MUTEX(microcode_mutex);
+extern struct mutex microcode_mutex;
 
-static struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 
-static void collect_cpu_info(int cpu_num)
+void collect_cpu_info(int cpu_num)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -140,7 +138,7 @@ static void collect_cpu_info(int cpu_num)
 	uci->valid = 1;
 
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
-	    	cpu_has(c, X86_FEATURE_IA64)) {
+	    cpu_has(c, X86_FEATURE_IA64)) {
 		printk(KERN_ERR "microcode: CPU%d not a capable Intel "
 			"processor\n", cpu_num);
 		uci->valid = 0;
@@ -175,7 +173,7 @@ static inline int microcode_update_match(int cpu_num,
 	return 1;
 }
 
-static int microcode_sanity_check(void *mc)
+int microcode_sanity_check(void *mc)
 {
 	struct microcode_header *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
@@ -259,7 +257,7 @@ static int microcode_sanity_check(void *mc)
  * return 1 - found update
  * return < 0 - error
  */
-static int get_maching_microcode(void *mc, int cpu)
+int get_matching_microcode(void *mc, int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct microcode_header *mc_header = mc;
@@ -288,7 +286,7 @@ static int get_maching_microcode(void *mc, int cpu)
 	return 0;
 find:
 	pr_debug("microcode: CPU%d found a matching microcode update with"
-		" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
+		 " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev);
 	new_mc = vmalloc(total_size);
 	if (!new_mc) {
 		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
@@ -303,7 +301,7 @@ static int get_maching_microcode(void *mc, int cpu)
 	return 1;
 }
 
-static void apply_microcode(int cpu)
+void apply_microcode(int cpu)
 {
 	unsigned long flags;
 	unsigned int val[2];
@@ -344,10 +342,10 @@ static void apply_microcode(int cpu)
 }
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
-static void __user *user_buffer;	/* user area microcode data buffer */
-static unsigned int user_buffer_size;	/* it's size */
+extern void __user *user_buffer;        /* user area microcode data buffer */
+extern unsigned int user_buffer_size;   /* it's size */
 
-static long get_next_ucode(void **mc, long offset)
+long get_next_ucode(void **mc, long offset)
 {
 	struct microcode_header mc_header;
 	unsigned long total_size;
@@ -375,117 +373,6 @@ static long get_next_ucode(void **mc, long offset)
 	}
 	return offset + total_size;
 }
-
-static int do_microcode_update (void)
-{
-	long cursor = 0;
-	int error = 0;
-	void *new_mc = NULL;
-	int cpu;
-	cpumask_t old;
-	cpumask_of_cpu_ptr_declare(newmask);
-
-	old = current->cpus_allowed;
-
-	while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
-		error = microcode_sanity_check(new_mc);
-		if (error)
-			goto out;
-		/*
-		 * It's possible the data file has multiple matching ucode,
-		 * lets keep searching till the latest version
-		 */
-		for_each_online_cpu(cpu) {
-			struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-			if (!uci->valid)
-				continue;
-			cpumask_of_cpu_ptr_next(newmask, cpu);
-			set_cpus_allowed_ptr(current, newmask);
-			error = get_maching_microcode(new_mc, cpu);
-			if (error < 0)
-				goto out;
-			if (error == 1)
-				apply_microcode(cpu);
-		}
-		vfree(new_mc);
-	}
-out:
-	if (cursor > 0)
-		vfree(new_mc);
-	if (cursor < 0)
-		error = cursor;
-	set_cpus_allowed_ptr(current, &old);
-	return error;
-}
-
-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
-	cycle_kernel_lock();
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
-{
-	ssize_t ret;
-
-	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
-		return -EINVAL;
-	}
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	user_buffer = (void __user *) buf;
-	user_buffer_size = (int) len;
-
-	ret = do_microcode_update();
-	if (!ret)
-		ret = (ssize_t)len;
-
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	return ret;
-}
-
-static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
-};
-
-static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
-};
-
-static int __init microcode_dev_init (void)
-{
-	int error;
-
-	error = misc_register(&microcode_dev);
-	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
-		return error;
-	}
-
-	return 0;
-}
-
-static void microcode_dev_exit (void)
-{
-	misc_deregister(&microcode_dev);
-}
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-#else
-#define microcode_dev_init() 0
-#define microcode_dev_exit() do { } while(0)
 #endif
 
 static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
@@ -515,9 +402,9 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
 }
 
 /* fake device for request_firmware */
-static struct platform_device *microcode_pdev;
+extern struct platform_device *microcode_pdev;
 
-static int cpu_request_microcode(int cpu)
+int cpu_request_microcode(int cpu)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -530,7 +417,7 @@ static int cpu_request_microcode(int cpu)
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu != raw_smp_processor_id());
-	sprintf(name,"intel-ucode/%02x-%02x-%02x",
+	sprintf(name, "intel-ucode/%02x-%02x-%02x",
 		c->x86, c->x86_model, c->x86_mask);
 	error = request_firmware(&firmware, name, &microcode_pdev->dev);
 	if (error) {
@@ -544,7 +431,7 @@ static int cpu_request_microcode(int cpu)
 		error = microcode_sanity_check(mc);
 		if (error)
 			break;
-		error = get_maching_microcode(mc, cpu);
+		error = get_matching_microcode(mc, cpu);
 		if (error < 0)
 			break;
 		/*
@@ -566,7 +453,7 @@ static int cpu_request_microcode(int cpu)
 	return error;
 }
 
-static int apply_microcode_check_cpu(int cpu)
+int apply_microcode_check_cpu(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -615,241 +502,13 @@ static int apply_microcode_check_cpu(int cpu)
 	return err;
 }
 
-static void microcode_init_cpu(int cpu, int resume)
-{
-	cpumask_t old;
-	cpumask_of_cpu_ptr(newmask, cpu);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	old = current->cpus_allowed;
-
-	set_cpus_allowed_ptr(current, newmask);
-	mutex_lock(&microcode_mutex);
-	collect_cpu_info(cpu);
-	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
-		cpu_request_microcode(cpu);
-	mutex_unlock(&microcode_mutex);
-	set_cpus_allowed_ptr(current, &old);
-}
-
-static void microcode_fini_cpu(int cpu)
+void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	mutex_lock(&microcode_mutex);
 	uci->valid = 0;
-	vfree(uci->mc);
+	kfree(uci->mc);
 	uci->mc = NULL;
 	mutex_unlock(&microcode_mutex);
 }
-
-static ssize_t reload_store(struct sys_device *dev,
-			    struct sysdev_attribute *attr,
-			    const char *buf, size_t sz)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-	char *end;
-	unsigned long val = simple_strtoul(buf, &end, 0);
-	int err = 0;
-	int cpu = dev->id;
-
-	if (end == buf)
-		return -EINVAL;
-	if (val == 1) {
-		cpumask_t old;
-		cpumask_of_cpu_ptr(newmask, cpu);
-
-		old = current->cpus_allowed;
-
-		get_online_cpus();
-		set_cpus_allowed_ptr(current, newmask);
-
-		mutex_lock(&microcode_mutex);
-		if (uci->valid)
-			err = cpu_request_microcode(cpu);
-		mutex_unlock(&microcode_mutex);
-		put_online_cpus();
-		set_cpus_allowed_ptr(current, &old);
-	}
-	if (err)
-		return err;
-	return sz;
-}
-
-static ssize_t version_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
-	return sprintf(buf, "0x%x\n", uci->rev);
-}
-
-static ssize_t pf_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
-	return sprintf(buf, "0x%x\n", uci->pf);
-}
-
-static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
-static SYSDEV_ATTR(version, 0400, version_show, NULL);
-static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
-
-static struct attribute *mc_default_attrs[] = {
-	&attr_reload.attr,
-	&attr_version.attr,
-	&attr_processor_flags.attr,
-	NULL
-};
-
-static struct attribute_group mc_attr_group = {
-	.attrs = mc_default_attrs,
-	.name = "microcode",
-};
-
-static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
-{
-	int err, cpu = sys_dev->id;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (!cpu_online(cpu))
-		return 0;
-
-	pr_debug("microcode: CPU%d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
-
-	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
-	if (err)
-		return err;
-
-	microcode_init_cpu(cpu, resume);
-
-	return 0;
-}
-
-static int mc_sysdev_add(struct sys_device *sys_dev)
-{
-	return __mc_sysdev_add(sys_dev, 0);
-}
-
-static int mc_sysdev_remove(struct sys_device *sys_dev)
-{
-	int cpu = sys_dev->id;
-
-	if (!cpu_online(cpu))
-		return 0;
-
-	pr_debug("microcode: CPU%d removed\n", cpu);
-	microcode_fini_cpu(cpu);
-	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
-	return 0;
-}
-
-static int mc_sysdev_resume(struct sys_device *dev)
-{
-	int cpu = dev->id;
-
-	if (!cpu_online(cpu))
-		return 0;
-	pr_debug("microcode: CPU%d resumed\n", cpu);
-	/* only CPU 0 will apply ucode here */
-	apply_microcode(0);
-	return 0;
-}
-
-static struct sysdev_driver mc_sysdev_driver = {
-	.add = mc_sysdev_add,
-	.remove = mc_sysdev_remove,
-	.resume = mc_sysdev_resume,
-};
-
-static __cpuinit int
-mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
-
-	sys_dev = get_cpu_sysdev(cpu);
-	switch (action) {
-	case CPU_UP_CANCELED_FROZEN:
-		/* The CPU refused to come up during a system resume */
-		microcode_fini_cpu(cpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		mc_sysdev_add(sys_dev);
-		break;
-	case CPU_ONLINE_FROZEN:
-		/* System-wide resume is in progress, try to apply microcode */
-		if (apply_microcode_check_cpu(cpu)) {
-			/* The application of microcode failed */
-			microcode_fini_cpu(cpu);
-			__mc_sysdev_add(sys_dev, 1);
-			break;
-		}
-	case CPU_DOWN_FAILED_FROZEN:
-		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-			printk(KERN_ERR "microcode: Failed to create the sysfs "
-				"group for CPU%d\n", cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		mc_sysdev_remove(sys_dev);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
-		/* Suspend is in progress, only remove the interface */
-		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __refdata mc_cpu_notifier = {
-	.notifier_call = mc_cpu_callback,
-};
-
-static int __init microcode_init (void)
-{
-	int error;
-
-	printk(KERN_INFO
-		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
-
-	error = microcode_dev_init();
-	if (error)
-		return error;
-	microcode_pdev = platform_device_register_simple("microcode", -1,
-							 NULL, 0);
-	if (IS_ERR(microcode_pdev)) {
-		microcode_dev_exit();
-		return PTR_ERR(microcode_pdev);
-	}
-
-	get_online_cpus();
-	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
-	put_online_cpus();
-	if (error) {
-		microcode_dev_exit();
-		platform_device_unregister(microcode_pdev);
-		return error;
-	}
-
-	register_hotcpu_notifier(&mc_cpu_notifier);
-	return 0;
-}
-
-static void __exit microcode_exit (void)
-{
-	microcode_dev_exit();
-
-	unregister_hotcpu_notifier(&mc_cpu_notifier);
-
-	get_online_cpus();
-	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
-	put_online_cpus();
-
-	platform_device_unregister(microcode_pdev);
-}
-
-module_init(microcode_init)
-module_exit(microcode_exit)

From d4ee36686853d5714437c4409f17ad42bfaf4211 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:18 +0200
Subject: [PATCH 022/142] x86: structure declaration renaming

Renamed common structures to vendor specific naming scheme
so other vendors will be able to use the same naming
convention.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_intel.c | 46 ++++++++++++++++---------------
 include/asm-x86/microcode.h       | 10 ++++---
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index eded0a154ea8..ca9861bf067e 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -100,17 +100,19 @@ MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
 #define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header))  	  /* 48 bytes */
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel)) /* 48 bytes */
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
 #define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable)) /* 20 bytes */
 #define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature)) /* 12 bytes */
 #define DWSIZE			(sizeof(u32))
 #define get_totalsize(mc) \
-	(((struct microcode *)mc)->hdr.totalsize ? \
-	 ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
+	(((struct microcode_intel *)mc)->hdr.totalsize ? \
+	 ((struct microcode_intel *)mc)->hdr.totalsize : \
+	 DEFAULT_UCODE_TOTALSIZE)
+
 #define get_datasize(mc) \
-	(((struct microcode *)mc)->hdr.datasize ? \
-	 ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+	(((struct microcode_intel *)mc)->hdr.datasize ? \
+	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
 
 #define sigmatch(s1, s2, p1, p2) \
 	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
@@ -134,7 +136,7 @@ void collect_cpu_info(int cpu_num)
 	/* We should bind the task to the CPU */
 	BUG_ON(raw_smp_processor_id() != cpu_num);
 	uci->pf = uci->rev = 0;
-	uci->mc = NULL;
+	uci->mc.mc_intel = NULL;
 	uci->valid = 1;
 
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -163,7 +165,7 @@ void collect_cpu_info(int cpu_num)
 }
 
 static inline int microcode_update_match(int cpu_num,
-	struct microcode_header *mc_header, int sig, int pf)
+	struct microcode_header_intel *mc_header, int sig, int pf)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 
@@ -175,7 +177,7 @@ static inline int microcode_update_match(int cpu_num,
 
 int microcode_sanity_check(void *mc)
 {
-	struct microcode_header *mc_header = mc;
+	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
 	struct extended_signature *ext_sig;
 	unsigned long total_size, data_size, ext_table_size;
@@ -260,7 +262,7 @@ int microcode_sanity_check(void *mc)
 int get_matching_microcode(void *mc, int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct microcode_header *mc_header = mc;
+	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header;
 	unsigned long total_size = get_totalsize(mc_header);
 	int ext_sigcount, i;
@@ -294,10 +296,10 @@ int get_matching_microcode(void *mc, int cpu)
 	}
 
 	/* free previous update file */
-	vfree(uci->mc);
+	vfree(uci->mc.mc_intel);
 
 	memcpy(new_mc, mc, total_size);
-	uci->mc = new_mc;
+	uci->mc.mc_intel = new_mc;
 	return 1;
 }
 
@@ -311,7 +313,7 @@ void apply_microcode(int cpu)
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
 
-	if (uci->mc == NULL)
+	if (uci->mc.mc_intel == NULL)
 		return;
 
 	/* serialize access to the physical write to MSR 0x79 */
@@ -319,8 +321,8 @@ void apply_microcode(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
-		(unsigned long) uci->mc->bits,
-		(unsigned long) uci->mc->bits >> 16 >> 16);
+	      (unsigned long) uci->mc.mc_intel->bits,
+	      (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16);
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 
 	/* see notes above for revision 1.07.  Apparent chip bug */
@@ -330,14 +332,14 @@ void apply_microcode(int cpu)
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
-	if (val[1] != uci->mc->hdr.rev) {
+	if (val[1] != uci->mc.mc_intel->hdr.rev) {
 		printk(KERN_ERR "microcode: CPU%d update from revision "
 			"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
 		return;
 	}
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x, date = %08x \n",
-	       cpu_num, uci->rev, val[1], uci->mc->hdr.date);
+	       cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date);
 	uci->rev = val[1];
 }
 
@@ -347,7 +349,7 @@ extern unsigned int user_buffer_size;   /* it's size */
 
 long get_next_ucode(void **mc, long offset)
 {
-	struct microcode_header mc_header;
+	struct microcode_header_intel mc_header;
 	unsigned long total_size;
 
 	/* No more data */
@@ -378,13 +380,13 @@ long get_next_ucode(void **mc, long offset)
 static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
 	unsigned long size, long offset)
 {
-	struct microcode_header *mc_header;
+	struct microcode_header_intel *mc_header;
 	unsigned long total_size;
 
 	/* No more data */
 	if (offset >= size)
 		return 0;
-	mc_header = (struct microcode_header *)(buf + offset);
+	mc_header = (struct microcode_header_intel *)(buf + offset);
 	total_size = get_totalsize(mc_header);
 
 	if (offset + total_size > size) {
@@ -463,7 +465,7 @@ int apply_microcode_check_cpu(int cpu)
 	int err = 0;
 
 	/* Check if the microcode is available */
-	if (!uci->mc)
+	if (!uci->mc.mc_intel)
 		return 0;
 
 	old = current->cpus_allowed;
@@ -508,7 +510,7 @@ void microcode_fini_cpu(int cpu)
 
 	mutex_lock(&microcode_mutex);
 	uci->valid = 0;
-	kfree(uci->mc);
-	uci->mc = NULL;
+	kfree(uci->mc.mc_intel);
+	uci->mc.mc_intel = NULL;
 	mutex_unlock(&microcode_mutex);
 }
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index d34a1fc1b173..ef77c6f438bf 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -1,4 +1,4 @@
-struct microcode_header {
+struct microcode_header_intel {
 	unsigned int            hdrver;
 	unsigned int            rev;
 	unsigned int            date;
@@ -11,8 +11,8 @@ struct microcode_header {
 	unsigned int            reserved[3];
 };
 
-struct microcode {
-	struct microcode_header hdr;
+struct microcode_intel {
+	struct microcode_header_intel hdr;
 	unsigned int            bits[0];
 };
 
@@ -35,5 +35,7 @@ struct ucode_cpu_info {
 	unsigned int sig;
 	unsigned int pf;
 	unsigned int rev;
-	struct microcode *mc;
+	union {
+		struct microcode_intel *mc_intel;
+	} mc;
 };

From 9835fd4ad9ee5fc6b909df72aa3e3dba04415f4b Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:19 +0200
Subject: [PATCH 023/142] x86: add AMD specific declarations

Added AMD specific declarations to header file.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/microcode.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index ef77c6f438bf..4e941721c0d1 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -30,6 +30,35 @@ struct extended_sigtable {
 	struct extended_signature sigs[0];
 };
 
+struct equiv_cpu_entry {
+	unsigned int installed_cpu;
+	unsigned int fixed_errata_mask;
+	unsigned int fixed_errata_compare;
+	unsigned int equiv_cpu;
+};
+
+struct microcode_header_amd {
+	unsigned int  data_code;
+	unsigned int  patch_id;
+	unsigned char mc_patch_data_id[2];
+	unsigned char mc_patch_data_len;
+	unsigned char init_flag;
+	unsigned int  mc_patch_data_checksum;
+	unsigned int  nb_dev_id;
+	unsigned int  sb_dev_id;
+	unsigned char processor_rev_id[2];
+	unsigned char nb_rev_id;
+	unsigned char sb_rev_id;
+	unsigned char bios_api_rev;
+	unsigned char reserved1[3];
+	unsigned int  match_reg[8];
+};
+
+struct microcode_amd {
+	struct microcode_header_amd hdr;
+	unsigned int mpb[0];
+};
+
 struct ucode_cpu_info {
 	int valid;
 	unsigned int sig;
@@ -37,5 +66,6 @@ struct ucode_cpu_info {
 	unsigned int rev;
 	union {
 		struct microcode_intel *mc_intel;
+		struct microcode_amd *mc_amd;
 	} mc;
 };

From 26bf7a48c33906cc3415a4492aa9ead7a75f1353 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:20 +0200
Subject: [PATCH 024/142] x86: first step of refactoring, introducing
 microcode_ops

Refactoring with the goal of having one general module and separate
vendor specific modules that hook into the general one.

Microcode_ops is a function pointer structure in which vendor
specific modules will enter all functions that differ between
vendors and that need to be accessed from the general module.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/microcode.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 4e941721c0d1..9231c876e374 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -1,3 +1,16 @@
+struct microcode_ops {
+	long (*get_next_ucode)(void **mc, long offset);
+	long (*microcode_get_next_ucode)(void **mc, long offset);
+	int (*get_matching_microcode)(void *mc, int cpu);
+	int (*apply_microcode_check_cpu)(int cpu);
+	int (*microcode_sanity_check)(void *mc);
+	int (*cpu_request_microcode)(int cpu);
+	void (*collect_cpu_info)(int cpu_num);
+	void (*apply_microcode)(int cpu);
+	void (*microcode_fini_cpu)(int cpu);
+	void (*clear_patch)(void *data);
+};
+
 struct microcode_header_intel {
 	unsigned int            hdrver;
 	unsigned int            rev;

From 8d86f390d9bb5b39f0a315838d1616de6363e1b9 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:21 +0200
Subject: [PATCH 025/142] x86: major refactoring

Refactored code by introducing a two-module solution.

There is one general module in which vendor specific modules can hook into.
However, that is exclusive, there is only one vendor specific module
allowed at a time. A CPU vendor check makes sure only the correct
module for the underlying system gets called.

Functinally in terms of patch loading itself there are no changes. This
refactoring provides a basis for future implementations of other vendors'
patch loaders.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                  | 25 ++++++++--
 arch/x86/kernel/Makefile          |  4 +-
 arch/x86/kernel/microcode.c       | 80 ++++++++++++++++++-------------
 arch/x86/kernel/microcode_intel.c | 50 +++++++++++++++----
 include/asm-x86/microcode.h       |  3 ++
 5 files changed, 112 insertions(+), 50 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b6fa2877b173..6b0b885cf47a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -782,7 +782,7 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+	tristate "/dev/cpu/microcode - microcode support"
 	select FW_LOADER
 	---help---
 	  If you say Y here, you will be able to update the microcode on
@@ -791,14 +791,29 @@ config MICROCODE
 	  actual microcode binary data itself which is not shipped with the
 	  Linux kernel.
 
-	  For latest news and information on obtaining all the required
-	  ingredients for this driver, check:
-	  <http://www.urbanmyth.org/microcode/>.
+	  This option selects the general module only, you need to select
+	  at least one vendor specific module as well.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called microcode.
 
-config MICROCODE_OLD_INTERFACE
+config MICROCODE_INTEL
+       tristate "Intel microcode patch loading support"
+       depends on MICROCODE
+       default MICROCODE
+       select FW_LOADER
+       --help---
+         This options enables microcode patch loading support for Intel
+         processors.
+
+         For latest news and information on obtaining all the required
+         Intel ingredients for this driver, check:
+         <http://www.urbanmyth.org/microcode/>.
+
+         This driver is only available as a module: the module
+         will be called microcode_intel.  
+
+   config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index abb32aed7f4b..f2f9f6da8c18 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -51,8 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= ucode.o
-ucode-objs                      := microcode.o microcode_intel.o
+obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index c1047d7f7ede..1e42e79ca694 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -99,25 +99,22 @@ MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
-#define MICROCODE_VERSION 	"1.14a"
+#define MICROCODE_VERSION 	"2.00"
+
+struct microcode_ops *microcode_ops;
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-DEFINE_MUTEX(microcode_mutex);
+static DEFINE_MUTEX(microcode_mutex);
+EXPORT_SYMBOL_GPL(microcode_mutex);
 
-struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
-
-extern long get_next_ucode(void **mc, long offset);
-extern int microcode_sanity_check(void *mc);
-extern int get_matching_microcode(void *mc, int cpu);
-extern void collect_cpu_info(int cpu_num);
-extern int cpu_request_microcode(int cpu);
-extern void microcode_fini_cpu(int cpu);
-extern void apply_microcode(int cpu);
-extern int apply_microcode_check_cpu(int cpu);
+static struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
-void __user *user_buffer;	/* user area microcode data buffer */
-unsigned int user_buffer_size;	/* it's size */
+static void __user *user_buffer;	/* user area microcode data buffer */
+EXPORT_SYMBOL_GPL(user_buffer);
+static unsigned int user_buffer_size;	/* it's size */
+EXPORT_SYMBOL_GPL(user_buffer_size);
 
 static int do_microcode_update (void)
 {
@@ -130,8 +127,8 @@ static int do_microcode_update (void)
 
 	old = current->cpus_allowed;
 
-	while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
-		error = microcode_sanity_check(new_mc);
+	while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) {
+		error = microcode_ops->microcode_sanity_check(new_mc);
 		if (error)
 			goto out;
 		/*
@@ -145,11 +142,12 @@ static int do_microcode_update (void)
 				continue;
 			cpumask_of_cpu_ptr_next(newmask, cpu);
 			set_cpus_allowed_ptr(current, newmask);
-			error = get_maching_microcode(new_mc, cpu);
+			error = microcode_ops->get_matching_microcode(new_mc,
+								      cpu);
 			if (error < 0)
 				goto out;
 			if (error == 1)
-				apply_microcode(cpu);
+				microcode_ops->apply_microcode(cpu);
 		}
 		vfree(new_mc);
 	}
@@ -232,7 +230,8 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 #endif
 
 /* fake device for request_firmware */
-struct platform_device *microcode_pdev;
+static struct platform_device *microcode_pdev;
+EXPORT_SYMBOL_GPL(microcode_pdev);
 
 static void microcode_init_cpu(int cpu, int resume)
 {
@@ -244,9 +243,9 @@ static void microcode_init_cpu(int cpu, int resume)
 
 	set_cpus_allowed_ptr(current, newmask);
 	mutex_lock(&microcode_mutex);
-	collect_cpu_info(cpu);
+	microcode_ops->collect_cpu_info(cpu);
 	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
-		cpu_request_microcode(cpu);
+		microcode_ops->cpu_request_microcode(cpu);
 	mutex_unlock(&microcode_mutex);
 	set_cpus_allowed_ptr(current, &old);
 }
@@ -274,7 +273,7 @@ static ssize_t reload_store(struct sys_device *dev,
 
 		mutex_lock(&microcode_mutex);
 		if (uci->valid)
-			err = cpu_request_microcode(cpu);
+			err = microcode_ops->cpu_request_microcode(cpu);
 		mutex_unlock(&microcode_mutex);
 		put_online_cpus();
 		set_cpus_allowed_ptr(current, &old);
@@ -349,7 +348,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 		return 0;
 
 	pr_debug("microcode: CPU%d removed\n", cpu);
-	microcode_fini_cpu(cpu);
+	microcode_ops->microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
 }
@@ -362,7 +361,7 @@ static int mc_sysdev_resume(struct sys_device *dev)
 		return 0;
 	pr_debug("microcode: CPU%d resumed\n", cpu);
 	/* only CPU 0 will apply ucode here */
-	apply_microcode(0);
+	microcode_ops->apply_microcode(0);
 	return 0;
 }
 
@@ -382,7 +381,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_UP_CANCELED_FROZEN:
 		/* The CPU refused to come up during a system resume */
-		microcode_fini_cpu(cpu);
+		microcode_ops->microcode_fini_cpu(cpu);
 		break;
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
@@ -390,9 +389,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE_FROZEN:
 		/* System-wide resume is in progress, try to apply microcode */
-		if (apply_microcode_check_cpu(cpu)) {
+		if (microcode_ops->apply_microcode_check_cpu(cpu)) {
 			/* The application of microcode failed */
-			microcode_fini_cpu(cpu);
+			microcode_ops->microcode_fini_cpu(cpu);
 			__mc_sysdev_add(sys_dev, 1);
 			break;
 		}
@@ -416,12 +415,17 @@ static struct notifier_block __refdata mc_cpu_notifier = {
 	.notifier_call = mc_cpu_callback,
 };
 
-static int __init microcode_init (void)
+static int microcode_init(void *opaque, struct module *module)
 {
+	struct microcode_ops *ops = (struct microcode_ops *)opaque;
 	int error;
 
-	printk(KERN_INFO
-		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
+	if (microcode_ops) {
+		printk(KERN_ERR "microcode: already loaded the other module\n");
+		return -EEXIST;
+	}
+
+	microcode_ops = ops;
 
 	error = microcode_dev_init();
 	if (error)
@@ -443,8 +447,15 @@ static int __init microcode_init (void)
 	}
 
 	register_hotcpu_notifier(&mc_cpu_notifier);
+
+	printk(KERN_INFO
+	       "Microcode Update Driver: v" MICROCODE_VERSION
+	       " <tigran@aivazian.fsnet.co.uk>"
+	       " <peter.oruba@amd.com>\n");
+
 	return 0;
 }
+EXPORT_SYMBOL_GPL(microcode_init);
 
 static void __exit microcode_exit (void)
 {
@@ -457,7 +468,10 @@ static void __exit microcode_exit (void)
 	put_online_cpus();
 
 	platform_device_unregister(microcode_pdev);
-}
 
-module_init(microcode_init)
-module_exit(microcode_exit)
+	microcode_ops = NULL;
+
+	printk(KERN_INFO
+	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+}
+EXPORT_SYMBOL_GPL(microcode_exit);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index ca9861bf067e..831db5363dba 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -127,7 +127,7 @@ extern struct mutex microcode_mutex;
 
 extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 
-void collect_cpu_info(int cpu_num)
+static void collect_cpu_info(int cpu_num)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -175,7 +175,7 @@ static inline int microcode_update_match(int cpu_num,
 	return 1;
 }
 
-int microcode_sanity_check(void *mc)
+static int microcode_sanity_check(void *mc)
 {
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
@@ -259,7 +259,7 @@ int microcode_sanity_check(void *mc)
  * return 1 - found update
  * return < 0 - error
  */
-int get_matching_microcode(void *mc, int cpu)
+static int get_matching_microcode(void *mc, int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct microcode_header_intel *mc_header = mc;
@@ -288,7 +288,8 @@ int get_matching_microcode(void *mc, int cpu)
 	return 0;
 find:
 	pr_debug("microcode: CPU%d found a matching microcode update with"
-		 " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev);
+		 " version 0x%x (current=0x%x)\n",
+		 cpu, mc_header->rev, uci->rev);
 	new_mc = vmalloc(total_size);
 	if (!new_mc) {
 		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
@@ -303,7 +304,7 @@ int get_matching_microcode(void *mc, int cpu)
 	return 1;
 }
 
-void apply_microcode(int cpu)
+static void apply_microcode(int cpu)
 {
 	unsigned long flags;
 	unsigned int val[2];
@@ -347,7 +348,7 @@ void apply_microcode(int cpu)
 extern void __user *user_buffer;        /* user area microcode data buffer */
 extern unsigned int user_buffer_size;   /* it's size */
 
-long get_next_ucode(void **mc, long offset)
+static long get_next_ucode(void **mc, long offset)
 {
 	struct microcode_header_intel mc_header;
 	unsigned long total_size;
@@ -406,7 +407,7 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
 /* fake device for request_firmware */
 extern struct platform_device *microcode_pdev;
 
-int cpu_request_microcode(int cpu)
+static int cpu_request_microcode(int cpu)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -455,7 +456,7 @@ int cpu_request_microcode(int cpu)
 	return error;
 }
 
-int apply_microcode_check_cpu(int cpu)
+static int apply_microcode_check_cpu(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -504,13 +505,42 @@ int apply_microcode_check_cpu(int cpu)
 	return err;
 }
 
-void microcode_fini_cpu(int cpu)
+static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	mutex_lock(&microcode_mutex);
 	uci->valid = 0;
-	kfree(uci->mc.mc_intel);
+	vfree(uci->mc.mc_intel);
 	uci->mc.mc_intel = NULL;
 	mutex_unlock(&microcode_mutex);
 }
+
+static struct microcode_ops microcode_intel_ops = {
+	.get_next_ucode                   = get_next_ucode,
+	.get_matching_microcode           = get_matching_microcode,
+	.microcode_sanity_check           = microcode_sanity_check,
+	.apply_microcode_check_cpu        = apply_microcode_check_cpu,
+	.cpu_request_microcode            = cpu_request_microcode,
+	.collect_cpu_info                 = collect_cpu_info,
+	.apply_microcode                  = apply_microcode,
+	.microcode_fini_cpu               = microcode_fini_cpu,
+};
+
+static int __init microcode_intel_module_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(get_cpu());
+
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		return microcode_init(&microcode_intel_ops, THIS_MODULE);
+	else
+		return -ENODEV;
+}
+
+static void __exit microcode_intel_module_exit(void)
+{
+	microcode_exit();
+}
+
+module_init(microcode_intel_module_init)
+module_exit(microcode_intel_module_exit)
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 9231c876e374..18b2aeec2adf 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -1,3 +1,6 @@
+extern int microcode_init(void *opaque, struct module *module);
+extern void microcode_exit(void);
+
 struct microcode_ops {
 	long (*get_next_ucode)(void **mc, long offset);
 	long (*microcode_get_next_ucode)(void **mc, long offset);

From 80cc9f1020f49c9e5b50898c102fd444de70a0a3 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Mon, 28 Jul 2008 18:44:22 +0200
Subject: [PATCH 026/142] x86: AMD microcode patch loading support

This patch introduces microcode patch loading for AMD
processors. It is based on previous corresponding work
for Intel processors.

It hooks into the general patch loading module. Main
difference is that a container file format is used to hold
all patch data for multiple processors as well as an
equivalent CPU table, which comes seperately, as opposed
to Intel's microcode patching solution.

Kconfig and Makefile have been changed provice config
and build option for new source file.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                |  21 +-
 arch/x86/kernel/Makefile        |   1 +
 arch/x86/kernel/microcode_amd.c | 521 ++++++++++++++++++++++++++++++++
 3 files changed, 539 insertions(+), 4 deletions(-)
 create mode 100644 arch/x86/kernel/microcode_amd.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6b0b885cf47a..7bb461758d31 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -786,10 +786,12 @@ config MICROCODE
 	select FW_LOADER
 	---help---
 	  If you say Y here, you will be able to update the microcode on
-	  Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
-	  Pentium III, Pentium 4, Xeon etc.  You will obviously need the
-	  actual microcode binary data itself which is not shipped with the
-	  Linux kernel.
+	  certain Intel and AMD processors. The Intel support is for the
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
+	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
+	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
+	  You will obviously need the actual microcode binary data itself
+	  which is not shipped with the Linux kernel.
 
 	  This option selects the general module only, you need to select
 	  at least one vendor specific module as well.
@@ -813,6 +815,17 @@ config MICROCODE_INTEL
          This driver is only available as a module: the module
          will be called microcode_intel.  
 
+config MICROCODE_AMD
+       tristate "AMD microcode patch loading support"
+       depends on MICROCODE
+       select FW_LOADER
+       --help---
+         If you select this option, microcode patch loading support for AMD
+	 processors will be enabled.
+
+	 This driver is only available as a module: the module
+	 will be called microcode_amd.
+
    config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f2f9f6da8c18..be454f348c3b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
+obj-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
new file mode 100644
index 000000000000..db199e3fdd1f
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd.c
@@ -0,0 +1,521 @@
+/*
+ *  AMD CPU Microcode Update Driver for Linux
+ *  Copyright (C) 2008 Advanced Micro Devices Inc.
+ *
+ *  Author: Peter Oruba <peter.oruba@amd.com>
+ *
+ *  Based on work by:
+ *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *
+ *  This driver allows to upgrade microcode on AMD
+ *  family 0x10 and 0x11 processors.
+ *
+ *  Licensed unter the terms of the GNU General Public
+ *  License version 2. See file COPYING for details.
+*/
+
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("AMD Microcode Update Driver");
+MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
+MODULE_LICENSE("GPLv2");
+
+#define UCODE_MAGIC                0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
+#define UCODE_UCODE_TYPE           0x00000001
+
+#define UCODE_MAX_SIZE          (2048)
+#define DEFAULT_UCODE_DATASIZE	(896)	  /* 896 bytes */
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_amd))	  /* 64 bytes */
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */
+#define DWSIZE			(sizeof(u32))
+/* For now we support a fixed ucode total size only */
+#define get_totalsize(mc) \
+	((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
+	 + MC_HEADER_SIZE)
+
+extern int microcode_init(void *opaque, struct module *module);
+extern void microcode_exit(void);
+
+/* serialize access to the physical write */
+static DEFINE_SPINLOCK(microcode_update_lock);
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+extern struct mutex (microcode_mutex);
+
+struct equiv_cpu_entry *equiv_cpu_table;
+
+extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+
+static void collect_cpu_info_amd(int cpu)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(raw_smp_processor_id() != cpu);
+	uci->rev = 0;
+	uci->pf = 0;
+	uci->mc.mc_amd = NULL;
+	uci->valid = 1;
+
+	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
+		printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
+		       cpu);
+		uci->valid = 0;
+		return;
+	}
+
+	asm volatile("movl %1, %%ecx; rdmsr"
+		     : "=a" (uci->rev)
+		     : "i" (0x0000008B) : "ecx");
+
+	printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
+	       uci->rev);
+}
+
+static int get_matching_microcode_amd(void *mc, int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct microcode_header_amd *mc_header = mc;
+	unsigned long total_size = get_totalsize(mc_header);
+	void *new_mc;
+	struct pci_dev *nb_pci_dev, *sb_pci_dev;
+	unsigned int current_cpu_id;
+	unsigned int equiv_cpu_id = 0x00;
+	unsigned int i = 0;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	/* This is a tricky part. We might be called from a write operation */
+	/* to the device file instead of the usual process of firmware */
+	/* loading. This routine needs to be able to distinguish both */
+/* cases. This is done by checking if there alread is a equivalent */
+	/* CPU table installed. If not, we're written through */
+	/* /dev/cpu/microcode. */
+/* Since we ignore all checks. The error case in which going through */
+/* firmware loading and that table is not loaded has already been */
+	/* checked earlier. */
+	if (equiv_cpu_table == NULL) {
+		printk(KERN_INFO "microcode: CPU%d microcode update with "
+		       "version 0x%x (current=0x%x)\n",
+		       cpu, mc_header->patch_id, uci->rev);
+		goto out;
+	}
+
+	current_cpu_id = cpuid_eax(0x00000001);
+
+	while (equiv_cpu_table[i].installed_cpu != 0) {
+		if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
+			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+			break;
+		}
+		i++;
+	}
+
+	if (!equiv_cpu_id) {
+		printk(KERN_ERR "microcode: CPU%d cpu_id "
+		       "not found in equivalent cpu table \n", cpu);
+		return 0;
+	}
+
+	if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
+		printk(KERN_ERR
+			"microcode: CPU%d patch does not match "
+			"(patch is %x, cpu extended is %x) \n",
+			cpu, mc_header->processor_rev_id[0],
+			(equiv_cpu_id & 0xff));
+		return 0;
+	}
+
+	if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
+		printk(KERN_ERR "microcode: CPU%d patch does not match "
+			"(patch is %x, cpu base id is %x) \n",
+			cpu, mc_header->processor_rev_id[1],
+			((equiv_cpu_id >> 16) & 0xff));
+
+		return 0;
+	}
+
+	/* ucode may be northbridge specific */
+	if (mc_header->nb_dev_id) {
+		nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+					    (mc_header->nb_dev_id & 0xff),
+					    NULL);
+		if ((!nb_pci_dev) ||
+		    (mc_header->nb_rev_id != nb_pci_dev->revision)) {
+			printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
+			pci_dev_put(nb_pci_dev);
+			return 0;
+		}
+		pci_dev_put(nb_pci_dev);
+	}
+
+	/* ucode may be southbridge specific */
+	if (mc_header->sb_dev_id) {
+		sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+					    (mc_header->sb_dev_id & 0xff),
+					    NULL);
+		if ((!sb_pci_dev) ||
+		    (mc_header->sb_rev_id != sb_pci_dev->revision)) {
+			printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
+			pci_dev_put(sb_pci_dev);
+			return 0;
+		}
+		pci_dev_put(sb_pci_dev);
+	}
+
+	if (mc_header->patch_id <= uci->rev)
+		return 0;
+
+	printk(KERN_INFO "microcode: CPU%d found a matching microcode "
+	       "update with version 0x%x (current=0x%x)\n",
+	       cpu, mc_header->patch_id, uci->rev);
+
+out:
+	new_mc = vmalloc(UCODE_MAX_SIZE);
+	if (!new_mc) {
+		printk(KERN_ERR "microcode: error, can't allocate memory\n");
+		return -ENOMEM;
+	}
+	memset(new_mc, 0, UCODE_MAX_SIZE);
+
+	/* free previous update file */
+	vfree(uci->mc.mc_amd);
+
+	memcpy(new_mc, mc, total_size);
+
+	uci->mc.mc_amd = new_mc;
+	return 1;
+}
+
+static void apply_microcode_amd(int cpu)
+{
+	unsigned long flags;
+	unsigned int eax, edx;
+	unsigned int rev;
+	int cpu_num = raw_smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu_num != cpu);
+
+	if (uci->mc.mc_amd == NULL)
+		return;
+
+	spin_lock_irqsave(&microcode_update_lock, flags);
+
+	edx = (unsigned int)(((unsigned long)
+			      &(uci->mc.mc_amd->hdr.data_code)) >> 32);
+	eax = (unsigned int)(((unsigned long)
+			      &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL);
+
+	asm volatile("movl %0, %%ecx; wrmsr" :
+		     : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
+
+	/* get patch id after patching */
+	asm volatile("movl %1, %%ecx; rdmsr"
+		     : "=a" (rev)
+		     : "i" (0x0000008B) : "ecx");
+
+	spin_unlock_irqrestore(&microcode_update_lock, flags);
+
+	/* check current patch id and patch's id for match */
+	if (rev != uci->mc.mc_amd->hdr.patch_id) {
+		printk(KERN_ERR "microcode: CPU%d update from revision "
+		       "0x%x to 0x%x failed\n", cpu_num,
+		       uci->mc.mc_amd->hdr.patch_id, rev);
+		return;
+	}
+
+	printk(KERN_INFO "microcode: CPU%d updated from revision "
+	       "0x%x to 0x%x \n",
+	       cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id);
+
+	uci->rev = rev;
+}
+
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+extern void __user *user_buffer;        /* user area microcode data buffer */
+extern unsigned int user_buffer_size;   /* it's size */
+
+static long get_next_ucode_amd(void **mc, long offset)
+{
+	struct microcode_header_amd mc_header;
+	unsigned long total_size;
+
+	/* No more data */
+	if (offset >= user_buffer_size)
+		return 0;
+	if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
+		printk(KERN_ERR "microcode: error! Can not read user data\n");
+		return -EFAULT;
+	}
+	total_size = get_totalsize(&mc_header);
+	if (offset + total_size > user_buffer_size) {
+		printk(KERN_ERR "microcode: error! Bad total size in microcode "
+		       "data file\n");
+		return -EINVAL;
+	}
+	*mc = vmalloc(UCODE_MAX_SIZE);
+	if (!*mc)
+		return -ENOMEM;
+	memset(*mc, 0, UCODE_MAX_SIZE);
+
+	if (copy_from_user(*mc, user_buffer + offset, total_size)) {
+		printk(KERN_ERR "microcode: error! Can not read user data\n");
+		vfree(*mc);
+		return -EFAULT;
+	}
+	return offset + total_size;
+}
+#else
+#define get_next_ucode_amd() NULL
+#endif
+
+static long get_next_ucode_from_buffer_amd(void **mc, void *buf,
+				       unsigned long size, long offset)
+{
+	struct microcode_header_amd *mc_header;
+	unsigned long total_size;
+	unsigned char *buf_pos = buf;
+
+	/* No more data */
+	if (offset >= size)
+		return 0;
+
+	if (buf_pos[offset] != UCODE_UCODE_TYPE) {
+		printk(KERN_ERR "microcode: error! "
+		       "Wrong microcode payload type field\n");
+		return -EINVAL;
+	}
+
+	mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
+
+	total_size = (unsigned long) (buf_pos[offset+4] +
+				      (buf_pos[offset+5] << 8));
+
+	printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
+		size, total_size, offset);
+
+	if (offset + total_size > size) {
+		printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+		return -EINVAL;
+	}
+
+	*mc = vmalloc(UCODE_MAX_SIZE);
+	if (!*mc) {
+		printk(KERN_ERR "microcode: error! "
+		       "Can not allocate memory for microcode patch\n");
+		return -ENOMEM;
+	}
+
+	memset(*mc, 0, UCODE_MAX_SIZE);
+	memcpy(*mc, buf + offset + 8, total_size);
+
+	return offset + total_size + 8;
+}
+
+static long install_equiv_cpu_table(void *buf, unsigned long size, long offset)
+{
+	unsigned int *buf_pos = buf;
+
+	/* No more data */
+	if (offset >= size)
+		return 0;
+
+	if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) {
+		printk(KERN_ERR "microcode: error! "
+		       "Wrong microcode equivalnet cpu table type field\n");
+		return 0;
+	}
+
+	if (size == 0) {
+		printk(KERN_ERR "microcode: error! "
+		       "Wrong microcode equivalnet cpu table length\n");
+		return 0;
+	}
+
+	equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
+	if (!equiv_cpu_table) {
+		printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
+		return 0;
+	}
+
+	memset(equiv_cpu_table, 0, size);
+	memcpy(equiv_cpu_table, &buf_pos[3], size);
+
+	return size + 12; /* add header length */
+}
+
+/* fake device for request_firmware */
+extern struct platform_device *microcode_pdev;
+
+static int cpu_request_microcode_amd(int cpu)
+{
+	char name[30];
+	const struct firmware *firmware;
+	void *buf;
+	unsigned int *buf_pos;
+	unsigned long size;
+	long offset = 0;
+	int error;
+	void *mc;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	sprintf(name, "amd-ucode/microcode_amd.bin");
+	error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin",
+				 &microcode_pdev->dev);
+	if (error) {
+		printk(KERN_ERR "microcode: ucode data file %s load failed\n",
+		       name);
+		return error;
+	}
+
+	buf_pos = buf = firmware->data;
+	size = firmware->size;
+
+	if (buf_pos[0] != UCODE_MAGIC) {
+		printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n");
+		return -EINVAL;
+	}
+
+	offset = install_equiv_cpu_table(buf, buf_pos[2], offset);
+
+	if (!offset) {
+		printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
+		return -EINVAL;
+	}
+
+	while ((offset =
+		get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) {
+		error = get_matching_microcode_amd(mc, cpu);
+		if (error < 0)
+			break;
+		/*
+		 * It's possible the data file has multiple matching ucode,
+		 * lets keep searching till the latest version
+		 */
+		if (error == 1) {
+			apply_microcode_amd(cpu);
+			error = 0;
+		}
+		vfree(mc);
+	}
+	if (offset > 0) {
+		vfree(mc);
+		vfree(equiv_cpu_table);
+		equiv_cpu_table = NULL;
+	}
+	if (offset < 0)
+		error = offset;
+	release_firmware(firmware);
+
+	return error;
+}
+
+static int apply_microcode_check_cpu_amd(int cpu)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	unsigned int rev;
+	cpumask_t old;
+	cpumask_of_cpu_ptr(newmask, cpu);
+	int err = 0;
+
+	/* Check if the microcode is available */
+	if (!uci->mc.mc_amd)
+		return 0;
+
+	old = current->cpus_allowed;
+	set_cpus_allowed(current, newmask);
+
+	/* Check if the microcode we have in memory matches the CPU */
+	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16)
+		err = -EINVAL;
+
+	if (!err) {
+		asm volatile("movl %1, %%ecx; rdmsr"
+		     : "=a" (rev)
+		     : "i" (0x0000008B) : "ecx");
+
+		if (uci->rev != rev)
+			err = -EINVAL;
+	}
+
+	if (!err)
+		apply_microcode_amd(cpu);
+	else
+		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
+		       " rev=0x%x\n",
+		       cpu,  uci->rev);
+
+	set_cpus_allowed(current, old);
+	return err;
+}
+
+static void microcode_fini_cpu_amd(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	mutex_lock(&microcode_mutex);
+	uci->valid = 0;
+	vfree(uci->mc.mc_amd);
+	uci->mc.mc_amd = NULL;
+	mutex_unlock(&microcode_mutex);
+}
+
+static struct microcode_ops microcode_amd_ops = {
+	.get_next_ucode                   = get_next_ucode_amd,
+	.get_matching_microcode           = get_matching_microcode_amd,
+	.microcode_sanity_check           = NULL,
+	.apply_microcode_check_cpu        = apply_microcode_check_cpu_amd,
+	.cpu_request_microcode            = cpu_request_microcode_amd,
+	.collect_cpu_info                 = collect_cpu_info_amd,
+	.apply_microcode                  = apply_microcode_amd,
+	.microcode_fini_cpu               = microcode_fini_cpu_amd,
+};
+
+static int __init microcode_amd_module_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(get_cpu());
+
+	equiv_cpu_table = NULL;
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		return microcode_init(&microcode_amd_ops, THIS_MODULE);
+	else
+		return -ENODEV;
+}
+
+static void __exit microcode_amd_module_exit(void)
+{
+	microcode_exit();
+}
+
+module_init(microcode_amd_module_init)
+module_exit(microcode_amd_module_exit)

From 45b1e23eca1c53fa79a611a2bc8c93697ede6c97 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Jul 2008 09:42:17 +0200
Subject: [PATCH 027/142] x86, microcode support: fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

  arch/x86/kernel/microcode.c:412: error: static declaration of ‘microcode_init’ follows non-static declaration
  include/asm/microcode.h:1: error: previous declaration of ‘microcode_init’ was here
  arch/x86/kernel/microcode.c:454: error: static declaration of ‘microcode_exit’ follows non-static declaration
  include/asm/microcode.h:2: error: previous declaration of ‘microcode_exit’ was here

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 1e42e79ca694..9a881845b35b 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -415,7 +415,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {
 	.notifier_call = mc_cpu_callback,
 };
 
-static int microcode_init(void *opaque, struct module *module)
+int microcode_init(void *opaque, struct module *module)
 {
 	struct microcode_ops *ops = (struct microcode_ops *)opaque;
 	int error;
@@ -457,7 +457,7 @@ static int microcode_init(void *opaque, struct module *module)
 }
 EXPORT_SYMBOL_GPL(microcode_init);
 
-static void __exit microcode_exit (void)
+void __exit microcode_exit (void)
 {
 	microcode_dev_exit();
 

From 224e946b81166d732f3e9b414cb69612072fb500 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Jul 2008 09:52:55 +0200
Subject: [PATCH 028/142] x86, microcode: fix symbol exports

fix tons of build errors:

 arch/x86/kernel/built-in.o: In function `microcode_fini_cpu':
 microcode_intel.c:(.text+0x11598): undefined reference to `microcode_mutex'
 microcode_intel.c:(.text+0x115a4): undefined reference to `ucode_cpu_info'
 microcode_intel.c:(.text+0x115ae): undefined reference to `ucode_cpu_info'
 microcode_intel.c:(.text+0x115bc): undefined reference to `microcode_mutex'
 [...]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9a881845b35b..758b958a6637 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -104,16 +104,16 @@ MODULE_LICENSE("GPL");
 struct microcode_ops *microcode_ops;
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DEFINE_MUTEX(microcode_mutex);
+DEFINE_MUTEX(microcode_mutex);
 EXPORT_SYMBOL_GPL(microcode_mutex);
 
-static struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
-static void __user *user_buffer;	/* user area microcode data buffer */
+void __user *user_buffer;		/* user area microcode data buffer */
 EXPORT_SYMBOL_GPL(user_buffer);
-static unsigned int user_buffer_size;	/* it's size */
+unsigned int user_buffer_size;		/* it's size */
 EXPORT_SYMBOL_GPL(user_buffer_size);
 
 static int do_microcode_update (void)
@@ -230,7 +230,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 #endif
 
 /* fake device for request_firmware */
-static struct platform_device *microcode_pdev;
+struct platform_device *microcode_pdev;
 EXPORT_SYMBOL_GPL(microcode_pdev);
 
 static void microcode_init_cpu(int cpu, int resume)

From 5d7b605245b1aa1a9cd6549b1f57d69273eb0c37 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Jul 2008 10:07:36 +0200
Subject: [PATCH 029/142] x86, microcode: fix module license string

fix:

 FATAL: modpost: GPL-incompatible module microcode_amd.ko uses GPL-only symbol 'set_cpus_allowed_ptr'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index db199e3fdd1f..fd9e68e88899 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -40,7 +40,7 @@
 
 MODULE_DESCRIPTION("AMD Microcode Update Driver");
 MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
-MODULE_LICENSE("GPLv2");
+MODULE_LICENSE("GPL v2");
 
 #define UCODE_MAGIC                0x00414d44
 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000

From 0d1edf46ba229b46efacf75c0544b88c05a7b266 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 28 Jul 2008 11:53:57 -0700
Subject: [PATCH 030/142] xen: compile irq functions without -pg for ftrace

For some reason I managed to miss a bunch of irq-related functions
which also need to be compiled without -pg when using ftrace.  This
patch moves them into their own file, and starts a cleanup process
I've been meaning to do anyway.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: "Alex Nixon (Intern)" <Alex.Nixon@eu.citrix.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Makefile     |   3 +-
 arch/x86/xen/enlighten.c  | 122 +-------------------------------
 arch/x86/xen/irq.c        | 143 ++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-asm_32.S |   2 +-
 arch/x86/xen/xen-asm_64.S |   2 +-
 arch/x86/xen/xen-ops.h    |   1 +
 drivers/xen/events.c      |  11 ---
 7 files changed, 150 insertions(+), 134 deletions(-)
 create mode 100644 arch/x86/xen/irq.c

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 5bfee243cf9a..9ee745fa5527 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -2,9 +2,10 @@ ifdef CONFIG_FTRACE
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
 CFLAGS_REMOVE_time.o = -pg
+CFLAGS_REMOVE_irq.o = -pg
 endif
 
-obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
+obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)	+= smp.o spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b795470ec069..cf8b3a93122b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,7 +30,6 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvc-console.h>
@@ -226,94 +225,6 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static unsigned long xen_save_fl(void)
-{
-	struct vcpu_info *vcpu;
-	unsigned long flags;
-
-	vcpu = x86_read_percpu(xen_vcpu);
-
-	/* flag has opposite sense of mask */
-	flags = !vcpu->evtchn_upcall_mask;
-
-	/* convert to IF type flag
-	   -0 -> 0x00000000
-	   -1 -> 0xffffffff
-	*/
-	return (-flags) & X86_EFLAGS_IF;
-}
-
-static void xen_restore_fl(unsigned long flags)
-{
-	struct vcpu_info *vcpu;
-
-	/* convert from IF type flag */
-	flags = !(flags & X86_EFLAGS_IF);
-
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	if (flags == 0) {
-		preempt_check_resched();
-		barrier(); /* unmask then check (avoid races) */
-		if (unlikely(vcpu->evtchn_upcall_pending))
-			force_evtchn_callback();
-	}
-}
-
-static void xen_irq_disable(void)
-{
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
-	preempt_enable_no_resched();
-}
-
-static void xen_irq_enable(void)
-{
-	struct vcpu_info *vcpu;
-
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
-
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = 0;
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	barrier(); /* unmask then check (avoid races) */
-	if (unlikely(vcpu->evtchn_upcall_pending))
-		force_evtchn_callback();
-}
-
-static void xen_safe_halt(void)
-{
-	/* Blocking includes an implicit local_irq_enable(). */
-	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
-		BUG();
-}
-
-static void xen_halt(void)
-{
-	if (irqs_disabled())
-		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
-	else
-		xen_safe_halt();
-}
-
 static void xen_leave_lazy(void)
 {
 	paravirt_leave_lazy(paravirt_get_lazy_mode());
@@ -1308,36 +1219,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	},
 };
 
-static void __init __xen_init_IRQ(void)
-{
-#ifdef CONFIG_X86_64
-	int i;
-
-	/* Create identity vector->irq map */
-	for(i = 0; i < NR_VECTORS; i++) {
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			per_cpu(vector_irq, cpu)[i] = i;
-	}
-#endif	/* CONFIG_X86_64 */
-
-	xen_init_IRQ();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = __xen_init_IRQ,
-	.save_fl = xen_save_fl,
-	.restore_fl = xen_restore_fl,
-	.irq_disable = xen_irq_disable,
-	.irq_enable = xen_irq_enable,
-	.safe_halt = xen_safe_halt,
-	.halt = xen_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = xen_apic_write,
@@ -1740,10 +1621,11 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_init_ops = xen_init_ops;
 	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
-	pv_irq_ops = xen_irq_ops;
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
+	xen_init_irq_ops();
+
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
 		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
 		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
new file mode 100644
index 000000000000..28b85ab8422e
--- /dev/null
+++ b/arch/x86/xen/irq.c
@@ -0,0 +1,143 @@
+#include <linux/hardirq.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void xen_force_evtchn_callback(void)
+{
+	(void)HYPERVISOR_xen_version(0, NULL);
+}
+
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for(i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif	/* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
+static unsigned long xen_save_fl(void)
+{
+	struct vcpu_info *vcpu;
+	unsigned long flags;
+
+	vcpu = x86_read_percpu(xen_vcpu);
+
+	/* flag has opposite sense of mask */
+	flags = !vcpu->evtchn_upcall_mask;
+
+	/* convert to IF type flag
+	   -0 -> 0x00000000
+	   -1 -> 0xffffffff
+	*/
+	return (-flags) & X86_EFLAGS_IF;
+}
+
+static void xen_restore_fl(unsigned long flags)
+{
+	struct vcpu_info *vcpu;
+
+	/* convert from IF type flag */
+	flags = !(flags & X86_EFLAGS_IF);
+
+	/* There's a one instruction preempt window here.  We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
+	preempt_disable();
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = flags;
+	preempt_enable_no_resched();
+
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */
+
+	if (flags == 0) {
+		preempt_check_resched();
+		barrier(); /* unmask then check (avoid races) */
+		if (unlikely(vcpu->evtchn_upcall_pending))
+			xen_force_evtchn_callback();
+	}
+}
+
+static void xen_irq_disable(void)
+{
+	/* There's a one instruction preempt window here.  We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
+	preempt_disable();
+	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+	preempt_enable_no_resched();
+}
+
+static void xen_irq_enable(void)
+{
+	struct vcpu_info *vcpu;
+
+	/* We don't need to worry about being preempted here, since
+	   either a) interrupts are disabled, so no preemption, or b)
+	   the caller is confused and is trying to re-enable interrupts
+	   on an indeterminate processor. */
+
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = 0;
+
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */
+
+	barrier(); /* unmask then check (avoid races) */
+	if (unlikely(vcpu->evtchn_upcall_pending))
+		xen_force_evtchn_callback();
+}
+
+static void xen_safe_halt(void)
+{
+	/* Blocking includes an implicit local_irq_enable(). */
+	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
+		BUG();
+}
+
+static void xen_halt(void)
+{
+	if (irqs_disabled())
+		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+	else
+		xen_safe_halt();
+}
+
+static const struct pv_irq_ops xen_irq_ops __initdata = {
+	.init_IRQ = __xen_init_IRQ,
+	.save_fl = xen_save_fl,
+	.restore_fl = xen_restore_fl,
+	.irq_disable = xen_irq_disable,
+	.irq_enable = xen_irq_enable,
+	.safe_halt = xen_safe_halt,
+	.halt = xen_halt,
+#ifdef CONFIG_X86_64
+	.adjust_exception_frame = xen_adjust_exception_frame,
+#endif
+};
+
+void __init xen_init_irq_ops()
+{
+	pv_irq_ops = xen_irq_ops;
+}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 2497a30f41de..42786f59d9c0 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -298,7 +298,7 @@ check_events:
 	push %eax
 	push %ecx
 	push %edx
-	call force_evtchn_callback
+	call xen_force_evtchn_callback
 	pop %edx
 	pop %ecx
 	pop %eax
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 7f58304fafb3..3b9bda46487a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -122,7 +122,7 @@ check_events:
 	push %r9
 	push %r10
 	push %r11
-	call force_evtchn_callback
+	call xen_force_evtchn_callback
 	pop %r11
 	pop %r10
 	pop %r9
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 8847fb34f17e..3c70ebc50b1b 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -31,6 +31,7 @@ void xen_vcpu_restore(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
 
+void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
 unsigned long xen_tsc_khz(void);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0e0c28574af8..a0837036d898 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS];
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
 
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
-	(void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
 static struct irq_chip xen_dynamic_chip;
 
 /* Constructor for packed IRQ information. */

From cef43bf6b3afd819f7cdcba356af0e8220fb3789 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 28 Jul 2008 13:33:44 -0700
Subject: [PATCH 031/142] xen: fix allocation and use of large ldts, cleanup

Add a proper comment for set_aliased_prot() and fix an
unsigned long/void * warning.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cf8b3a93122b..04ec69e4d02e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -237,8 +237,10 @@ static unsigned long xen_store_tr(void)
 }
 
 /*
- * If 'v' is a vmalloc mapping, then find the linear mapping of the
- * page (if any) and also set its protections to match:
+ * Set the page permissions for a particular virtual address.  If the
+ * address is a vmalloc mapping (or other non-linear mapping), then
+ * find the linear mapping of the page and also set its protections to
+ * match.
  */
 static void set_aliased_prot(void *v, pgprot_t prot)
 {
@@ -387,8 +389,7 @@ static void xen_load_gs_index(unsigned int idx)
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
 {
-	unsigned long lp = (unsigned long)&dt[entrynum];
-	xmaddr_t mach_lp = arbitrary_virt_to_machine(lp);
+	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
 	u64 entry = *(u64 *)ptr;
 
 	preempt_disable();

From 169ad16bb87c10a3f7c108bb7008ebc0270f617a Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 28 Jul 2008 18:32:09 -0300
Subject: [PATCH 032/142] xen_alloc_ptpage: cast PFN_PHYS() argument to
 unsigned long

Currently paravirt_ops alloc_p*() uses u32 for the pfn args. We should
change that later, but while the pfn parameter is still u32, we need to
cast the PFN_PHYS() argument at xen_alloc_ptpage() to unsigned long,
otherwise it will lose bits on the shift.

I think PFN_PHYS() should behave better when fed with smaller integers,
but a cast to unsigned long won't be enough for all cases on 32-bit PAE,
and a cast to u64 would be overkill for most users of PFN_PHYS().

We could have two different flavors of PFN_PHYS: one for low pages
only (unsigned long) and another that works for any page (u64)),
but while we don't have it, we will need the cast to unsigned long on
xen_alloc_ptpage().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 04ec69e4d02e..53afa14eb314 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -822,7 +822,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 		SetPagePinned(page);
 
 		if (!PageHighMem(page)) {
-			make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
 			if (level == PT_PTE)
 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
 		} else

From a0ac87d61ba20932e54f08464d3f3439d78fa878 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Tue, 29 Jul 2008 17:41:04 +0200
Subject: [PATCH 033/142] x86: AMD microcode patch loader style corrections

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 07e52beacb44..6789530ef33b 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -47,9 +47,9 @@ MODULE_LICENSE("GPL v2");
 #define UCODE_UCODE_TYPE           0x00000001
 
 #define UCODE_MAX_SIZE          (2048)
-#define DEFAULT_UCODE_DATASIZE	(896)	  /* 896 bytes */
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_amd))	  /* 64 bytes */
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */
+#define DEFAULT_UCODE_DATASIZE	(896)
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_amd))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
 #define DWSIZE			(sizeof(u32))
 /* For now we support a fixed ucode total size only */
 #define get_totalsize(mc) \

From f516526febb75500f280def2108688d388df4e1e Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Tue, 29 Jul 2008 17:41:05 +0200
Subject: [PATCH 034/142] x86: Intel microcode patch loader style corrections

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_intel.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 6da4a85ff465..a61986e8b691 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -55,8 +55,8 @@
  *		in a single CPU package.
  *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
  *		Tigran Aivazian <tigran@veritas.com>,
- *		Serialize updates as required on HT processors due to speculative
- *		nature of implementation.
+ *		Serialize updates as required on HT processors due to
+ *		speculative nature of implementation.
  *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
  *		Fix the panic when writing zero-length microcode chunk.
  *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
@@ -71,7 +71,7 @@
  *		Thanks to Stuart Swales for pointing out this bug.
  */
 
-//#define DEBUG /* pr_debug */
+/* #define DEBUG */ /* pr_debug */
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -99,11 +99,11 @@ MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
-#define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel)) /* 48 bytes */
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable)) /* 20 bytes */
-#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature)) /* 12 bytes */
+#define DEFAULT_UCODE_DATASIZE 	(2000)
+#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
 #define DWSIZE			(sizeof(u32))
 #define get_totalsize(mc) \
 	(((struct microcode_intel *)mc)->hdr.totalsize ? \

From 831f9bd3151ebd22959a0cb2a20b44a06b26d4ed Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Tue, 29 Jul 2008 17:41:06 +0200
Subject: [PATCH 035/142] x86: moved function declarations out from AMD
 microcode patch loader to heade file

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 6789530ef33b..2b98e6ab1bf9 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -56,9 +56,6 @@ MODULE_LICENSE("GPL v2");
 	((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
 	 + MC_HEADER_SIZE)
 
-extern int microcode_init(void *opaque, struct module *module);
-extern void microcode_exit(void);
-
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 

From daa9c0fee1cbe1d49fbe29af3d4bb16312043b1e Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Tue, 29 Jul 2008 17:41:07 +0200
Subject: [PATCH 036/142] x86: minor pointer type cast in AMD microcode patch
 loader

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 2b98e6ab1bf9..33b2a217a8c5 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -394,7 +394,8 @@ static int cpu_request_microcode_amd(int cpu)
 		return error;
 	}
 
-	buf_pos = buf = firmware->data;
+	buf_pos = (unsigned int *)firmware->data;
+	buf = (void *)firmware->data;
 	size = firmware->size;
 
 	if (buf_pos[0] != UCODE_MAGIC) {

From 34a1b9fc4995102ecbb3a980b348aebf168d8196 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Tue, 12 Aug 2008 13:25:44 +0100
Subject: [PATCH 037/142] Fix date output in x86 microcode driver.

The microcode stores its date in a uint32_t in some weird order
approximating pdp-endian. Rather than printing it like that, print it
properly in ISO standard form.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_intel.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index a61986e8b691..d2d9d74f4cbb 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -339,8 +339,11 @@ static void apply_microcode(int cpu)
 		return;
 	}
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
-	       "0x%x to 0x%x, date = %08x \n",
-	       cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date);
+	       "0x%x to 0x%x, date = %04x-%02x-%02x \n",
+		cpu_num, uci->rev, val[1],
+		uci->mc.mc_intel->hdr.date & 0xffff,
+		uci->mc.mc_intel->hdr.date >> 24,
+		(uci->mc.mc_intel->hdr.date >> 16) & 0xff);
 	uci->rev = val[1];
 }
 

From d33dcb9e7d272cc3171dcc3a21049902185ab03d Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Fri, 1 Aug 2008 12:46:45 +0200
Subject: [PATCH 038/142] x86: Microcode patch loader style corrections

Style corrections to main microcode module.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 28dba1a6e4aa..39961bb83293 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -55,8 +55,8 @@
  *		in a single CPU package.
  *	1.10	28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
  *		Tigran Aivazian <tigran@veritas.com>,
- *		Serialize updates as required on HT processors due to speculative
- *		nature of implementation.
+ *		Serialize updates as required on HT processors due to
+ *		speculative nature of implementation.
  *	1.11	22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
  *		Fix the panic when writing zero-length microcode chunk.
  *	1.12	29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
@@ -71,7 +71,7 @@
  *		Thanks to Stuart Swales for pointing out this bug.
  */
 
-//#define DEBUG /* pr_debug */
+/*#define DEBUG pr_debug */
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -116,7 +116,7 @@ EXPORT_SYMBOL_GPL(user_buffer);
 unsigned int user_buffer_size;		/* it's size */
 EXPORT_SYMBOL_GPL(user_buffer_size);
 
-static int do_microcode_update (void)
+static int do_microcode_update(void)
 {
 	long cursor = 0;
 	int error = 0;
@@ -158,18 +158,20 @@ static int do_microcode_update (void)
 	return error;
 }
 
-static int microcode_open (struct inode *unused1, struct file *unused2)
+static int microcode_open(struct inode *unused1, struct file *unused2)
 {
 	cycle_kernel_lock();
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+static ssize_t microcode_write(struct file *file, const char __user *buf,
+			       size_t len, loff_t *ppos)
 {
 	ssize_t ret;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
+		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
+		       num_physpages);
 		return -EINVAL;
 	}
 
@@ -201,7 +203,7 @@ static struct miscdevice microcode_dev = {
 	.fops		= &microcode_fops,
 };
 
-static int __init microcode_dev_init (void)
+static int __init microcode_dev_init(void)
 {
 	int error;
 
@@ -216,7 +218,7 @@ static int __init microcode_dev_init (void)
 	return 0;
 }
 
-static void microcode_dev_exit (void)
+static void microcode_dev_exit(void)
 {
 	misc_deregister(&microcode_dev);
 }
@@ -224,7 +226,7 @@ static void microcode_dev_exit (void)
 MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 #else
 #define microcode_dev_init() 0
-#define microcode_dev_exit() do { } while(0)
+#define microcode_dev_exit() do { } while (0)
 #endif
 
 /* fake device for request_firmware */
@@ -451,7 +453,7 @@ int microcode_init(void *opaque, struct module *module)
 }
 EXPORT_SYMBOL_GPL(microcode_init);
 
-void __exit microcode_exit (void)
+void __exit microcode_exit(void)
 {
 	microcode_dev_exit();
 

From 636a31781684d0f49208aa163f1a5a3a74210eb4 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Fri, 1 Aug 2008 12:46:46 +0200
Subject: [PATCH 039/142] x86: Fixed NULL function pointer dereference in AMD
 microcode patch loader.

Dereference took place in code part responsible for manual installation
of microcode patches through /dev/cpu/microcode.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Cc: Peter Oruba <peter.oruba@amd.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 39961bb83293..ad136ad99cb3 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -127,7 +127,8 @@ static int do_microcode_update(void)
 	old = current->cpus_allowed;
 
 	while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) {
-		error = microcode_ops->microcode_sanity_check(new_mc);
+		if (microcode_ops->microcode_sanity_check != NULL)
+			error = microcode_ops->microcode_sanity_check(new_mc);
 		if (error)
 			goto out;
 		/*

From 467cd0529a480c9c25f06154b788d1d1ba77828a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 18 Aug 2008 16:56:29 -0700
Subject: [PATCH 040/142] x86: early_printk.c trivial sparse fixes

arch/x86/kernel/early_printk.c:404:13: warning: incorrect type in assignment (different base types)
arch/x86/kernel/early_printk.c:404:13:    expected restricted __le16 [assigned] [usertype] wValue
arch/x86/kernel/early_printk.c:404:13:    got int [signed] value
arch/x86/kernel/early_printk.c:405:13: warning: incorrect type in assignment (different base types)
arch/x86/kernel/early_printk.c:405:13:    expected restricted __le16 [assigned] [usertype] wIndex
arch/x86/kernel/early_printk.c:405:13:    got int [signed] index
arch/x86/kernel/early_printk.c:406:14: warning: incorrect type in assignment (different base types)
arch/x86/kernel/early_printk.c:406:14:    expected restricted __le16 [assigned] [usertype] wLength
arch/x86/kernel/early_printk.c:406:14:    got int [signed] size
arch/x86/kernel/early_printk.c:845:16: warning: Using plain integer as NULL pointer
arch/x86/kernel/early_printk.c:992:13: warning: symbol 'enable_debug_console' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early_printk.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 28155acf706e..825e9136b026 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -401,9 +401,9 @@ static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
 	/* Compute the control message */
 	req.bRequestType = requesttype;
 	req.bRequest = request;
-	req.wValue = value;
-	req.wIndex = index;
-	req.wLength = size;
+	req.wValue = cpu_to_le16(value);
+	req.wIndex = cpu_to_le16(index);
+	req.wLength = cpu_to_le16(size);
 
 	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
 	addr = DBGP_EPADDR(devnum, 0);
@@ -842,7 +842,7 @@ static int __init early_dbgp_init(char *s)
 	ret = ehci_setup();
 	if (ret < 0) {
 		dbgp_printk("ehci_setup failed\n");
-		ehci_debug = 0;
+		ehci_debug = NULL;
 
 		return -1;
 	}
@@ -989,7 +989,7 @@ static int __init setup_early_printk(char *buf)
 	return 0;
 }
 
-void __init enable_debug_console(char *buf)
+static void __init enable_debug_console(char *buf)
 {
 #ifdef DBGP_DEBUG
 	struct console *old_early_console = NULL;

From 8343ef2437c599d30568e6b5a257a40bf2f4902b Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Wed, 20 Aug 2008 00:16:13 +0200
Subject: [PATCH 041/142] x86-microcode: fix unbalanced use of get_cpu()

Don't use get_cpu() at all. Resort to checking a boot-up CPU (#0) in
microcode_{intel,amd}_module_init().

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c   | 10 ++++++----
 arch/x86/kernel/microcode_intel.c | 10 ++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 33b2a217a8c5..a6e76ccf8158 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -500,13 +500,15 @@ static struct microcode_ops microcode_amd_ops = {
 
 static int __init microcode_amd_module_init(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(get_cpu());
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	equiv_cpu_table = NULL;
-	if (c->x86_vendor == X86_VENDOR_AMD)
-		return microcode_init(&microcode_amd_ops, THIS_MODULE);
-	else
+	if (c->x86_vendor != X86_VENDOR_AMD) {
+		printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n");
 		return -ENODEV;
+	}
+
+	return microcode_init(&microcode_amd_ops, THIS_MODULE);
 }
 
 static void __exit microcode_amd_module_exit(void)
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index d2d9d74f4cbb..6dd8907ff22e 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -531,12 +531,14 @@ static struct microcode_ops microcode_intel_ops = {
 
 static int __init microcode_intel_module_init(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(get_cpu());
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
-	if (c->x86_vendor == X86_VENDOR_INTEL)
-		return microcode_init(&microcode_intel_ops, THIS_MODULE);
-	else
+	if (c->x86_vendor != X86_VENDOR_INTEL) {
+                printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n");
 		return -ENODEV;
+	}
+
+	return microcode_init(&microcode_intel_ops, THIS_MODULE);
 }
 
 static void __exit microcode_intel_module_exit(void)

From d45de40934897c6ee5b05141f7895bbb28512395 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Wed, 20 Aug 2008 00:22:26 +0200
Subject: [PATCH 042/142] x86-microcode: generic interface refactoring

This is the 1st patch in the series. Here the aim was to avoid any
significant changes, logically-wise.

So it's mainly about generic interface refactoring: e.g. make
microcode_{intel,amd}.c more about arch-specific details and less
about policies like make-sure-we-run-on-a-target-cpu
(no more set_cpus_allowed_ptr() here) and generic synchronization (no
more microcode_mutex here).

All in all, more line have been deleted than added.

4 files changed, 145 insertions(+), 198 deletions(-)

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c       | 155 ++++++++++++++++++++----------
 arch/x86/kernel/microcode_amd.c   |  77 +++------------
 arch/x86/kernel/microcode_intel.c |  91 +++---------------
 include/asm-x86/microcode.h       |  20 +++-
 4 files changed, 145 insertions(+), 198 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index ad136ad99cb3..b2f84ce5eed3 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -71,7 +71,7 @@
  *		Thanks to Stuart Swales for pointing out this bug.
  */
 
-/*#define DEBUG pr_debug */
+/* #define DEBUG pr_debug */
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -104,8 +104,7 @@ MODULE_LICENSE("GPL");
 struct microcode_ops *microcode_ops;
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-DEFINE_MUTEX(microcode_mutex);
-EXPORT_SYMBOL_GPL(microcode_mutex);
+static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
@@ -234,22 +233,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 struct platform_device *microcode_pdev;
 EXPORT_SYMBOL_GPL(microcode_pdev);
 
-static void microcode_init_cpu(int cpu, int resume)
-{
-	cpumask_t old;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	old = current->cpus_allowed;
-
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	mutex_lock(&microcode_mutex);
-	microcode_ops->collect_cpu_info(cpu);
-	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
-		microcode_ops->cpu_request_microcode(cpu);
-	mutex_unlock(&microcode_mutex);
-	set_cpus_allowed_ptr(current, &old);
-}
-
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
 			    const char *buf, size_t sz)
@@ -266,14 +249,15 @@ static ssize_t reload_store(struct sys_device *dev,
 		cpumask_t old = current->cpus_allowed;
 
 		get_online_cpus();
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
-		mutex_lock(&microcode_mutex);
-		if (uci->valid)
-			err = microcode_ops->cpu_request_microcode(cpu);
-		mutex_unlock(&microcode_mutex);
+		if (cpu_online(cpu)) {
+			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+			mutex_lock(&microcode_mutex);
+			if (uci->valid)
+				err = microcode_ops->cpu_request_microcode(cpu);
+			mutex_unlock(&microcode_mutex);
+			set_cpus_allowed_ptr(current, &old);
+		}
 		put_online_cpus();
-		set_cpus_allowed_ptr(current, &old);
 	}
 	if (err)
 		return err;
@@ -285,7 +269,7 @@ static ssize_t version_show(struct sys_device *dev,
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
 
-	return sprintf(buf, "0x%x\n", uci->rev);
+	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
 }
 
 static ssize_t pf_show(struct sys_device *dev,
@@ -293,7 +277,7 @@ static ssize_t pf_show(struct sys_device *dev,
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
 
-	return sprintf(buf, "0x%x\n", uci->pf);
+	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
 }
 
 static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
@@ -312,7 +296,85 @@ static struct attribute_group mc_attr_group = {
 	.name = "microcode",
 };
 
-static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
+static void microcode_fini_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	mutex_lock(&microcode_mutex);
+	microcode_ops->microcode_fini_cpu(cpu);
+	uci->valid = 0;
+	mutex_unlock(&microcode_mutex);
+}
+
+static void collect_cpu_info(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	memset(uci, 0, sizeof(*uci));
+	if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
+		uci->valid = 1;
+}
+
+static void microcode_resume_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct cpu_signature nsig;
+
+	pr_debug("microcode: CPU%d resumed\n", cpu);
+
+	if (!uci->mc.valid_mc)
+		return;
+
+	/*
+	 * Let's verify that the 'cached' ucode does belong
+	 * to this cpu (a bit of paranoia):
+	 */
+	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
+		microcode_fini_cpu(cpu);
+		return;
+	}
+
+	if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
+		microcode_fini_cpu(cpu);
+		/* Should we look for a new ucode here? */
+		return;
+	}
+
+	microcode_ops->apply_microcode(cpu);
+}
+
+void microcode_update_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(raw_smp_processor_id() != cpu);
+
+	mutex_lock(&microcode_mutex);
+	/*
+	 * Check if the system resume is in progress (uci->valid != NULL),
+	 * otherwise just request a firmware:
+	 */
+	if (uci->valid) {
+		microcode_resume_cpu(cpu);
+	} else {	
+		collect_cpu_info(cpu);
+		if (uci->valid && system_state == SYSTEM_RUNNING)
+			microcode_ops->cpu_request_microcode(cpu);
+	}
+	mutex_unlock(&microcode_mutex);
+}
+
+static void microcode_init_cpu(int cpu)
+{
+	cpumask_t old = current->cpus_allowed;
+
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	microcode_update_cpu(cpu);
+	set_cpus_allowed_ptr(current, &old);
+}
+
+static int mc_sysdev_add(struct sys_device *sys_dev)
 {
 	int err, cpu = sys_dev->id;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -327,16 +389,10 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
 	if (err)
 		return err;
 
-	microcode_init_cpu(cpu, resume);
-
+	microcode_init_cpu(cpu);
 	return 0;
 }
 
-static int mc_sysdev_add(struct sys_device *sys_dev)
-{
-	return __mc_sysdev_add(sys_dev, 0);
-}
-
 static int mc_sysdev_remove(struct sys_device *sys_dev)
 {
 	int cpu = sys_dev->id;
@@ -345,7 +401,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 		return 0;
 
 	pr_debug("microcode: CPU%d removed\n", cpu);
-	microcode_ops->microcode_fini_cpu(cpu);
+	microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
 }
@@ -376,33 +432,26 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 
 	sys_dev = get_cpu_sysdev(cpu);
 	switch (action) {
-	case CPU_UP_CANCELED_FROZEN:
-		/* The CPU refused to come up during a system resume */
-		microcode_ops->microcode_fini_cpu(cpu);
-		break;
 	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		mc_sysdev_add(sys_dev);
-		break;
 	case CPU_ONLINE_FROZEN:
-		/* System-wide resume is in progress, try to apply microcode */
-		if (microcode_ops->apply_microcode_check_cpu(cpu)) {
-			/* The application of microcode failed */
-			microcode_ops->microcode_fini_cpu(cpu);
-			__mc_sysdev_add(sys_dev, 1);
-			break;
-		}
+		microcode_init_cpu(cpu);
+	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
+		pr_debug("microcode: CPU%d added\n", cpu);
 		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
 			printk(KERN_ERR "microcode: Failed to create the sysfs "
 				"group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-		mc_sysdev_remove(sys_dev);
-		break;
 	case CPU_DOWN_PREPARE_FROZEN:
 		/* Suspend is in progress, only remove the interface */
 		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+		pr_debug("microcode: CPU%d removed\n", cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_UP_CANCELED_FROZEN:
+		/* The CPU refused to come up during a system resume */
+		microcode_fini_cpu(cpu);
 		break;
 	}
 	return NOTIFY_OK;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index a6e76ccf8158..4006e5e3adf0 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -59,38 +59,28 @@ MODULE_LICENSE("GPL v2");
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-extern struct mutex (microcode_mutex);
-
 struct equiv_cpu_entry *equiv_cpu_table;
 
-extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
-
-static void collect_cpu_info_amd(int cpu)
+static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(raw_smp_processor_id() != cpu);
-	uci->rev = 0;
-	uci->pf = 0;
-	uci->mc.mc_amd = NULL;
-	uci->valid = 1;
+	memset(csig, 0, sizeof(*csig));
 
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
 		printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
 		       cpu);
-		uci->valid = 0;
-		return;
+		return -1;
 	}
 
 	asm volatile("movl %1, %%ecx; rdmsr"
-		     : "=a" (uci->rev)
+		     : "=a" (csig->rev)
 		     : "i" (0x0000008B) : "ecx");
 
 	printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
-	       uci->rev);
+		csig->rev);
+
+	return 0;
 }
 
 static int get_matching_microcode_amd(void *mc, int cpu)
@@ -119,7 +109,7 @@ static int get_matching_microcode_amd(void *mc, int cpu)
 	if (equiv_cpu_table == NULL) {
 		printk(KERN_INFO "microcode: CPU%d microcode update with "
 		       "version 0x%x (current=0x%x)\n",
-		       cpu, mc_header->patch_id, uci->rev);
+		       cpu, mc_header->patch_id, uci->cpu_sig.rev);
 		goto out;
 	}
 
@@ -185,12 +175,12 @@ static int get_matching_microcode_amd(void *mc, int cpu)
 		pci_dev_put(sb_pci_dev);
 	}
 
-	if (mc_header->patch_id <= uci->rev)
+	if (mc_header->patch_id <= uci->cpu_sig.rev)
 		return 0;
 
 	printk(KERN_INFO "microcode: CPU%d found a matching microcode "
 	       "update with version 0x%x (current=0x%x)\n",
-	       cpu, mc_header->patch_id, uci->rev);
+	       cpu, mc_header->patch_id, uci->cpu_sig.rev);
 
 out:
 	new_mc = vmalloc(UCODE_MAX_SIZE);
@@ -250,9 +240,9 @@ static void apply_microcode_amd(int cpu)
 
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x \n",
-	       cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id);
+	       cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
 
-	uci->rev = rev;
+	uci->cpu_sig.rev = rev;
 }
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -437,61 +427,18 @@ static int cpu_request_microcode_amd(int cpu)
 	return error;
 }
 
-static int apply_microcode_check_cpu_amd(int cpu)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	unsigned int rev;
-	cpumask_t old;
-	int err = 0;
-
-	/* Check if the microcode is available */
-	if (!uci->mc.mc_amd)
-		return 0;
-
-	old = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
-	/* Check if the microcode we have in memory matches the CPU */
-	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16)
-		err = -EINVAL;
-
-	if (!err) {
-		asm volatile("movl %1, %%ecx; rdmsr"
-		     : "=a" (rev)
-		     : "i" (0x0000008B) : "ecx");
-
-		if (uci->rev != rev)
-			err = -EINVAL;
-	}
-
-	if (!err)
-		apply_microcode_amd(cpu);
-	else
-		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
-		       " rev=0x%x\n",
-		       cpu,  uci->rev);
-
-	set_cpus_allowed(current, old);
-	return err;
-}
-
 static void microcode_fini_cpu_amd(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	mutex_lock(&microcode_mutex);
-	uci->valid = 0;
 	vfree(uci->mc.mc_amd);
 	uci->mc.mc_amd = NULL;
-	mutex_unlock(&microcode_mutex);
 }
 
 static struct microcode_ops microcode_amd_ops = {
 	.get_next_ucode                   = get_next_ucode_amd,
 	.get_matching_microcode           = get_matching_microcode_amd,
 	.microcode_sanity_check           = NULL,
-	.apply_microcode_check_cpu        = apply_microcode_check_cpu_amd,
 	.cpu_request_microcode            = cpu_request_microcode_amd,
 	.collect_cpu_info                 = collect_cpu_info_amd,
 	.apply_microcode                  = apply_microcode_amd,
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 6dd8907ff22e..c9b53202ba3d 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -122,46 +122,37 @@ MODULE_LICENSE("GPL");
 /* serialize access to the physical write to MSR 0x79 */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-extern struct mutex microcode_mutex;
-
-extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
-
-static void collect_cpu_info(int cpu_num)
+static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	unsigned int val[2];
 
-	/* We should bind the task to the CPU */
-	BUG_ON(raw_smp_processor_id() != cpu_num);
-	uci->pf = uci->rev = 0;
-	uci->mc.mc_intel = NULL;
-	uci->valid = 1;
+	memset(csig, 0, sizeof(*csig));
 
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
 	    cpu_has(c, X86_FEATURE_IA64)) {
 		printk(KERN_ERR "microcode: CPU%d not a capable Intel "
 			"processor\n", cpu_num);
-		uci->valid = 0;
-		return;
+		return -1;
 	}
 
-	uci->sig = cpuid_eax(0x00000001);
+	csig->sig = cpuid_eax(0x00000001);
 
 	if ((c->x86_model >= 5) || (c->x86 > 6)) {
 		/* get processor flags from MSR 0x17 */
 		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		uci->pf = 1 << ((val[1] >> 18) & 7);
+		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	/* see notes above for revision 1.07.  Apparent chip bug */
 	sync_core();
 	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
+	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
 	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-			uci->sig, uci->pf, uci->rev);
+			csig->sig, csig->pf, csig->rev);
+
+	return 0;
 }
 
 static inline int microcode_update_match(int cpu_num,
@@ -169,8 +160,8 @@ static inline int microcode_update_match(int cpu_num,
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 
-	if (!sigmatch(sig, uci->sig, pf, uci->pf)
-		|| mc_header->rev <= uci->rev)
+	if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf)
+		|| mc_header->rev <= uci->cpu_sig.rev)
 		return 0;
 	return 1;
 }
@@ -289,7 +280,7 @@ static int get_matching_microcode(void *mc, int cpu)
 find:
 	pr_debug("microcode: CPU%d found a matching microcode update with"
 		 " version 0x%x (current=0x%x)\n",
-		 cpu, mc_header->rev, uci->rev);
+		 cpu, mc_header->rev, uci->cpu_sig.rev);
 	new_mc = vmalloc(total_size);
 	if (!new_mc) {
 		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
@@ -335,16 +326,16 @@ static void apply_microcode(int cpu)
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
 	if (val[1] != uci->mc.mc_intel->hdr.rev) {
 		printk(KERN_ERR "microcode: CPU%d update from revision "
-			"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
+			"0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
 		return;
 	}
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x, date = %04x-%02x-%02x \n",
-		cpu_num, uci->rev, val[1],
+		cpu_num, uci->cpu_sig.rev, val[1],
 		uci->mc.mc_intel->hdr.date & 0xffff,
 		uci->mc.mc_intel->hdr.date >> 24,
 		(uci->mc.mc_intel->hdr.date >> 16) & 0xff);
-	uci->rev = val[1];
+	uci->cpu_sig.rev = val[1];
 }
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -459,70 +450,18 @@ static int cpu_request_microcode(int cpu)
 	return error;
 }
 
-static int apply_microcode_check_cpu(int cpu)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	cpumask_t old;
-	unsigned int val[2];
-	int err = 0;
-
-	/* Check if the microcode is available */
-	if (!uci->mc.mc_intel)
-		return 0;
-
-	old = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
-	/* Check if the microcode we have in memory matches the CPU */
-	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
-	    cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
-		err = -EINVAL;
-
-	if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
-		/* get processor flags from MSR 0x17 */
-		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		if (uci->pf != (1 << ((val[1] >> 18) & 7)))
-			err = -EINVAL;
-	}
-
-	if (!err) {
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		/* see notes above for revision 1.07.  Apparent chip bug */
-		sync_core();
-		/* get the current revision from MSR 0x8B */
-		rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-		if (uci->rev != val[1])
-			err = -EINVAL;
-	}
-
-	if (!err)
-		apply_microcode(cpu);
-	else
-		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
-			" sig=0x%x, pf=0x%x, rev=0x%x\n",
-			cpu, uci->sig, uci->pf, uci->rev);
-
-	set_cpus_allowed_ptr(current, &old);
-	return err;
-}
-
 static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	mutex_lock(&microcode_mutex);
-	uci->valid = 0;
 	vfree(uci->mc.mc_intel);
 	uci->mc.mc_intel = NULL;
-	mutex_unlock(&microcode_mutex);
 }
 
 static struct microcode_ops microcode_intel_ops = {
 	.get_next_ucode                   = get_next_ucode,
 	.get_matching_microcode           = get_matching_microcode,
 	.microcode_sanity_check           = microcode_sanity_check,
-	.apply_microcode_check_cpu        = apply_microcode_check_cpu,
 	.cpu_request_microcode            = cpu_request_microcode,
 	.collect_cpu_info                 = collect_cpu_info,
 	.apply_microcode                  = apply_microcode,
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 18b2aeec2adf..7ceff48fa657 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -1,14 +1,18 @@
+#ifndef ASM_X86__MICROCODE_H
+#define ASM_X86__MICROCODE_H
+
 extern int microcode_init(void *opaque, struct module *module);
 extern void microcode_exit(void);
 
+struct cpu_signature;
+
 struct microcode_ops {
 	long (*get_next_ucode)(void **mc, long offset);
 	long (*microcode_get_next_ucode)(void **mc, long offset);
 	int (*get_matching_microcode)(void *mc, int cpu);
-	int (*apply_microcode_check_cpu)(int cpu);
 	int (*microcode_sanity_check)(void *mc);
 	int (*cpu_request_microcode)(int cpu);
-	void (*collect_cpu_info)(int cpu_num);
+	int (*collect_cpu_info)(int cpu_num, struct cpu_signature *csig);
 	void (*apply_microcode)(int cpu);
 	void (*microcode_fini_cpu)(int cpu);
 	void (*clear_patch)(void *data);
@@ -75,13 +79,21 @@ struct microcode_amd {
 	unsigned int mpb[0];
 };
 
-struct ucode_cpu_info {
-	int valid;
+struct cpu_signature {
 	unsigned int sig;
 	unsigned int pf;
 	unsigned int rev;
+};
+
+struct ucode_cpu_info {
+	struct cpu_signature cpu_sig;
+	int valid;
 	union {
 		struct microcode_intel *mc_intel;
 		struct microcode_amd *mc_amd;
+		void *valid_mc;
 	} mc;
 };
+extern struct ucode_cpu_info ucode_cpu_info[];
+
+#endif /* ASM_X86__MICROCODE_H */

From 6e833587e11ed0dbf12e647127f2650e2f80b26d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 19 Aug 2008 13:16:17 -0700
Subject: [PATCH 043/142] xen: clean up domain mode predicates

There are four operating modes Xen code may find itself running in:
 - native
 - hvm domain
 - pv dom0
 - pv domU

Clean up predicates for testing for these states to make them more consistent.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Xen-devel <xen-devel@lists.xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c          |  9 +++++++--
 drivers/block/xen-blkfront.c      |  2 +-
 drivers/char/hvc_xen.c            |  6 +++---
 drivers/input/xen-kbdfront.c      |  4 ++--
 drivers/net/xen-netfront.c        |  6 +++---
 drivers/video/xen-fbfront.c       |  4 ++--
 drivers/xen/balloon.c             |  2 +-
 drivers/xen/grant-table.c         |  2 +-
 drivers/xen/xenbus/xenbus_probe.c |  8 ++++----
 include/asm-x86/xen/hypervisor.h  | 14 ++++++++++++--
 10 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 53afa14eb314..b106e825d266 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(hypercall_page);
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -1613,6 +1616,8 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_start_info)
 		return;
 
+	xen_domain_type = XEN_PV_DOMAIN;
+
 	BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
 	xen_setup_features();
@@ -1650,7 +1655,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
+	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
@@ -1685,7 +1690,7 @@ asmlinkage void __init xen_start_kernel(void)
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
-	if (!is_initial_xendomain()) {
+	if (!xen_initial_domain()) {
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643cafccd..d5e753255153 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1032,7 +1032,7 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index 6b70aa66a587..538ceea5e7df 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -108,8 +108,8 @@ static int __init xen_init(void)
 {
 	struct hvc_struct *hp;
 
-	if (!is_running_on_xen() ||
-	    is_initial_xendomain() ||
+	if (!xen_pv_domain() ||
+	    xen_initial_domain() ||
 	    !xen_start_info->console.domU.evtchn)
 		return -ENODEV;
 
@@ -142,7 +142,7 @@ static void __exit xen_fini(void)
 
 static int xen_cons_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_pv_domain())
 		return 0;
 
 	hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index 9ce3b3baf3a2..3ab6362f043c 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = {
 
 static int __init xenkbd_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return -ENODEV;
 
 	return xenbus_register_frontend(&xenkbd);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c749bdba214c..3c3dd403f5dd 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = {
 
 static int __init netif_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return 0;
 
 	printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
@@ -1809,7 +1809,7 @@ module_init(netif_init);
 
 static void __exit netif_exit(void)
 {
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return;
 
 	xenbus_unregister_driver(&netfront);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 47ed39b52f9c..a463b3dd837b 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = {
 
 static int __init xenfb_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return -ENODEV;
 
 	return xenbus_register_frontend(&xenfb);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index fff987b10e0f..a51f3e17a5fd 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -419,7 +419,7 @@ static int __init balloon_init(void)
 	unsigned long pfn;
 	struct page *page;
 
-	if (!is_running_on_xen())
+	if (!xen_pv_domain())
 		return -ENODEV;
 
 	pr_info("xen_balloon: Initialising balloon driver.\n");
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e9e11168616a..06592b9da83c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -508,7 +508,7 @@ static int __devinit gnttab_init(void)
 	unsigned int max_nr_glist_frames, nr_glist_frames;
 	unsigned int nr_init_grefs;
 
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	nr_grant_frames = 1;
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 57ceb5346b74..7f24a98a446f 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void)
 	DPRINTK("");
 
 	err = -ENODEV;
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		goto out_error;
 
 	/* Register ourselves with the kernel bus subsystem */
@@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void)
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
-	if (is_initial_xendomain()) {
+	if (xen_initial_domain()) {
 		/* dom0 not yet supported */
 	} else {
 		xenstored_ready = 1;
@@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void)
 		goto out_unreg_back;
 	}
 
-	if (!is_initial_xendomain())
+	if (!xen_initial_domain())
 		xenbus_probe(NULL);
 
 	return 0;
@@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
 	unsigned long timeout = jiffies + 10*HZ;
 	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
 
-	if (!ready_to_wait_for_devices || !is_running_on_xen())
+	if (!ready_to_wait_for_devices || !xen_domain())
 		return;
 
 	while (exists_disconnected_device(drv)) {
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 8e15dd28c91f..d9dd28caa508 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -55,7 +55,6 @@
 /* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
-#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
 
 /* arch/i386/mach-xen/evtchn.c */
 /* Force a proper event-channel callback from Xen. */
@@ -68,6 +67,17 @@ u64 jiffies_to_st(unsigned long jiffies);
 #define MULTI_UVMFLAGS_INDEX 3
 #define MULTI_UVMDOMID_INDEX 4
 
-#define is_running_on_xen()	(xen_start_info ? 1 : 0)
+enum xen_domain_type {
+	XEN_NATIVE,
+	XEN_PV_DOMAIN,
+	XEN_HVM_DOMAIN,
+};
+
+extern enum xen_domain_type xen_domain_type;
+
+#define xen_domain()		(xen_domain_type != XEN_NATIVE)
+#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#define xen_initial_domain()	(xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
+#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
 
 #endif /* __HYPERVISOR_H__ */

From 63d3a75d6f1fcf2f33e6abbe84e1f428c3586152 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 19 Aug 2008 13:19:36 -0700
Subject: [PATCH 044/142] x86/paravirt: add spin_lock_flags lock op

It is useful for a pv_lock_ops backend to know whether interrupts are
enabled or not in the context a spin_lock is being called.  This
allows it to enable interrupts while spinning, which could be
particularly helpful when spinning becomes blocking.

The default implementation just calls the normal spin_lock op,
ignoring the flags.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt-spinlocks.c | 6 ++++++
 include/asm-x86/paravirt.h           | 7 +++++++
 include/asm-x86/spinlock.h           | 9 +++++++--
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 38d7f7f1dbc9..206359a8e43f 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,12 +7,18 @@
 
 #include <asm/paravirt.h>
 
+static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
 	.spin_is_locked = __ticket_spin_is_locked,
 	.spin_is_contended = __ticket_spin_is_contended,
 
 	.spin_lock = __ticket_spin_lock,
+	.spin_lock_flags = default_spin_lock_flags,
 	.spin_trylock = __ticket_spin_trylock,
 	.spin_unlock = __ticket_spin_unlock,
 #endif
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index db9b0647b346..8e9b1266898c 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -333,6 +333,7 @@ struct pv_lock_ops {
 	int (*spin_is_locked)(struct raw_spinlock *lock);
 	int (*spin_is_contended)(struct raw_spinlock *lock);
 	void (*spin_lock)(struct raw_spinlock *lock);
+	void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
 	int (*spin_trylock)(struct raw_spinlock *lock);
 	void (*spin_unlock)(struct raw_spinlock *lock);
 };
@@ -1414,6 +1415,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
 	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
 }
 
+static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
+						  unsigned long flags)
+{
+	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
+}
+
 static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e39c790dbfd2..b755ea86367e 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -182,8 +182,6 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
 #ifdef CONFIG_PARAVIRT
 /*
  * Define virtualization-friendly old-style lock byte lock, for use in
@@ -272,6 +270,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+						  unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
 #endif	/* CONFIG_PARAVIRT */
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)

From 11ad93e59d114f4b218873f1c93261be725d2e22 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 19 Aug 2008 13:32:51 -0700
Subject: [PATCH 045/142] xen: clarify locking used when pinning a pagetable.

Add some comments explaining the locking and pinning algorithm when
using split pte locks.  Also implement a minor optimisation of not
pinning the PTE when not using split pte locks.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Xen-devel <xen-devel@lists.xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aa37469da696..d3752b6ce6e6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -590,8 +590,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	pmdidx_limit = 0;
 #endif
 
-	flush |= (*func)(virt_to_page(pgd), PT_PGD);
-
 	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
 
@@ -637,7 +635,11 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 			}
 		}
 	}
+
 out:
+	/* Do the top level last, so that the callbacks can use it as
+	   a cue to do final things like tlb flushes. */
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
 
 	return flush;
 }
@@ -691,6 +693,26 @@ static int pin_page(struct page *page, enum pt_level level)
 
 		flush = 0;
 
+		/*
+		 * We need to hold the pagetable lock between the time
+		 * we make the pagetable RO and when we actually pin
+		 * it.  If we don't, then other users may come in and
+		 * attempt to update the pagetable by writing it,
+		 * which will fail because the memory is RO but not
+		 * pinned, so Xen won't do the trap'n'emulate.
+		 *
+		 * If we're using split pte locks, we can't hold the
+		 * entire pagetable's worth of locks during the
+		 * traverse, because we may wrap the preempt count (8
+		 * bits).  The solution is to mark RO and pin each PTE
+		 * page while holding the lock.  This means the number
+		 * of locks we end up holding is never more than a
+		 * batch size (~32 entries, at present).
+		 *
+		 * If we're not using split pte locks, we needn't pin
+		 * the PTE pages independently, because we're
+		 * protected by the overall pagetable lock.
+		 */
 		ptl = NULL;
 		if (level == PT_PTE)
 			ptl = lock_pte(page);
@@ -699,10 +721,9 @@ static int pin_page(struct page *page, enum pt_level level)
 					pfn_pte(pfn, PAGE_KERNEL_RO),
 					level == PT_PGD ? UVMF_TLB_FLUSH : 0);
 
-		if (level == PT_PTE)
+		if (ptl) {
 			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
 
-		if (ptl) {
 			/* Queue a deferred unlock for when this batch
 			   is completed. */
 			xen_mc_callback(do_unlock, ptl);
@@ -796,10 +817,18 @@ static int unpin_page(struct page *page, enum pt_level level)
 		spinlock_t *ptl = NULL;
 		struct multicall_space mcs;
 
+		/*
+		 * Do the converse to pin_page.  If we're using split
+		 * pte locks, we must be holding the lock for while
+		 * the pte page is unpinned but still RO to prevent
+		 * concurrent updates from seeing it in this
+		 * partially-pinned state.
+		 */
 		if (level == PT_PTE) {
 			ptl = lock_pte(page);
 
-			xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
+			if (ptl)
+				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
 		}
 
 		mcs = __xen_mc_entry(0);
@@ -837,7 +866,7 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
 
 	pgd_walk(pgd, unpin_page, USER_LIMIT);

From 7708ad64a24a674f7905aa7a5099a50f055debec Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 19 Aug 2008 13:34:22 -0700
Subject: [PATCH 046/142] xen: add xen_ prefixes to make tracing with ftrace
 easier

It's easier to pattern match on Xen function if they all start with xen_.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 66 ++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d3752b6ce6e6..d9a35a363095 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -229,14 +229,14 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-static bool page_pinned(void *ptr)
+static bool xen_page_pinned(void *ptr)
 {
 	struct page *page = virt_to_page(ptr);
 
 	return PagePinned(page);
 }
 
-static void extend_mmu_update(const struct mmu_update *update)
+static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
 	struct mmu_update *u;
@@ -265,7 +265,7 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 	/* ptr may be ioremapped for 64-bit pagetable setup */
 	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pmd_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -276,7 +276,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
 {
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		return;
 	}
@@ -334,7 +334,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
 	u.val = pte_val_ma(pte);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
@@ -400,7 +400,7 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 	/* ptr may be ioremapped for 64-bit pagetable setup */
 	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pud_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -411,7 +411,7 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 {
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		return;
 	}
@@ -490,7 +490,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
 }
 
 /*
@@ -519,10 +519,10 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		if (user_ptr) {
-			WARN_ON(page_pinned(user_ptr));
+			WARN_ON(xen_page_pinned(user_ptr));
 			*user_ptr = val;
 		}
 		return;
@@ -555,8 +555,8 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
-		    unsigned long limit)
+static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
+			unsigned long limit)
 {
 	int flush = 0;
 	unsigned hole_low, hole_high;
@@ -644,7 +644,9 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	return flush;
 }
 
-static spinlock_t *lock_pte(struct page *page)
+/* If we're using split pte locks, then take the page's lock and
+   return a pointer to it.  Otherwise return NULL. */
+static spinlock_t *xen_pte_lock(struct page *page)
 {
 	spinlock_t *ptl = NULL;
 
@@ -656,7 +658,7 @@ static spinlock_t *lock_pte(struct page *page)
 	return ptl;
 }
 
-static void do_unlock(void *v)
+static void xen_pte_unlock(void *v)
 {
 	spinlock_t *ptl = v;
 	spin_unlock(ptl);
@@ -674,7 +676,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 }
 
-static int pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct page *page, enum pt_level level)
 {
 	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
@@ -715,7 +717,7 @@ static int pin_page(struct page *page, enum pt_level level)
 		 */
 		ptl = NULL;
 		if (level == PT_PTE)
-			ptl = lock_pte(page);
+			ptl = xen_pte_lock(page);
 
 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 					pfn_pte(pfn, PAGE_KERNEL_RO),
@@ -726,7 +728,7 @@ static int pin_page(struct page *page, enum pt_level level)
 
 			/* Queue a deferred unlock for when this batch
 			   is completed. */
-			xen_mc_callback(do_unlock, ptl);
+			xen_mc_callback(xen_pte_unlock, ptl);
 		}
 	}
 
@@ -740,7 +742,7 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
+	if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
@@ -754,14 +756,14 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
 
 		if (user_pgd) {
-			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_pin_page(virt_to_page(user_pgd), PT_PGD);
 			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
 		}
 	}
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 #endif /* CONFIG_X86_64 */
@@ -796,7 +798,7 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct page *page, enum pt_level level)
 {
 	SetPagePinned(page);
 	return 0;
@@ -804,10 +806,10 @@ static __init int mark_pinned(struct page *page, enum pt_level level)
 
 void __init xen_mark_init_mm_pinned(void)
 {
-	pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
+	xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP);
 }
 
-static int unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct page *page, enum pt_level level)
 {
 	unsigned pgfl = TestClearPagePinned(page);
 
@@ -825,7 +827,7 @@ static int unpin_page(struct page *page, enum pt_level level)
 		 * partially-pinned state.
 		 */
 		if (level == PT_PTE) {
-			ptl = lock_pte(page);
+			ptl = xen_pte_lock(page);
 
 			if (ptl)
 				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
@@ -839,7 +841,7 @@ static int unpin_page(struct page *page, enum pt_level level)
 
 		if (ptl) {
 			/* unlock when batch completed */
-			xen_mc_callback(do_unlock, ptl);
+			xen_mc_callback(xen_pte_unlock, ptl);
 		}
 	}
 
@@ -859,17 +861,17 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 		if (user_pgd) {
 			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-			unpin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_unpin_page(virt_to_page(user_pgd), PT_PGD);
 		}
 	}
 #endif
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
 
-	pgd_walk(pgd, unpin_page, USER_LIMIT);
+	xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
@@ -936,7 +938,7 @@ static void drop_other_mm_ref(void *info)
 	}
 }
 
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
 {
 	cpumask_t mask;
 	unsigned cpu;
@@ -966,7 +968,7 @@ static void drop_mm_ref(struct mm_struct *mm)
 		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
 {
 	if (current->active_mm == mm)
 		load_cr3(swapper_pg_dir);
@@ -990,13 +992,13 @@ static void drop_mm_ref(struct mm_struct *mm)
 void xen_exit_mmap(struct mm_struct *mm)
 {
 	get_cpu();		/* make sure we don't move around */
-	drop_mm_ref(mm);
+	xen_drop_mm_ref(mm);
 	put_cpu();
 
 	spin_lock(&mm->page_table_lock);
 
 	/* pgd may not be pinned in the error exit path of execve */
-	if (page_pinned(mm->pgd))
+	if (xen_page_pinned(mm->pgd))
 		xen_pgd_unpin(mm->pgd);
 
 	spin_unlock(&mm->page_table_lock);

From 168d2f464ab9860f0d1e66cf1f9684973222f1c6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 20 Aug 2008 17:02:18 -0700
Subject: [PATCH 047/142] xen: save previous spinlock when blocking

A spinlock can be interrupted while spinning, so make sure we preserve
the previous lock of interest if we're taking a lock from within an
interrupt handler.

We also need to deal with the case where the blocking path gets
interrupted between testing to see if the lock is free and actually
blocking.  If we get interrupted there and end up in the state where
the lock is free but the irq isn't pending, then we'll block
indefinitely in the hypervisor.  This fix is to make sure that any
nested lock-takers will always leave the irq pending if there's any
chance the outer lock became free.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/spinlock.c | 65 +++++++++++++++++++++++++++++++----------
 drivers/xen/events.c    | 25 ++++++++++++++++
 include/xen/events.h    |  2 ++
 3 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 8dc4d31da67f..4884bc603aa7 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -47,25 +47,41 @@ static int xen_spin_trylock(struct raw_spinlock *lock)
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 
-static inline void spinning_lock(struct xen_spinlock *xl)
+/*
+ * Mark a cpu as interested in a lock.  Returns the CPU's previous
+ * lock of interest, in case we got preempted by an interrupt.
+ */
+static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
 {
+	struct xen_spinlock *prev;
+
+	prev = __get_cpu_var(lock_spinners);
 	__get_cpu_var(lock_spinners) = xl;
+
 	wmb();			/* set lock of interest before count */
+
 	asm(LOCK_PREFIX " incw %0"
 	    : "+m" (xl->spinners) : : "memory");
+
+	return prev;
 }
 
-static inline void unspinning_lock(struct xen_spinlock *xl)
+/*
+ * Mark a cpu as no longer interested in a lock.  Restores previous
+ * lock of interest (NULL for none).
+ */
+static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
 {
 	asm(LOCK_PREFIX " decw %0"
 	    : "+m" (xl->spinners) : : "memory");
-	wmb();			/* decrement count before clearing lock */
-	__get_cpu_var(lock_spinners) = NULL;
+	wmb();			/* decrement count before restoring lock */
+	__get_cpu_var(lock_spinners) = prev;
 }
 
 static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	struct xen_spinlock *prev;
 	int irq = __get_cpu_var(lock_kicker_irq);
 	int ret;
 
@@ -74,23 +90,42 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 		return 0;
 
 	/* announce we're spinning */
-	spinning_lock(xl);
+	prev = spinning_lock(xl);
 
-	/* clear pending */
-	xen_clear_irq_pending(irq);
+	do {
+		/* clear pending */
+		xen_clear_irq_pending(irq);
 
-	/* check again make sure it didn't become free while
-	   we weren't looking  */
-	ret = xen_spin_trylock(lock);
-	if (ret)
-		goto out;
+		/* check again make sure it didn't become free while
+		   we weren't looking  */
+		ret = xen_spin_trylock(lock);
+		if (ret) {
+			/*
+			 * If we interrupted another spinlock while it
+			 * was blocking, make sure it doesn't block
+			 * without rechecking the lock.
+			 */
+			if (prev != NULL)
+				xen_set_irq_pending(irq);
+			goto out;
+		}
+
+		/*
+		 * Block until irq becomes pending.  If we're
+		 * interrupted at this point (after the trylock but
+		 * before entering the block), then the nested lock
+		 * handler guarantees that the irq will be left
+		 * pending if there's any chance the lock became free;
+		 * xen_poll_irq() returns immediately if the irq is
+		 * pending.
+		 */
+		xen_poll_irq(irq);
+	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-	/* block until irq becomes pending */
-	xen_poll_irq(irq);
 	kstat_this_cpu.irqs[irq]++;
 
 out:
-	unspinning_lock(xl);
+	unspinning_lock(xl, prev);
 	return ret;
 }
 
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a0837036d898..b6c2b8f16bee 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -164,6 +164,12 @@ static inline void set_evtchn(int port)
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline int test_evtchn(int port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	return sync_test_bit(port, &s->evtchn_pending[0]);
+}
+
 
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
@@ -732,6 +738,25 @@ void xen_clear_irq_pending(int irq)
 		clear_evtchn(evtchn);
 }
 
+void xen_set_irq_pending(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn))
+		set_evtchn(evtchn);
+}
+
+bool xen_test_irq_pending(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+	bool ret = false;
+
+	if (VALID_EVTCHN(evtchn))
+		ret = test_evtchn(evtchn);
+
+	return ret;
+}
+
 /* Poll waiting for an irq to become pending.  In the usual case, the
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq)
diff --git a/include/xen/events.h b/include/xen/events.h
index 4680ff3fbc91..0d5f1adc0363 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -46,6 +46,8 @@ extern void xen_irq_resume(void);
 
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq);
+void xen_set_irq_pending(int irq);
+bool xen_test_irq_pending(int irq);
 
 /* Poll waiting for an irq to become pending.  In the usual case, the
    irq will be disabled so it won't deliver an interrupt. */

From 994025caba3e6beade9bde84dd1b70d9d250f27b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 20 Aug 2008 17:02:19 -0700
Subject: [PATCH 048/142] xen: add debugfs support

Add support for exporting statistics on mmu updates, multicall
batching and pv spinlocks into debugfs. The base path is xen/ and
each subsystem adds its own directory: mmu, multicalls, spinlocks.

In each directory, writing 1 to "zero_stats" will cause the
corresponding stats to be zeroed the next time they're updated.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Kconfig      |  10 ++-
 arch/x86/xen/Makefile     |   3 +-
 arch/x86/xen/debugfs.c    | 123 ++++++++++++++++++++++++++++
 arch/x86/xen/debugfs.h    |  10 +++
 arch/x86/xen/mmu.c        | 163 ++++++++++++++++++++++++++++++++++++-
 arch/x86/xen/multicalls.c | 115 +++++++++++++++++++++++++-
 arch/x86/xen/spinlock.c   | 165 +++++++++++++++++++++++++++++++++++++-
 7 files changed, 580 insertions(+), 9 deletions(-)
 create mode 100644 arch/x86/xen/debugfs.c
 create mode 100644 arch/x86/xen/debugfs.h

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 3815e425f470..d3e68465ace9 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -27,4 +27,12 @@ config XEN_MAX_DOMAIN_MEMORY
 config XEN_SAVE_RESTORE
        bool
        depends on PM
-       default y
\ No newline at end of file
+       default y
+
+config XEN_DEBUG_FS
+	bool "Enable Xen debug and tuning parameters in debugfs"
+	depends on XEN && DEBUG_FS
+	default n
+	help
+	  Enable statistics output and various tuning options in debugfs.
+	  Enabling this option may incur a significant performance overhead.
\ No newline at end of file
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 9ee745fa5527..313947940a1a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -8,4 +8,5 @@ endif
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
-obj-$(CONFIG_SMP)	+= smp.o spinlock.o
+obj-$(CONFIG_SMP)		+= smp.o spinlock.o
+obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
\ No newline at end of file
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
new file mode 100644
index 000000000000..b53225d2cac3
--- /dev/null
+++ b/arch/x86/xen/debugfs.c
@@ -0,0 +1,123 @@
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "debugfs.h"
+
+static struct dentry *d_xen_debug;
+
+struct dentry * __init xen_init_debugfs(void)
+{
+	if (!d_xen_debug) {
+		d_xen_debug = debugfs_create_dir("xen", NULL);
+
+		if (!d_xen_debug)
+			pr_warning("Could not create 'xen' debugfs directory\n");
+	}
+
+	return d_xen_debug;
+}
+
+struct array_data
+{
+	void *array;
+	unsigned elements;
+};
+
+static int u32_array_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+	return nonseekable_open(inode, file);
+}
+
+static size_t format_array(char *buf, size_t bufsize, const char *fmt,
+			   u32 *array, unsigned array_size)
+{
+	size_t ret = 0;
+	unsigned i;
+
+	for(i = 0; i < array_size; i++) {
+		size_t len;
+
+		len = snprintf(buf, bufsize, fmt, array[i]);
+		len++;	/* ' ' or '\n' */
+		ret += len;
+
+		if (buf) {
+			buf += len;
+			bufsize -= len;
+			buf[-1] = (i == array_size-1) ? '\n' : ' ';
+		}
+	}
+
+	ret++;		/* \0 */
+	if (buf)
+		*buf = '\0';
+
+	return ret;
+}
+
+static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
+{
+	size_t len = format_array(NULL, 0, fmt, array, array_size);
+	char *ret;
+
+	ret = kmalloc(len, GFP_KERNEL);
+	if (ret == NULL)
+		return NULL;
+
+	format_array(ret, len, fmt, array, array_size);
+	return ret;
+}
+
+static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
+			      loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct array_data *data = inode->i_private;
+	size_t size;
+
+	if (*ppos == 0) {
+		if (file->private_data) {
+			kfree(file->private_data);
+			file->private_data = NULL;
+		}
+
+		file->private_data = format_array_alloc("%u", data->array, data->elements);
+	}
+
+	size = 0;
+	if (file->private_data)
+		size = strlen(file->private_data);
+
+	return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
+}
+
+static int xen_array_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+
+	return 0;
+}
+
+static struct file_operations u32_array_fops = {
+	.owner	= THIS_MODULE,
+	.open	= u32_array_open,
+	.release= xen_array_release,
+	.read	= u32_array_read,
+};
+
+struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+					    struct dentry *parent,
+					    u32 *array, unsigned elements)
+{
+	struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
+
+	if (data == NULL)
+		return NULL;
+
+	data->array = array;
+	data->elements = elements;
+
+	return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
+}
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
new file mode 100644
index 000000000000..e28132084832
--- /dev/null
+++ b/arch/x86/xen/debugfs.h
@@ -0,0 +1,10 @@
+#ifndef _XEN_DEBUGFS_H
+#define _XEN_DEBUGFS_H
+
+struct dentry * __init xen_init_debugfs(void);
+
+struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+					    struct dentry *parent,
+					    u32 *array, unsigned elements);
+
+#endif /* _XEN_DEBUGFS_H */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d9a35a363095..f5af913fd7b0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -40,6 +40,7 @@
  */
 #include <linux/sched.h>
 #include <linux/highmem.h>
+#include <linux/debugfs.h>
 #include <linux/bug.h>
 
 #include <asm/pgtable.h>
@@ -57,6 +58,61 @@
 
 #include "multicalls.h"
 #include "mmu.h"
+#include "debugfs.h"
+
+#define MMU_UPDATE_HISTO	30
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct {
+	u32 pgd_update;
+	u32 pgd_update_pinned;
+	u32 pgd_update_batched;
+
+	u32 pud_update;
+	u32 pud_update_pinned;
+	u32 pud_update_batched;
+
+	u32 pmd_update;
+	u32 pmd_update_pinned;
+	u32 pmd_update_batched;
+
+	u32 pte_update;
+	u32 pte_update_pinned;
+	u32 pte_update_batched;
+
+	u32 mmu_update;
+	u32 mmu_update_extended;
+	u32 mmu_update_histo[MMU_UPDATE_HISTO];
+
+	u32 prot_commit;
+	u32 prot_commit_batched;
+
+	u32 set_pte_at;
+	u32 set_pte_at_batched;
+	u32 set_pte_at_pinned;
+	u32 set_pte_at_current;
+	u32 set_pte_at_kernel;
+} mmu_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	if (unlikely(zero_stats)) {
+		memset(&mmu_stats, 0, sizeof(mmu_stats));
+		zero_stats = 0;
+	}
+}
+
+#define ADD_STATS(elem, val)			\
+	do { check_zero(); mmu_stats.elem += (val); } while(0)
+
+#else  /* !CONFIG_XEN_DEBUG_FS */
+
+#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+
+#endif /* CONFIG_XEN_DEBUG_FS */
 
 /*
  * Just beyond the highest usermode address.  STACK_TOP_MAX has a
@@ -243,11 +299,21 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
 
 	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
 
-	if (mcs.mc != NULL)
+	if (mcs.mc != NULL) {
+		ADD_STATS(mmu_update_extended, 1);
+		ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
+
 		mcs.mc->args[1]++;
-	else {
+
+		if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
+			ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
+		else
+			ADD_STATS(mmu_update_histo[0], 1);
+	} else {
+		ADD_STATS(mmu_update, 1);
 		mcs = __xen_mc_entry(sizeof(*u));
 		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+		ADD_STATS(mmu_update_histo[1], 1);
 	}
 
 	u = mcs.args;
@@ -267,6 +333,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 	u.val = pmd_val_ma(val);
 	xen_extend_mmu_update(&u);
 
+	ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
@@ -274,6 +342,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
 void xen_set_pmd(pmd_t *ptr, pmd_t val)
 {
+	ADD_STATS(pmd_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!xen_page_pinned(ptr)) {
@@ -281,6 +351,8 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
 		return;
 	}
 
+	ADD_STATS(pmd_update_pinned, 1);
+
 	xen_set_pmd_hyper(ptr, val);
 }
 
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (mm == &init_mm)
 		preempt_disable();
 
+	ADD_STATS(set_pte_at, 1);
+//	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
+	ADD_STATS(set_pte_at_current, mm == current->mm);
+	ADD_STATS(set_pte_at_kernel, mm == &init_mm);
+
 	if (mm == current->mm || mm == &init_mm) {
 		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
 			struct multicall_space mcs;
 			mcs = xen_mc_entry(0);
 
 			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
+			ADD_STATS(set_pte_at_batched, 1);
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 			goto out;
 		} else
@@ -336,6 +414,9 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 	u.val = pte_val_ma(pte);
 	xen_extend_mmu_update(&u);
 
+	ADD_STATS(prot_commit, 1);
+	ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
@@ -402,6 +483,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 	u.val = pud_val_ma(val);
 	xen_extend_mmu_update(&u);
 
+	ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
@@ -409,6 +492,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
+	ADD_STATS(pud_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!xen_page_pinned(ptr)) {
@@ -416,11 +501,17 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 		return;
 	}
 
+	ADD_STATS(pud_update_pinned, 1);
+
 	xen_set_pud_hyper(ptr, val);
 }
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+	ADD_STATS(pte_update, 1);
+//	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
+	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 #ifdef CONFIG_X86_PAE
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
@@ -517,6 +608,8 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
 	pgd_t *user_ptr = xen_get_user_pgd(ptr);
 
+	ADD_STATS(pgd_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!xen_page_pinned(ptr)) {
@@ -528,6 +621,9 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 		return;
 	}
 
+	ADD_STATS(pgd_update_pinned, 1);
+	ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 	/* If it's pinned, then we can at least batch the kernel and
 	   user updates together. */
 	xen_mc_batch();
@@ -1003,3 +1099,66 @@ void xen_exit_mmap(struct mm_struct *mm)
 
 	spin_unlock(&mm->page_table_lock);
 }
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_mmu_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+	debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
+
+	debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
+	debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pgd_update_pinned);
+	debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pgd_update_pinned);
+
+	debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
+	debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pud_update_pinned);
+	debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pud_update_pinned);
+
+	debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
+	debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pmd_update_pinned);
+	debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pmd_update_pinned);
+
+	debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
+//	debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
+//			   &mmu_stats.pte_update_pinned);
+	debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pte_update_pinned);
+
+	debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
+	debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
+			   &mmu_stats.mmu_update_extended);
+	xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
+				     mmu_stats.mmu_update_histo, 20);
+
+	debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
+	debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_batched);
+	debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_current);
+	debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_kernel);
+
+	debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
+	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
+			   &mmu_stats.prot_commit_batched);
+
+	return 0;
+}
+fs_initcall(xen_mmu_debugfs);
+
+#endif	/* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 9efd1c6c9776..8ea8a0d0b0de 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -21,16 +21,20 @@
  */
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/debugfs.h>
 
 #include <asm/xen/hypercall.h>
 
 #include "multicalls.h"
+#include "debugfs.h"
+
+#define MC_BATCH	32
 
 #define MC_DEBUG	1
 
-#define MC_BATCH	32
 #define MC_ARGS		(MC_BATCH * 16)
 
+
 struct mc_buffer {
 	struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
@@ -47,6 +51,76 @@ struct mc_buffer {
 static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
 DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
 
+/* flush reasons 0- slots, 1- args, 2- callbacks */
+enum flush_reasons
+{
+	FL_SLOTS,
+	FL_ARGS,
+	FL_CALLBACKS,
+
+	FL_N_REASONS
+};
+
+#ifdef CONFIG_XEN_DEBUG_FS
+#define NHYPERCALLS	40		/* not really */
+
+static struct {
+	unsigned histo[MC_BATCH+1];
+
+	unsigned issued;
+	unsigned arg_total;
+	unsigned hypercalls;
+	unsigned histo_hypercalls[NHYPERCALLS];
+
+	unsigned flush[FL_N_REASONS];
+} mc_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	if (unlikely(zero_stats)) {
+		memset(&mc_stats, 0, sizeof(mc_stats));
+		zero_stats = 0;
+	}
+}
+
+static void mc_add_stats(const struct mc_buffer *mc)
+{
+	int i;
+
+	check_zero();
+
+	mc_stats.issued++;
+	mc_stats.hypercalls += mc->mcidx;
+	mc_stats.arg_total += mc->argidx;
+
+	mc_stats.histo[mc->mcidx]++;
+	for(i = 0; i < mc->mcidx; i++) {
+		unsigned op = mc->entries[i].op;
+		if (op < NHYPERCALLS)
+			mc_stats.histo_hypercalls[op]++;
+	}
+}
+
+static void mc_stats_flush(enum flush_reasons idx)
+{
+	check_zero();
+
+	mc_stats.flush[idx]++;
+}
+
+#else  /* !CONFIG_XEN_DEBUG_FS */
+
+static inline void mc_add_stats(const struct mc_buffer *mc)
+{
+}
+
+static inline void mc_stats_flush(enum flush_reasons idx)
+{
+}
+#endif	/* CONFIG_XEN_DEBUG_FS */
+
 void xen_mc_flush(void)
 {
 	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
@@ -60,6 +134,8 @@ void xen_mc_flush(void)
 	   something in the middle */
 	local_irq_save(flags);
 
+	mc_add_stats(b);
+
 	if (b->mcidx) {
 #if MC_DEBUG
 		memcpy(b->debug, b->entries,
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args)
 
 	if (b->mcidx == MC_BATCH ||
 	    (argidx + args) > MC_ARGS) {
+		mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
 		xen_mc_flush();
 		argidx = roundup(b->argidx, sizeof(u64));
 	}
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data)
 	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
 	struct callback *cb;
 
-	if (b->cbidx == MC_BATCH)
+	if (b->cbidx == MC_BATCH) {
+		mc_stats_flush(FL_CALLBACKS);
 		xen_mc_flush();
+	}
 
 	cb = &b->callbacks[b->cbidx++];
 	cb->fn = fn;
 	cb->data = data;
 }
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct dentry *d_mc_debug;
+
+static int __init xen_mc_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mc_debug = debugfs_create_dir("multicalls", d_xen);
+
+	debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
+
+	debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
+	debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
+	debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
+
+	xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
+				     mc_stats.histo, MC_BATCH);
+	xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
+				     mc_stats.histo_hypercalls, NHYPERCALLS);
+	xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
+				     mc_stats.flush, FL_N_REASONS);
+
+	return 0;
+}
+fs_initcall(xen_mc_debugfs);
+
+#endif	/* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 4884bc603aa7..0d8f3b2d9bec 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -4,6 +4,8 @@
  */
 #include <linux/kernel_stat.h>
 #include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/log2.h>
 
 #include <asm/paravirt.h>
 
@@ -11,6 +13,93 @@
 #include <xen/events.h>
 
 #include "xen-ops.h"
+#include "debugfs.h"
+
+#ifdef CONFIG_XEN_DEBUG_FS
+static struct xen_spinlock_stats
+{
+	u64 taken;
+	u32 taken_slow;
+	u32 taken_slow_nested;
+	u32 taken_slow_pickup;
+	u32 taken_slow_spurious;
+
+	u64 released;
+	u32 released_slow;
+	u32 released_slow_kicked;
+
+#define HISTO_BUCKETS	20
+	u32 histo_spin_fast[HISTO_BUCKETS+1];
+	u32 histo_spin[HISTO_BUCKETS+1];
+
+	u64 spinning_time;
+	u64 total_time;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static unsigned lock_timeout = 1 << 10;
+#define TIMEOUT lock_timeout
+
+static inline void check_zero(void)
+{
+	if (unlikely(zero_stats)) {
+		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+		zero_stats = 0;
+	}
+}
+
+#define ADD_STATS(elem, val)			\
+	do { check_zero(); spinlock_stats.elem += (val); } while(0)
+
+static inline u64 spin_time_start(void)
+{
+	return xen_clocksource_read();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+	unsigned index = ilog2(delta);
+
+	check_zero();
+
+	if (index < HISTO_BUCKETS)
+		array[index]++;
+	else
+		array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_fast(u64 start)
+{
+	u32 delta = xen_clocksource_read() - start;
+
+	__spin_time_accum(delta, spinlock_stats.histo_spin_fast);
+	spinlock_stats.spinning_time += delta;
+}
+
+static inline void spin_time_accum(u64 start)
+{
+	u32 delta = xen_clocksource_read() - start;
+
+	__spin_time_accum(delta, spinlock_stats.histo_spin);
+	spinlock_stats.total_time += delta;
+}
+#else  /* !CONFIG_XEN_DEBUG_FS */
+#define TIMEOUT			(1 << 10)
+#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+
+static inline u64 spin_time_start(void)
+{
+	return 0;
+}
+
+static inline void spin_time_accum_fast(u64 start)
+{
+}
+static inline void spin_time_accum(u64 start)
+{
+}
+#endif  /* CONFIG_XEN_DEBUG_FS */
 
 struct xen_spinlock {
 	unsigned char lock;		/* 0 -> free; 1 -> locked */
@@ -92,6 +181,9 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 	/* announce we're spinning */
 	prev = spinning_lock(xl);
 
+	ADD_STATS(taken_slow, 1);
+	ADD_STATS(taken_slow_nested, prev != NULL);
+
 	do {
 		/* clear pending */
 		xen_clear_irq_pending(irq);
@@ -100,6 +192,8 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 		   we weren't looking  */
 		ret = xen_spin_trylock(lock);
 		if (ret) {
+			ADD_STATS(taken_slow_pickup, 1);
+
 			/*
 			 * If we interrupted another spinlock while it
 			 * was blocking, make sure it doesn't block
@@ -120,6 +214,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 		 * pending.
 		 */
 		xen_poll_irq(irq);
+		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
 	kstat_this_cpu.irqs[irq]++;
@@ -132,11 +227,18 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 static void xen_spin_lock(struct raw_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	int timeout;
+	unsigned timeout;
 	u8 oldval;
+	u64 start_spin;
+
+	ADD_STATS(taken, 1);
+
+	start_spin = spin_time_start();
 
 	do {
-		timeout = 1 << 10;
+		u64 start_spin_fast = spin_time_start();
+
+		timeout = TIMEOUT;
 
 		asm("1: xchgb %1,%0\n"
 		    "   testb %1,%1\n"
@@ -151,16 +253,22 @@ static void xen_spin_lock(struct raw_spinlock *lock)
 		    : "1" (1)
 		    : "memory");
 
-	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+		spin_time_accum_fast(start_spin_fast);
+	} while (unlikely(oldval != 0 && (TIMEOUT == ~0 || !xen_spin_lock_slow(lock))));
+
+	spin_time_accum(start_spin);
 }
 
 static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 {
 	int cpu;
 
+	ADD_STATS(released_slow, 1);
+
 	for_each_online_cpu(cpu) {
 		/* XXX should mix up next cpu selection */
 		if (per_cpu(lock_spinners, cpu) == xl) {
+			ADD_STATS(released_slow_kicked, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 			break;
 		}
@@ -171,6 +279,8 @@ static void xen_spin_unlock(struct raw_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 
+	ADD_STATS(released, 1);
+
 	smp_wmb();		/* make sure no writes get moved after unlock */
 	xl->lock = 0;		/* release lock */
 
@@ -216,3 +326,52 @@ void __init xen_init_spinlocks(void)
 	pv_lock_ops.spin_trylock = xen_spin_trylock;
 	pv_lock_ops.spin_unlock = xen_spin_unlock;
 }
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct dentry *d_spin_debug;
+
+static int __init xen_spinlock_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
+
+	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
+
+	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
+	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+			   &spinlock_stats.taken_slow);
+	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
+			   &spinlock_stats.taken_slow_nested);
+	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+			   &spinlock_stats.taken_slow_pickup);
+	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
+			   &spinlock_stats.taken_slow_spurious);
+
+	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
+	debugfs_create_u32("released_slow", 0444, d_spin_debug,
+			   &spinlock_stats.released_slow);
+	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+			   &spinlock_stats.released_slow_kicked);
+
+	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
+			   &spinlock_stats.spinning_time);
+	debugfs_create_u64("time_total", 0444, d_spin_debug,
+			   &spinlock_stats.total_time);
+
+	xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
+				     spinlock_stats.histo_spin, HISTO_BUCKETS + 1);
+	xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
+				     spinlock_stats.histo_spin_fast, HISTO_BUCKETS + 1);
+
+	return 0;
+}
+fs_initcall(xen_spinlock_debugfs);
+
+#endif	/* CONFIG_XEN_DEBUG_FS */

From 1e696f638b523958ee3d95e655becdb4c4b6fcde Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 20 Aug 2008 17:02:20 -0700
Subject: [PATCH 049/142] xen: allow interrupts to be enabled while doing a
 blocking spin

If spin_lock is called in an interrupts-enabled context, we can safely
enable interrupts while spinning.  We don't bother for the actual spin
loop, but if we timeout and fall back to blocking, it's definitely
worthwhile enabling interrupts if possible.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/spinlock.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 0d8f3b2d9bec..e6061f2e64f2 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -23,6 +23,7 @@ static struct xen_spinlock_stats
 	u32 taken_slow_nested;
 	u32 taken_slow_pickup;
 	u32 taken_slow_spurious;
+	u32 taken_slow_irqenable;
 
 	u64 released;
 	u32 released_slow;
@@ -167,12 +168,13 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
 	__get_cpu_var(lock_spinners) = prev;
 }
 
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	struct xen_spinlock *prev;
 	int irq = __get_cpu_var(lock_kicker_irq);
 	int ret;
+	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
@@ -181,6 +183,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 	/* announce we're spinning */
 	prev = spinning_lock(xl);
 
+	flags = __raw_local_save_flags();
+	if (irq_enable) {
+		ADD_STATS(taken_slow_irqenable, 1);
+		raw_local_irq_enable();
+	}
+
 	ADD_STATS(taken_slow, 1);
 	ADD_STATS(taken_slow_nested, prev != NULL);
 
@@ -220,11 +228,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
 	kstat_this_cpu.irqs[irq]++;
 
 out:
+	raw_local_irq_restore(flags);
 	unspinning_lock(xl, prev);
 	return ret;
 }
 
-static void xen_spin_lock(struct raw_spinlock *lock)
+static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	unsigned timeout;
@@ -254,11 +263,23 @@ static void xen_spin_lock(struct raw_spinlock *lock)
 		    : "memory");
 
 		spin_time_accum_fast(start_spin_fast);
-	} while (unlikely(oldval != 0 && (TIMEOUT == ~0 || !xen_spin_lock_slow(lock))));
+
+	} while (unlikely(oldval != 0 &&
+			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
 
 	spin_time_accum(start_spin);
 }
 
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+	__xen_spin_lock(lock, false);
+}
+
+static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+{
+	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
+}
+
 static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 {
 	int cpu;
@@ -323,6 +344,7 @@ void __init xen_init_spinlocks(void)
 	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
 	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
 	pv_lock_ops.spin_lock = xen_spin_lock;
+	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
 	pv_lock_ops.spin_trylock = xen_spin_trylock;
 	pv_lock_ops.spin_unlock = xen_spin_unlock;
 }
@@ -353,6 +375,8 @@ static int __init xen_spinlock_debugfs(void)
 			   &spinlock_stats.taken_slow_pickup);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
 			   &spinlock_stats.taken_slow_spurious);
+	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
+			   &spinlock_stats.taken_slow_irqenable);
 
 	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,

From f8eca41f0afcc7042dc3398c8ba5878c59b1cf1b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 20 Aug 2008 17:02:21 -0700
Subject: [PATCH 050/142] xen: measure how long spinlocks spend blocking

Measure how long spinlocks spend blocked.  Also rename some fields to
be more consistent.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/spinlock.c | 60 ++++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index e6061f2e64f2..d072823bc06d 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -29,12 +29,14 @@ static struct xen_spinlock_stats
 	u32 released_slow;
 	u32 released_slow_kicked;
 
-#define HISTO_BUCKETS	20
-	u32 histo_spin_fast[HISTO_BUCKETS+1];
-	u32 histo_spin[HISTO_BUCKETS+1];
+#define HISTO_BUCKETS	30
+	u32 histo_spin_total[HISTO_BUCKETS+1];
+	u32 histo_spin_spinning[HISTO_BUCKETS+1];
+	u32 histo_spin_blocked[HISTO_BUCKETS+1];
 
-	u64 spinning_time;
-	u64 total_time;
+	u64 time_total;
+	u64 time_spinning;
+	u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
@@ -70,20 +72,28 @@ static void __spin_time_accum(u64 delta, u32 *array)
 		array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_fast(u64 start)
+static inline void spin_time_accum_spinning(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
 
-	__spin_time_accum(delta, spinlock_stats.histo_spin_fast);
-	spinlock_stats.spinning_time += delta;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
+	spinlock_stats.time_spinning += delta;
 }
 
-static inline void spin_time_accum(u64 start)
+static inline void spin_time_accum_total(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
 
-	__spin_time_accum(delta, spinlock_stats.histo_spin);
-	spinlock_stats.total_time += delta;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
+	spinlock_stats.time_total += delta;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+	u32 delta = xen_clocksource_read() - start;
+
+	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+	spinlock_stats.time_blocked += delta;
 }
 #else  /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT			(1 << 10)
@@ -94,10 +104,13 @@ static inline u64 spin_time_start(void)
 	return 0;
 }
 
-static inline void spin_time_accum_fast(u64 start)
+static inline void spin_time_accum_total(u64 start)
 {
 }
-static inline void spin_time_accum(u64 start)
+static inline void spin_time_accum_spinning(u64 start)
+{
+}
+static inline void spin_time_accum_blocked(u64 start)
 {
 }
 #endif  /* CONFIG_XEN_DEBUG_FS */
@@ -175,11 +188,14 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 	int irq = __get_cpu_var(lock_kicker_irq);
 	int ret;
 	unsigned long flags;
+	u64 start;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
 		return 0;
 
+	start = spin_time_start();
+
 	/* announce we're spinning */
 	prev = spinning_lock(xl);
 
@@ -230,6 +246,8 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
 out:
 	raw_local_irq_restore(flags);
 	unspinning_lock(xl, prev);
+	spin_time_accum_blocked(start);
+
 	return ret;
 }
 
@@ -262,12 +280,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
 		    : "1" (1)
 		    : "memory");
 
-		spin_time_accum_fast(start_spin_fast);
+		spin_time_accum_spinning(start_spin_fast);
 
 	} while (unlikely(oldval != 0 &&
 			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
 
-	spin_time_accum(start_spin);
+	spin_time_accum_total(start_spin);
 }
 
 static void xen_spin_lock(struct raw_spinlock *lock)
@@ -385,14 +403,18 @@ static int __init xen_spinlock_debugfs(void)
 			   &spinlock_stats.released_slow_kicked);
 
 	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.spinning_time);
+			   &spinlock_stats.time_spinning);
+	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+			   &spinlock_stats.time_blocked);
 	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.total_time);
+			   &spinlock_stats.time_total);
 
 	xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin, HISTO_BUCKETS + 1);
+				     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
 	xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_fast, HISTO_BUCKETS + 1);
+				     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
+	xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+				     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 
 	return 0;
 }

From 5b792d320f28ff83dd4c13f984807e26235f7703 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 21 Aug 2008 13:43:51 -0700
Subject: [PATCH 051/142] x86, microcode_amd: fix shift warning

microcode_amd.c uses ">> 32" on a 32-bit value, so gcc warns about that.
The code could use something like this *untested* patch.

linux-next-20080821/arch/x86/kernel/microcode_amd.c:229: warning: right shift count >= width of type

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 4006e5e3adf0..d606a05545ca 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -206,6 +206,7 @@ static void apply_microcode_amd(int cpu)
 	unsigned int rev;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+	unsigned long addr;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
@@ -215,10 +216,9 @@ static void apply_microcode_amd(int cpu)
 
 	spin_lock_irqsave(&microcode_update_lock, flags);
 
-	edx = (unsigned int)(((unsigned long)
-			      &(uci->mc.mc_amd->hdr.data_code)) >> 32);
-	eax = (unsigned int)(((unsigned long)
-			      &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL);
+	addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
+	edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
+	eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
 
 	asm volatile("movl %0, %%ecx; wrmsr" :
 		     : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");

From ee7686bc043921a488d9bf89fe93241c5457a74e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 21 Aug 2008 13:17:56 -0700
Subject: [PATCH 052/142] x86: build fix in "xen spinlock updates and
 performance measurements"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ingo Molnar wrote:
> -tip testing found this build failure:
>
>  arch/x86/xen/spinlock.c: In function ‘spin_time_start’:
>  arch/x86/xen/spinlock.c:60: error: implicit declaration of function ‘xen_clocksource_read’
>
> i've excluded these new commits for now from tip/master - could you
> please send a delta fix against tip/x86/xen?

Make xen_clocksource_read non-static.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/time.c    | 4 +---
 arch/x86/xen/xen-ops.h | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b77470fc3..20182d9072c4 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -30,8 +30,6 @@
 #define TIMER_SLOP	100000
 #define NS_PER_TICK	(1000000000LL / HZ)
 
-static cycle_t xen_clocksource_read(void);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void)
 	return xen_khz;
 }
 
-static cycle_t xen_clocksource_read(void)
+cycle_t xen_clocksource_read(void)
 {
         struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3c70ebc50b1b..1e8bfdaa20d3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,7 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/clocksource.h>
 #include <linux/irqreturn.h>
 #include <xen/xen-ops.h>
 
@@ -33,6 +34,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 unsigned long xen_tsc_khz(void);
 void __init xen_time_init(void);

From 25258ef762bc4a05fa9c4523f7dae56e3fd01864 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 20 Aug 2008 11:31:07 -0700
Subject: [PATCH 053/142] x86: export pv_lock_ops non-GPL

None of the spinlock API is exported GPL, so there's no reason for
pv_lock_ops to be.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: drago01 <drago01@gmail.com>
---
 arch/x86/kernel/paravirt-spinlocks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 206359a8e43f..0e9f1982b1dd 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -23,7 +23,7 @@ struct pv_lock_ops pv_lock_ops = {
 	.spin_unlock = __ticket_spin_unlock,
 #endif
 };
-EXPORT_SYMBOL_GPL(pv_lock_ops);
+EXPORT_SYMBOL(pv_lock_ops);
 
 void __init paravirt_use_bytelocks(void)
 {

From 93be71b672f167b1e8c23725114f86305354f0ac Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Fri, 22 Aug 2008 11:52:11 +0100
Subject: [PATCH 054/142] x86: add cpu hotplug hooks into smp_ops

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c |  4 ++--
 arch/x86/kernel/process_64.c |  4 ++--
 arch/x86/kernel/smp.c        |  6 +++++-
 arch/x86/kernel/smpboot.c    |  8 ++++----
 include/asm-x86/smp.h        | 28 ++++++++++++++++++++++++----
 5 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0bc..e382fe0ccd66 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -91,7 +91,7 @@ static void cpu_exit_clear(void)
 }
 
 /* We don't actually take CPU down, just spin without interrupts. */
-static inline void play_dead(void)
+void native_play_dead(void)
 {
 	/* This must be done before dead CPU ack */
 	cpu_exit_clear();
@@ -107,7 +107,7 @@ static inline void play_dead(void)
 	wbinvd_halt();
 }
 #else
-static inline void play_dead(void)
+void native_play_dead(void)
 {
 	BUG();
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2a..dfd3f5752085 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -90,7 +90,7 @@ DECLARE_PER_CPU(int, cpu_state);
 
 #include <asm/nmi.h>
 /* We halt the CPU with physical CPU hotplug */
-static inline void play_dead(void)
+void native_play_dead(void)
 {
 	idle_task_exit();
 	mb();
@@ -102,7 +102,7 @@ static inline void play_dead(void)
 	wbinvd_halt();
 }
 #else
-static inline void play_dead(void)
+void native_play_dead(void)
 {
 	BUG();
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 361b7a4c640c..18f9b19f5f8f 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 struct smp_ops smp_ops = {
 	.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = native_smp_prepare_cpus,
-	.cpu_up = native_cpu_up,
 	.smp_cpus_done = native_smp_cpus_done,
 
 	.smp_send_stop = native_smp_send_stop,
 	.smp_send_reschedule = native_smp_send_reschedule,
 
+	.cpu_up = native_cpu_up,
+	.cpu_die = native_cpu_die,
+	.cpu_disable = native_cpu_disable,
+	.play_dead = native_play_dead,
+
 	.send_call_func_ipi = native_send_call_func_ipi,
 	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..c414cee296ba 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,7 +1346,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 	numa_remove_cpu(cpu);
 }
 
-int __cpu_disable(void)
+int native_cpu_disable(void)
 {
 	int cpu = smp_processor_id();
 
@@ -1385,7 +1385,7 @@ int __cpu_disable(void)
 	return 0;
 }
 
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
 {
 	/* We don't do anything here: idle task is faking death itself. */
 	unsigned int i;
@@ -1403,12 +1403,12 @@ void __cpu_die(unsigned int cpu)
 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
+int native_cpu_disable(void)
 {
 	return -ENOSYS;
 }
 
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
 {
 	/* We said "no" in __cpu_disable */
 	BUG();
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 3c877f74f279..dbf4249e2a6d 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -47,12 +47,16 @@ extern struct {
 struct smp_ops {
 	void (*smp_prepare_boot_cpu)(void);
 	void (*smp_prepare_cpus)(unsigned max_cpus);
-	int (*cpu_up)(unsigned cpu);
 	void (*smp_cpus_done)(unsigned max_cpus);
 
 	void (*smp_send_stop)(void);
 	void (*smp_send_reschedule)(int cpu);
 
+	int (*cpu_up)(unsigned cpu);
+	int (*cpu_disable)(void);
+	void (*cpu_die)(unsigned int cpu);
+	void (*play_dead)(void);
+
 	void (*send_call_func_ipi)(cpumask_t mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
@@ -91,6 +95,21 @@ static inline int __cpu_up(unsigned int cpu)
 	return smp_ops.cpu_up(cpu);
 }
 
+static inline int __cpu_disable(void)
+{
+	return smp_ops.cpu_disable();
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+	smp_ops.cpu_die(cpu);
+}
+
+static inline void play_dead(void)
+{
+	smp_ops.play_dead();
+}
+
 static inline void smp_send_reschedule(int cpu)
 {
 	smp_ops.smp_send_reschedule(cpu);
@@ -110,12 +129,13 @@ void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
 int native_cpu_up(unsigned int cpunum);
+int native_cpu_disable(void);
+void native_cpu_die(unsigned int cpu);
+void native_play_dead(void);
+
 void native_send_call_func_ipi(cpumask_t mask);
 void native_send_call_func_single_ipi(int cpu);
 
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 

From 379002586368ae22916f668011c9118c8ce8189c Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Fri, 22 Aug 2008 11:52:12 +0100
Subject: [PATCH 055/142] x86_32: clean up play_dead

The removal of the CPU from the various maps was redundant as it already
happened in cpu_disable.

After cleaning this up, cpu_uninit only resets the tlb state, so rename
it and create a noop version for the X86_64 case (so the two play_deads
can be unified later).

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c |  6 +-----
 arch/x86/kernel/process_32.c | 17 ++++-------------
 include/asm-x86/smp.h        |  7 ++++++-
 3 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa39..18f5551b32dd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -714,14 +714,10 @@ void __cpuinit cpu_init(void)
 	mxcsr_feature_mask_init();
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
+void reset_lazy_tlbstate(void)
 {
 	int cpu = raw_smp_processor_id();
-	cpu_clear(cpu, cpu_initialized);
 
-	/* lazy TLB state */
 	per_cpu(cpu_tlbstate, cpu).state = 0;
 	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
 }
-#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e382fe0ccd66..d55eb49584b1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -75,26 +75,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 #ifdef CONFIG_HOTPLUG_CPU
 #include <asm/nmi.h>
 
-static void cpu_exit_clear(void)
+/* We don't actually take CPU down, just spin without interrupts. */
+void native_play_dead(void)
 {
 	int cpu = raw_smp_processor_id();
 
 	idle_task_exit();
 
-	cpu_uninit();
+	reset_lazy_tlbstate();
+
 	irq_ctx_exit(cpu);
 
-	cpu_clear(cpu, cpu_callout_map);
-	cpu_clear(cpu, cpu_callin_map);
-
-	numa_remove_cpu(cpu);
-}
-
-/* We don't actually take CPU down, just spin without interrupts. */
-void native_play_dead(void)
-{
-	/* This must be done before dead CPU ack */
-	cpu_exit_clear();
 	mb();
 	/* Ack it */
 	__get_cpu_var(cpu_state) = CPU_DEAD;
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index dbf4249e2a6d..3bec6b478213 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -221,7 +221,12 @@ static inline int hard_smp_processor_id(void)
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_uninit(void);
+#ifdef CONFIG_X86_32
+extern void reset_lazy_tlbstate(void);
+#else
+static inline void reset_lazy_tlbstate(void)
+{ }
+#endif /* CONFIG_X86_32 */
 #endif
 
 #endif /* __ASSEMBLY__ */

From a21f5d88c17a40941f6239d1959d89e8493e8e01 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Fri, 22 Aug 2008 11:52:13 +0100
Subject: [PATCH 056/142] x86: unify x86_32 and x86_64 play_dead into one
 function

Add the new play_dead into smpboot.c, as it fits more cleanly in there
alongside other CONFIG_HOTPLUG functions.

Separate out the common code into its own function.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 32 --------------------------------
 arch/x86/kernel/process_64.c | 23 -----------------------
 arch/x86/kernel/smpboot.c    | 29 +++++++++++++++++++++++++++++
 include/asm-x86/smp.h        |  1 +
 4 files changed, 30 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d55eb49584b1..633cf06578fa 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -72,38 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return ((unsigned long *)tsk->thread.sp)[3];
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-#include <asm/nmi.h>
-
-/* We don't actually take CPU down, just spin without interrupts. */
-void native_play_dead(void)
-{
-	int cpu = raw_smp_processor_id();
-
-	idle_task_exit();
-
-	reset_lazy_tlbstate();
-
-	irq_ctx_exit(cpu);
-
-	mb();
-	/* Ack it */
-	__get_cpu_var(cpu_state) = CPU_DEAD;
-
-	/*
-	 * With physical CPU hotplug, we should halt the cpu
-	 */
-	local_irq_disable();
-	/* mask all interrupts, flush any and all caches, and halt */
-	wbinvd_halt();
-}
-#else
-void native_play_dead(void)
-{
-	BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dfd3f5752085..aa28ed01cdf3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -85,29 +85,6 @@ void exit_idle(void)
 	__exit_idle();
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-DECLARE_PER_CPU(int, cpu_state);
-
-#include <asm/nmi.h>
-/* We halt the CPU with physical CPU hotplug */
-void native_play_dead(void)
-{
-	idle_task_exit();
-	mb();
-	/* Ack it */
-	__get_cpu_var(cpu_state) = CPU_DEAD;
-
-	local_irq_disable();
-	/* mask all interrupts, flush any and all caches, and halt */
-	wbinvd_halt();
-}
-#else
-void native_play_dead(void)
-{
-	BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c414cee296ba..28b4287296aa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1402,6 +1402,29 @@ void native_cpu_die(unsigned int cpu)
 	}
 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
+
+void play_dead_common(void)
+{
+	idle_task_exit();
+	reset_lazy_tlbstate();
+	irq_ctx_exit(raw_smp_processor_id());
+
+	mb();
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/*
+	 * With physical CPU hotplug, we should halt the cpu
+	 */
+	local_irq_disable();
+}
+
+void native_play_dead(void)
+{
+	play_dead_common();
+	wbinvd_halt();
+}
+
 #else /* ... !CONFIG_HOTPLUG_CPU */
 int native_cpu_disable(void)
 {
@@ -1413,4 +1436,10 @@ void native_cpu_die(unsigned int cpu)
 	/* We said "no" in __cpu_disable */
 	BUG();
 }
+
+void native_play_dead(void)
+{
+	BUG();
+}
+
 #endif
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 3bec6b478213..17305e72f348 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -132,6 +132,7 @@ int native_cpu_up(unsigned int cpunum);
 int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
+void play_dead_common(void);
 
 void native_send_call_func_ipi(cpumask_t mask);
 void native_send_call_func_single_ipi(int cpu);

From 8227dce7dc2cfdcc28ee0eadfb482a7ee77fba03 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Fri, 22 Aug 2008 11:52:14 +0100
Subject: [PATCH 057/142] x86: separate generic cpu disabling code from APIC
 writes in cpu_disable

It allows paravirt implementations of cpu_disable to share the
cpu_disable_common code, without having to take on board APIC
writes, which may not be appropriate.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 40 ++++++++++++++++++++++-----------------
 include/asm-x86/smp.h     |  1 +
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 28b4287296aa..66b04e598817 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,6 +1346,28 @@ static void __ref remove_cpu_from_maps(int cpu)
 	numa_remove_cpu(cpu);
 }
 
+void cpu_disable_common(void)
+{
+	int cpu = smp_processor_id();
+	/*
+	 * HACK:
+	 * Allow any queued timer interrupts to get serviced
+	 * This is only a temporary solution until we cleanup
+	 * fixup_irqs as we do for IA64.
+	 */
+	local_irq_enable();
+	mdelay(1);
+
+	local_irq_disable();
+	remove_siblinginfo(cpu);
+
+	/* It's now safe to remove this processor from the online map */
+	lock_vector_lock();
+	remove_cpu_from_maps(cpu);
+	unlock_vector_lock();
+	fixup_irqs(cpu_online_map);
+}
+
 int native_cpu_disable(void)
 {
 	int cpu = smp_processor_id();
@@ -1365,23 +1387,7 @@ int native_cpu_disable(void)
 		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 
-	/*
-	 * HACK:
-	 * Allow any queued timer interrupts to get serviced
-	 * This is only a temporary solution until we cleanup
-	 * fixup_irqs as we do for IA64.
-	 */
-	local_irq_enable();
-	mdelay(1);
-
-	local_irq_disable();
-	remove_siblinginfo(cpu);
-
-	/* It's now safe to remove this processor from the online map */
-	lock_vector_lock();
-	remove_cpu_from_maps(cpu);
-	unlock_vector_lock();
-	fixup_irqs(cpu_online_map);
+	cpu_disable_common();
 	return 0;
 }
 
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 17305e72f348..8bdaa4a25f05 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -125,6 +125,7 @@ static inline void arch_send_call_function_ipi(cpumask_t mask)
 	smp_ops.send_call_func_ipi(mask);
 }
 
+void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);

From d68d82afd4c88e25763b23cd9cd4974573a3706f Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Fri, 22 Aug 2008 11:52:15 +0100
Subject: [PATCH 058/142] xen: implement CPU hotplugging

Note the changes from 2.6.18-xen CPU hotplugging:

A vcpu_down request from the remote admin via Xenbus both hotunplugs the
CPU, and disables it by removing it from the cpu_present map, and removing
its entry in /sys.

A vcpu_up request from the remote admin only re-enables the CPU, and does
not immediately bring the CPU up. A udev event is emitted, which can be
caught by the user if he wishes to automatically re-up CPUs when available,
or implement a more complex policy.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c        | 60 ++++++++++++++++++++------
 arch/x86/xen/spinlock.c   |  5 +++
 arch/x86/xen/time.c       |  8 ++++
 arch/x86/xen/xen-ops.h    |  6 +++
 drivers/xen/Makefile      |  2 +-
 drivers/xen/cpu_hotplug.c | 90 +++++++++++++++++++++++++++++++++++++++
 drivers/xen/events.c      |  4 ++
 7 files changed, 162 insertions(+), 13 deletions(-)
 create mode 100644 drivers/xen/cpu_hotplug.c

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index baca7f2fbd8a..be5cbb2b7c60 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -11,8 +11,6 @@
  * useful topology information for the kernel to make use of.  As a
  * result, all CPUs are treated as if they're single-core and
  * single-threaded.
- *
- * This does not handle HOTPLUG_CPU yet.
  */
 #include <linux/sched.h>
 #include <linux/err.h>
@@ -61,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static __cpuinit void cpu_bringup_and_idle(void)
+static __cpuinit void cpu_bringup(void)
 {
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	touch_softlockup_watchdog();
 	preempt_disable();
 
 	xen_enable_sysenter();
@@ -86,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	local_irq_enable();
 
 	wmb();			/* make sure everything is out */
+}
+
+static __cpuinit void cpu_bringup_and_idle(void)
+{
+	cpu_bringup();
 	cpu_idle();
 }
 
@@ -209,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 
 		cpu_set(cpu, cpu_present_map);
 	}
-
-	//init_xenbus_allowed_cpumask();
 }
 
 static __cpuinit int
@@ -278,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
-#if 0
-	rc = cpu_up_check(cpu);
-	if (rc)
-		return rc;
-#endif
-
 #ifdef CONFIG_X86_64
 	/* Allocate node local memory for AP pdas */
 	WARN_ON(cpu == 0);
@@ -336,6 +332,42 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
 {
 }
 
+int xen_cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+	if (cpu == 0)
+		return -EBUSY;
+
+	cpu_disable_common();
+
+	load_cr3(swapper_pg_dir);
+	return 0;
+}
+
+void xen_cpu_die(unsigned int cpu)
+{
+	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
+	}
+	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+	xen_uninit_lock_cpu(cpu);
+	xen_teardown_timer(cpu);
+
+	if (num_online_cpus() == 1)
+		alternatives_smp_switch(0);
+}
+
+void xen_play_dead(void)
+{
+	play_dead_common();
+	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+	cpu_bringup();
+}
+
 static void stop_self(void *v)
 {
 	int cpu = smp_processor_id();
@@ -419,9 +451,13 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 static const struct smp_ops xen_smp_ops __initdata = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
-	.cpu_up = xen_cpu_up,
 	.smp_cpus_done = xen_smp_cpus_done,
 
+	.cpu_up = xen_cpu_up,
+	.cpu_die = xen_cpu_die,
+	.cpu_disable = xen_cpu_disable,
+	.play_dead = xen_play_dead,
+
 	.smp_send_stop = xen_smp_send_stop,
 	.smp_send_reschedule = xen_smp_send_reschedule,
 
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d072823bc06d..dd71e3a021cd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -357,6 +357,11 @@ void __cpuinit xen_init_lock_cpu(int cpu)
 	printk("cpu %d spinlock event irq %d\n", cpu, irq);
 }
 
+void xen_uninit_lock_cpu(int cpu)
+{
+	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+}
+
 void __init xen_init_spinlocks(void)
 {
 	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 20182d9072c4..004ba86326ae 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -450,6 +450,14 @@ void xen_setup_timer(int cpu)
 	setup_runstate_info(cpu);
 }
 
+void xen_teardown_timer(int cpu)
+{
+	struct clock_event_device *evt;
+	BUG_ON(cpu == 0);
+	evt = &per_cpu(xen_clock_events, cpu);
+	unbind_from_irqhandler(evt->irq, NULL);
+}
+
 void xen_setup_cpu_clockevents(void)
 {
 	BUG_ON(preemptible());
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1e8bfdaa20d3..8dbd97fd7f18 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -34,6 +34,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 unsigned long xen_tsc_khz(void);
@@ -50,11 +51,16 @@ void xen_mark_init_mm_pinned(void);
 
 void __init xen_setup_vcpu_info_placement(void);
 
+void xen_play_dead(void);
+void xen_cpu_die(unsigned int cpu);
+int xen_cpu_disable(void);
+
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
 
 void __init xen_init_spinlocks(void);
 __cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);
 
 extern cpumask_t xen_cpu_initialized_map;
 #else
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 363286c54290..f62d8df27696 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y	+= grant-table.o features.o events.o manage.o
+obj-y	+= grant-table.o features.o events.o manage.o cpu_hotplug.o
 obj-y	+= xenbus/
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
new file mode 100644
index 000000000000..1bc003536cdb
--- /dev/null
+++ b/drivers/xen/cpu_hotplug.c
@@ -0,0 +1,90 @@
+#include <linux/notifier.h>
+
+#include <xen/xenbus.h>
+
+#include <asm-x86/xen/hypervisor.h>
+#include <asm/cpu.h>
+
+static void enable_hotplug_cpu(int cpu)
+{
+	if (!cpu_present(cpu))
+		arch_register_cpu(cpu);
+
+	cpu_set(cpu, cpu_present_map);
+}
+
+static void disable_hotplug_cpu(int cpu)
+{
+	if (cpu_present(cpu))
+		arch_unregister_cpu(cpu);
+
+	cpu_clear(cpu, cpu_present_map);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+	int err;
+	char dir[32], state[32];
+
+	if (!cpu_possible(cpu))
+		return;
+
+	sprintf(dir, "cpu/%u", cpu);
+	err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
+	if (err != 1) {
+		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+		return;
+	}
+
+	if (strcmp(state, "online") == 0) {
+		enable_hotplug_cpu(cpu);
+	} else if (strcmp(state, "offline") == 0) {
+		(void)cpu_down(cpu);
+		disable_hotplug_cpu(cpu);
+	} else {
+		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+		       state, cpu);
+	}
+}
+
+static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
+					const char **vec, unsigned int len)
+{
+	unsigned int cpu;
+	char *cpustr;
+	const char *node = vec[XS_WATCH_PATH];
+
+	cpustr = strstr(node, "cpu/");
+	if (cpustr != NULL) {
+		sscanf(cpustr, "cpu/%u", &cpu);
+		vcpu_hotplug(cpu);
+	}
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+			      unsigned long event, void *data)
+{
+	static struct xenbus_watch cpu_watch = {
+		.node = "cpu",
+		.callback = handle_vcpu_hotplug_event};
+
+	(void)register_xenbus_watch(&cpu_watch);
+
+	return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+	static struct notifier_block xsn_cpu = {
+		.notifier_call = setup_cpu_watcher };
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	register_xenstore_notifier(&xsn_cpu);
+
+	return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index b6c2b8f16bee..c3290bc186a0 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -360,6 +360,10 @@ static void unbind_from_irq(unsigned int irq)
 			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
 				[index_from_irq(irq)] = -1;
 			break;
+		case IRQT_IPI:
+			per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+				[index_from_irq(irq)] = -1;
+			break;
 		default:
 			break;
 		}

From a57a5c2e8db8d80f460dcad77877895718c9f209 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 4 Sep 2008 01:03:02 +0400
Subject: [PATCH 059/142] x86 setup: remove remnants of CONFIG_VIDEO_SELECT
 (read: vga=)

Impact: cleanup

Video mode selection became always possible in 2.6.23-rc1 after i386 setup
code rewrite in C.

Regardless, VIDEO_SELECT is stupid config option because it affects only
kernel setup code, not code which always stays in memory.

vga= always possible now which is good.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/configs/i386_defconfig   |  1 -
 arch/x86/configs/x86_64_defconfig |  1 -
 drivers/video/Kconfig             |  2 --
 drivers/video/console/Kconfig     | 16 ----------------
 4 files changed, 20 deletions(-)

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 9bc34e2033ec..a7ae4385670e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1482,7 +1482,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
 CONFIG_VGA_CONSOLE=y
 CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
-CONFIG_VIDEO_SELECT=y
 CONFIG_DUMMY_CONSOLE=y
 # CONFIG_FRAMEBUFFER_CONSOLE is not set
 CONFIG_LOGO=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index ae5124e064d4..743e80392731 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1449,7 +1449,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
 CONFIG_VGA_CONSOLE=y
 CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
-CONFIG_VIDEO_SELECT=y
 CONFIG_DUMMY_CONSOLE=y
 # CONFIG_FRAMEBUFFER_CONSOLE is not set
 CONFIG_LOGO=y
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 9b887ef64ff1..d58509a67068 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -678,7 +678,6 @@ config FB_VESA
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-	select VIDEO_SELECT
 	help
 	  This is the frame buffer device driver for generic VESA 2.0
 	  compliant graphic cards. The older VESA 1.2 cards are not supported.
@@ -1583,7 +1582,6 @@ config FB_CYBLA
 	tristate "Cyberblade/i1 support"
 	depends on FB && PCI && X86_32 && !64BIT
 	select FB_CFB_IMAGEBLIT
-	select VIDEO_SELECT
 	---help---
 	  This driver is supposed to support the Trident Cyberblade/i1
 	  graphics core integrated in the VIA VT8601A North Bridge,
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 06f87b04f207..2f50a80b413e 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -43,22 +43,6 @@ config VGACON_SOFT_SCROLLBACK_SIZE
 	 buffer.  Each 64KB will give you approximately 16 80x25
 	 screenfuls of scrollback buffer
 
-config VIDEO_SELECT
-	bool "Video mode selection support"
-	depends on  X86 && VGA_CONSOLE
-	---help---
-	  This enables support for text mode selection on kernel startup. If
-	  you want to take advantage of some high-resolution text mode your
-	  card's BIOS offers, but the traditional Linux utilities like
-	  SVGATextMode don't, you can say Y here and set the mode using the
-	  "vga=" option from your boot loader (lilo or loadlin) or set
-	  "vga=ask" which brings up a video mode menu on kernel startup. (Try
-	  "man bootparam" or see the documentation of your boot loader about
-	  how to pass options to the kernel.)
-
-	  Read the file <file:Documentation/svga.txt> for more information
-	  about the Video mode selection support. If unsure, say N.
-
 config MDA_CONSOLE
 	depends on !M68K && !PARISC && ISA
 	tristate "MDA text console (dual-headed) (EXPERIMENTAL)"

From 7f16a339787d45f997d67c1a4dea3c357f48e121 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 4 Sep 2008 06:19:45 -0700
Subject: [PATCH 060/142] x86: boot/compressed/Makefile: fix "make clean"

The Kbuild variable "targets" is supposed to be
configuration-independent and reflect "all possible targets".  This is
required to make "make clean" work properly.

Therefore, move all manipulation of "targets" as well as custom rules
out of the x86-32 ifdef statement.  Only leave inside the ifdefs the
things that are genuinely configuration-dependent.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/compressed/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 92fdd35bd93e..1771c804e02f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,9 +27,8 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
 
-ifeq ($(CONFIG_X86_32),y)
-targets += vmlinux.bin.all vmlinux.relocs
-hostprogs-y := relocs
+targets += vmlinux.bin.all vmlinux.relocs relocs
+hostprogs-$(CONFIG_X86_32) += relocs
 
 quiet_cmd_relocs = RELOCS  $@
       cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -43,6 +42,8 @@ quiet_cmd_relocbin = BUILD   $@
 $(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,relocbin)
 
+ifeq ($(CONFIG_X86_32),y)
+
 ifdef CONFIG_RELOCATABLE
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
 	$(call if_changed,gzip)
@@ -59,6 +60,5 @@ $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
 endif
 
-
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,ld)

From ef1f3413284b9270266cb04a944647e59735f0f1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 5 Sep 2008 13:26:39 +0100
Subject: [PATCH 061/142] x86: ticket spin locks: fix asm constraints

In addition to these changes I doubt the 'volatile' on all the ticket
lock asm()-s are really necessary.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 93adae338ac6..acd9bdda55cf 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -101,7 +101,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=Q" (new), "+m" (lock->slock)
+		     : "=&a" (tmp), "=&Q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");
 
@@ -146,7 +146,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 		     /* don't need lfence here, because loads are in-order */
 		     "jmp 1b\n"
 		     "2:"
-		     : "+Q" (inc), "+m" (lock->slock), "=r" (tmp)
+		     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
 		     :
 		     : "memory", "cc");
 }
@@ -166,7 +166,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=r" (new), "+m" (lock->slock)
+		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");
 

From 08f5fcbe6e0ea029c7e9b1b1c338700ab7809daf Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 5 Sep 2008 13:26:39 +0100
Subject: [PATCH 062/142] x86: ticket spin locks: factor out more common code

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock.h | 42 +++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index acd9bdda55cf..63d3b610a5ef 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -54,19 +54,7 @@
  * much between them in performance though, especially as locks are out of line.
  */
 #if (NR_CPUS < 256)
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
-}
-
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 8) - tmp) & 0xff) > 1;
-}
+#define TICKET_SHIFT 8
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
@@ -116,19 +104,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 #else
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
-}
-
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 16) - tmp) & 0xffff) > 1;
-}
+#define TICKET_SHIFT 16
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
@@ -182,6 +158,20 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+}
+
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+}
+
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
 #ifdef CONFIG_PARAVIRT

From 74e91604b2452c15bbe72d77b37cf47ed0310d13 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 5 Sep 2008 13:27:45 +0100
Subject: [PATCH 063/142] x86: ticket spin locks: reduce instruction
 dependencies

Reduce the amount of partial register accesses in the NR_CPUS < 256
case, and slightly weaken resource dependencies in the other case.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/spinlock.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 63d3b610a5ef..b5a4551fd565 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -21,8 +21,10 @@
 
 #ifdef CONFIG_X86_32
 # define LOCK_PTR_REG "a"
+# define REG_PTR_MODE "k"
 #else
 # define LOCK_PTR_REG "D"
+# define REG_PTR_MODE "q"
 #endif
 
 #if defined(CONFIG_X86_32) && \
@@ -77,19 +79,17 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
-	int tmp;
-	short new;
+	int tmp, new;
 
-	asm volatile("movw %2,%w0\n\t"
+	asm volatile("movzwl %2, %0\n\t"
 		     "cmpb %h0,%b0\n\t"
+		     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
-		     "movw %w0,%w1\n\t"
-		     "incb %h1\n\t"
 		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=&Q" (new), "+m" (lock->slock)
+		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");
 
@@ -136,8 +136,8 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 		     "movl %0,%1\n\t"
 		     "roll $16, %0\n\t"
 		     "cmpl %0,%1\n\t"
+		     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
-		     "addl $0x00010000, %1\n\t"
 		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"

From 5c51c637e489f3f4d58d51365b0c558549918858 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Wed, 3 Sep 2008 14:30:21 +0100
Subject: [PATCH 064/142] Xen: fix cpu_hotplug.c build by replacing
 is_running_on_xen() with xen_pv_domain()

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/cpu_hotplug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 1bc003536cdb..565280ec1c6a 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -78,7 +78,7 @@ static int __init setup_vcpu_hotplug_event(void)
 	static struct notifier_block xsn_cpu = {
 		.notifier_call = setup_cpu_watcher };
 
-	if (!is_running_on_xen())
+	if (!xen_pv_domain())
 		return -ENODEV;
 
 	register_xenstore_notifier(&xsn_cpu);

From 5ab6d815dc23117cd9c5895cb9592824de3d4a68 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Wed, 3 Sep 2008 14:30:22 +0100
Subject: [PATCH 065/142] Xen: fix cpu_hotplug build when !CONFIG_SMP

Compile cpu_hotplug.c conditionally on CONFIG_HOTPLUG_CPU

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index f62d8df27696..d2a8fdf0e191 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,5 @@
-obj-y	+= grant-table.o features.o events.o manage.o cpu_hotplug.o
+obj-y	+= grant-table.o features.o events.o manage.o
 obj-y	+= xenbus/
+obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o

From 913da64b54b2b3bb212a59aba2e6f2b8294ca1fa Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Wed, 3 Sep 2008 14:30:23 +0100
Subject: [PATCH 066/142] x86: build fix for !CONFIG_SMP

Move reset_lazy_tlbstate into tlb_32.c, and define noop versions of
play_dead() in process_{32,64}.c when !CONFIG_SMP.

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c |  7 -------
 arch/x86/kernel/process_32.c |  7 +++++++
 arch/x86/kernel/process_64.c |  7 +++++++
 arch/x86/kernel/tlb_32.c     |  8 ++++++++
 include/asm-x86/smp.h        |  9 ---------
 include/asm-x86/tlbflush.h   | 10 ++++++++++
 6 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 18f5551b32dd..76d10d5c2fa7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -714,10 +714,3 @@ void __cpuinit cpu_init(void)
 	mxcsr_feature_mask_init();
 }
 
-void reset_lazy_tlbstate(void)
-{
-	int cpu = raw_smp_processor_id();
-
-	per_cpu(cpu_tlbstate, cpu).state = 0;
-	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 633cf06578fa..b76b38ff962b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -72,6 +72,13 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return ((unsigned long *)tsk->thread.sp)[3];
 }
 
+#ifndef CONFIG_SMP
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index aa28ed01cdf3..ec27afa43d7e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -85,6 +85,13 @@ void exit_idle(void)
 	__exit_idle();
 }
 
+#ifndef CONFIG_SMP
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index fec1ecedc9b7..e00534b33534 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -241,3 +241,11 @@ void flush_tlb_all(void)
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
+void reset_lazy_tlbstate(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	per_cpu(cpu_tlbstate, cpu).state = 0;
+	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 8bdaa4a25f05..30b5146cc436 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -222,14 +222,5 @@ static inline int hard_smp_processor_id(void)
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
-#ifdef CONFIG_HOTPLUG_CPU
-#ifdef CONFIG_X86_32
-extern void reset_lazy_tlbstate(void);
-#else
-static inline void reset_lazy_tlbstate(void)
-{ }
-#endif /* CONFIG_X86_32 */
-#endif
-
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 35c76ceb9f40..0e7bbb549116 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -119,6 +119,10 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask,
 {
 }
 
+static inline void reset_lazy_tlbstate(void)
+{
+}
+
 #else  /* SMP */
 
 #include <asm/smp.h>
@@ -151,6 +155,12 @@ struct tlb_state {
 	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+
+void reset_lazy_tlbstate(void);
+#else
+static inline void reset_lazy_tlbstate(void)
+{
+}
 #endif
 
 #endif	/* SMP */

From d2f37384fc9957ad0162d5285a5660f0a86ef243 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 5 Sep 2008 21:28:27 -0700
Subject: [PATCH 067/142] x86: when building image.iso, use isohybrid if it
 exists

When building image.iso (make isoimage), use the isohybrid tool if it
exists.  isohybrid is a script included with Syslinux 3.72 and higher,
which creates an image that can be booted either as a hard disk
(including removable, e.g. USB disk) or as a CD-ROM.

If isohybrid doesn't exist, then this has no effect.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 7ee102f9c4f8..cceba1f46365 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -181,6 +181,7 @@ isoimage: $(BOOTIMAGE)
 	mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
 		-no-emul-boot -boot-load-size 4 -boot-info-table \
 		$(obj)/isoimage
+	isohybrid $(obj)/image.iso 2>/dev/null || true
 	rm -rf $(obj)/isoimage
 
 zlilo: $(BOOTIMAGE)

From bb57925f5057837c248b57a51181fd6b138a32f3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Sep 2008 16:26:55 -0700
Subject: [PATCH 068/142] x86_32: signal: use syscall_get_nr and error

Use asm/syscall.h interfaces that do the same things.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b21070ea33a4..3e4a688bb84f 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -27,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/syscall.h>
 #include <asm/syscalls.h>
 
 #include "sigframe.h"
@@ -507,9 +508,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	int ret;
 
 	/* Are we from a system call? */
-	if ((long)regs->orig_ax >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (regs->ax) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
@@ -623,9 +624,9 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if ((long)regs->orig_ax >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (regs->ax) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:

From 72fa50f4ef9014f4212945b766af84ea94308903 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Sep 2008 16:27:11 -0700
Subject: [PATCH 069/142] x86_32: signal: introduce signal_fault()

implement signal_fault() for 32bit.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 18 +++++++++++++++++-
 include/asm-x86/ptrace.h    |  4 ++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 3e4a688bb84f..76d05d703845 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -243,7 +243,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 	return ax;
 
 badframe:
-	force_sig(SIGSEGV, current);
+	signal_fault(regs, frame, "rt sigreturn");
 	return 0;
 }
 
@@ -669,3 +669,19 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 
 	clear_thread_flag(TIF_IRET);
 }
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+	struct task_struct *me = current;
+
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk(KERN_INFO
+		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       me->comm, me->pid, where, frame,
+		       regs->ip, regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, me);
+}
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index a33f027dbbea..fad807769910 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -144,10 +144,10 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 #ifdef CONFIG_X86_32
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 			 int error_code);
-#else
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 #endif
 
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
 extern long syscall_trace_enter(struct pt_regs *);
 extern void syscall_trace_leave(struct pt_regs *);
 

From 8fcd8e20f388d787f6abf701b11037b122029d5b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Sep 2008 16:27:39 -0700
Subject: [PATCH 070/142] x86: signal: make NR_restart_syscall

make NR_restart_syscall macro for cosmetic unification of handle_signal().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 3 ++-
 arch/x86/kernel/signal_64.c | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 76d05d703845..bd9b65031a9a 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -572,6 +572,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	return 0;
 }
 
+#define NR_restart_syscall	__NR_restart_syscall
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -635,7 +636,7 @@ static void do_signal(struct pt_regs *regs)
 			break;
 
 		case -ERESTART_RESTARTBLOCK:
-			regs->ax = __NR_restart_syscall;
+			regs->ax = NR_restart_syscall;
 			regs->ip -= 2;
 			break;
 		}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 823a55bf8c39..19e2b9143205 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -362,6 +362,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	return ret;
 }
 
+#define NR_restart_syscall	\
+	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -423,9 +425,7 @@ static void do_signal(struct pt_regs *regs)
 			regs->ip -= 2;
 			break;
 		case -ERESTART_RESTARTBLOCK:
-			regs->ax = test_thread_flag(TIF_IA32) ?
-					__NR_ia32_restart_syscall :
-					__NR_restart_syscall;
+			regs->ax = NR_restart_syscall;
 			regs->ip -= 2;
 			break;
 		}

From 1d13024e624d0e2e47f117b383917249a41ef5ce Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Sep 2008 16:28:06 -0700
Subject: [PATCH 071/142] x86: signal: split out frame setups

Make setup_rt_frame() and split out frame setups from handle_signal().
This is for cosmetic unification of handle_signal().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 29 ++++++++++++++++++++---------
 arch/x86/kernel/signal_64.c | 30 ++++++++++++++++++++----------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index bd9b65031a9a..48982f7ce886 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -338,8 +338,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 }
 
 static int
-setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
-	    struct pt_regs *regs)
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+	      struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
 	void __user *restorer;
@@ -416,8 +416,8 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	return -EFAULT;
 }
 
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			  sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
@@ -501,6 +501,21 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 /*
  * OK, we're invoking a handler:
  */
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	int ret;
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		ret = __setup_rt_frame(sig, ka, info, set, regs);
+	else
+		ret = __setup_frame(sig, ka, set, regs);
+
+	return ret;
+}
+
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	      sigset_t *oldset, struct pt_regs *regs)
@@ -537,11 +552,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	/* Set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		ret = setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		ret = setup_frame(sig, ka, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
 	if (ret)
 		return ret;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 19e2b9143205..8fbdd23d5cc6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -195,8 +195,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 	return (void __user *)round_down(sp - size, 64);
 }
 
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	void __user *fp = NULL;
@@ -280,6 +280,24 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 /*
  * OK, we're invoking a handler
  */
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	int ret;
+
+#ifdef CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32)) {
+		if (ka->sa.sa_flags & SA_SIGINFO)
+			ret = ia32_setup_rt_frame(sig, ka, info, set, regs);
+		else
+			ret = ia32_setup_frame(sig, ka, set, regs);
+	} else
+#endif
+	ret = __setup_rt_frame(sig, ka, info, set, regs);
+
+	return ret;
+}
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -317,14 +335,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32)) {
-		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
-		else
-			ret = ia32_setup_frame(sig, ka, oldset, regs);
-	} else
-#endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
 	if (ret == 0) {

From 13ad7725e9955ac68fff670a039da99ab33e86a0 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 5 Sep 2008 16:28:38 -0700
Subject: [PATCH 072/142] x86_32: signal: move signal number conversion to
 upper layer

Bring signal number conversion in __setup_frame() and __setup_rt_frame()
up into the common part setup_frame().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 48982f7ce886..b668efc18eae 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -344,7 +344,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
-	int usig;
 	void __user *fpstate = NULL;
 
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
@@ -352,13 +351,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err = __put_user(usig, &frame->sig);
+	err = __put_user(sig, &frame->sig);
 	if (err)
 		goto give_sigsegv;
 
@@ -422,7 +415,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
-	int usig;
 	void __user *fpstate = NULL;
 
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
@@ -430,13 +422,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= __put_user(usig, &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, info);
@@ -482,7 +468,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long)frame;
 	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)usig;
+	regs->ax = (unsigned long)sig;
 	regs->dx = (unsigned long)&frame->info;
 	regs->cx = (unsigned long)&frame->uc;
 
@@ -506,12 +492,19 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
 {
 	int ret;
+	int usig;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)
-		ret = __setup_rt_frame(sig, ka, info, set, regs);
+		ret = __setup_rt_frame(usig, ka, info, set, regs);
 	else
-		ret = __setup_frame(sig, ka, set, regs);
+		ret = __setup_frame(usig, ka, set, regs);
 
 	return ret;
 }

From 464f04c9e9b3b1c4f5ffb89c51d8ba2a2034c846 Mon Sep 17 00:00:00 2001
From: Andrey Borzenkov <arvidjaar@newmail.ru>
Date: Sat, 6 Sep 2008 12:40:21 +0400
Subject: [PATCH 073/142] x86: fix ghost EDD devices in /sys again

> This is regression but old enough. Apparently I had for whatever reasons
> EDD turned off till recently. This is 2.6.27-rc5 just in case.
>
> In 2006 I fixed ghost devices due to buggy BIOS:
>
> http://marc.info/?l=linux-kernel&m=114087765422490&w=2
>
> Later edd.S has been rewritten in C, and apparently this patch has been
> lost:
>
> {pts/1}% ls /sys/firmware/edd
> int13_dev80/  int13_dev84/  int13_dev88/  int13_dev8c/
> int13_dev81/  int13_dev85/  int13_dev89/  int13_dev8d/
> int13_dev82/  int13_dev86/  int13_dev8a/  int13_dev8e/
> int13_dev83/  int13_dev87/  int13_dev8b/  int13_dev8f/
>
> But I have just a single disk. This is the same system BTW.

Some BIOSes do not always set CF on error before return from int13.
The patch adds additional check for status being zero (AH == 0).

This was fixed for edd.S in
http://marc.info/?l=linux-kernel&m=114087765422490&w=2, but lost
again when edd.S was rewritten in C.

Signed-off-by: Andrey Borzenkov <arvidjaar@mail.ru>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/edd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index d93cbc6464d0..bf4ae6ff518e 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -32,7 +32,9 @@ static int read_mbr(u8 devno, void *buf)
 		     : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
 		     : : "esi", "edi", "memory");
 
-	return -(u8)ax;		/* 0 or -1 */
+	/* Some BIOSes do not set carry flag on error but still return
+	 * error in AH. The condition below is expected to catch both */
+	return -!!ax;		/* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)

From a19aac8548d85cf15d744335b8e82201da760d80 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 30 Aug 2008 06:03:34 +0400
Subject: [PATCH 074/142] x86: make setup_xstate_init() __init

WARNING: vmlinux.o(.text+0x22453): Section mismatch in reference from the function setup_xstate_init() to the function .init.text:__alloc_bootmem()
The function setup_xstate_init() references the function __init __alloc_bootmem().
This is often because setup_xstate_init lacks a __init annotation or the annotation of __alloc_bootmem is wrong.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/xsave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 07713d64debe..ed5274a5bb0f 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -272,7 +272,7 @@ void __cpuinit xsave_init(void)
 /*
  * setup the xstate image representing the init state
  */
-void setup_xstate_init(void)
+static void __init setup_xstate_init(void)
 {
 	init_xstate_buf = alloc_bootmem(xstate_size);
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;

From 30cec97967beb5aa08e893e8ff69623d5fecd9b7 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 29 Aug 2008 12:49:55 +0100
Subject: [PATCH 075/142] x86: init annotations in early_printk() setup

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early_printk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 825e9136b026..02fc5b40c14b 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -120,7 +120,7 @@ static __init void early_serial_init(char *s)
 		if (!strncmp(s, "0x", 2)) {
 			early_serial_base = simple_strtoul(s, &e, 16);
 		} else {
-			static int bases[] = { 0x3f8, 0x2f8 };
+			static const int __initconst bases[] = { 0x3f8, 0x2f8 };
 
 			if (!strncmp(s, "ttyS", 4))
 				s += 4;
@@ -920,7 +920,7 @@ static struct console simnow_console = {
 
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
-static int early_console_initialized;
+static int __initdata early_console_initialized;
 
 asmlinkage void early_printk(const char *fmt, ...)
 {

From cc643d4687533345fd8ebcba836f9ee25df7c458 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 29 Aug 2008 12:53:45 +0100
Subject: [PATCH 076/142] x86: adjust vmalloc_sync_all() for Xen (2nd try)

Since the fourth PDPT entry cannot be shared under Xen,
vmalloc_sync_all() must iterate over pmd-s rather than pgd-s here.
Luckily, the code isn't used for native PAE (SHARED_KERNEL_PMD is 1)
and the change is benign to non-PAE.

Also do a little more cleanup in that function.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
 arch/x86/mm/fault.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..356ed2dec3a6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -915,15 +915,15 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
-#ifdef CONFIG_X86_32
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
 	unsigned long address;
 
+#ifdef CONFIG_X86_32
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PMD_MASK;
+	     address >= TASK_SIZE && address < FIXADDR_TOP;
+	     address += PMD_SIZE) {
 		unsigned long flags;
 		struct page *page;
 
@@ -936,10 +936,8 @@ void vmalloc_sync_all(void)
 		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
 #else /* CONFIG_X86_64 */
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
-	unsigned long address;
-
-	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+	     address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		unsigned long flags;
 		struct page *page;

From 5394f80f92642c61fc2a95385be85f2fdcfb5adb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 7 Sep 2008 01:51:32 -0700
Subject: [PATCH 077/142] x86: check for and defend against BIOS memory
 corruption

Some BIOSes have been observed to corrupt memory in the low 64k.  This
change:
 - Reserves all memory which does not have to be in that area, to
   prevent it from being used as general memory by the kernel.  Things
   like the SMP trampoline are still in the memory, however.
 - Clears the reserved memory so we can observe changes to it.
 - Adds a function check_for_bios_corruption() which checks and reports on
   memory becoming unexpectedly non-zero.  Currently it's called in the
   x86 fault handler, and the powermanagement debug output.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |  5 ++
 arch/x86/Kconfig                    |  3 +
 arch/x86/kernel/setup.c             | 87 +++++++++++++++++++++++++++++
 arch/x86/mm/fault.c                 |  2 +
 drivers/base/power/main.c           |  1 +
 include/linux/kernel.h              | 12 ++++
 6 files changed, 110 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1150444a21ab..df48af505d15 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -360,6 +360,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
+	bios_corruption_check=0/1 [X86]
+			Some BIOSes seem to corrupt the first 64k of memory
+			when doing things like suspend/resume.  Setting this
+			option will scan the memory looking for corruption.
+
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
 			no delay (0).
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed92864d1325..1bb52e2ca02e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -201,6 +201,9 @@ config X86_TRAMPOLINE
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
+config X86_CHECK_BIOS_CORRUPTION
+        def_bool y
+
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d38..ee89ebc5aabc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -578,6 +578,89 @@ static struct x86_quirks default_x86_quirks __initdata;
 
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
+/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable.  Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS	8
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+static void __init setup_bios_corruption_check(void)
+{
+	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+
+	while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
+		u64 size;
+		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+		if (addr == 0)
+			break;
+
+		if ((addr + size) > 0x10000)
+			size = 0x10000 - addr;
+
+		if (size == 0)
+			break;
+
+		e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+		scan_areas[num_scan_areas].addr = addr;
+		scan_areas[num_scan_areas].size = size;
+		num_scan_areas++;
+
+		/* Assume we've already mapped this early memory */
+		memset(__va(addr), 0, size);
+
+		addr += size;
+	}
+
+	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
+	       num_scan_areas);
+	update_e820();
+}
+
+static int __read_mostly bios_corruption_check = 1;
+
+void check_for_bios_corruption(void)
+{
+	int i;
+	int corruption = 0;
+
+	if (!bios_corruption_check)
+		return;
+
+	for(i = 0; i < num_scan_areas; i++) {
+		unsigned long *addr = __va(scan_areas[i].addr);
+		unsigned long size = scan_areas[i].size;
+
+		for(; size; addr++, size -= sizeof(unsigned long)) {
+			if (!*addr)
+				continue;
+			printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+			       addr, __pa(addr), *addr);
+			corruption = 1;
+			*addr = 0;
+		}
+	}
+
+	if (corruption)
+		dump_stack();
+}
+
+static int set_bios_corruption_check(char *arg)
+{
+	char *end;
+
+	bios_corruption_check = simple_strtol(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("bios_corruption_check", set_bios_corruption_check);
+#endif
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -750,6 +833,10 @@ void __init setup_arch(char **cmdline_p)
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+	setup_bios_corruption_check();
+#endif
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..5140bdf03020 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -848,6 +848,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
+	check_for_bios_corruption();
+
 #ifdef CONFIG_X86_32
 	bust_spinlocks(1);
 #else
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 273a944d4040..bf6d3554e506 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -254,6 +254,7 @@ static char *pm_verb(int event)
 
 static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
 {
+	check_for_bios_corruption();
 	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
 		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
 		", may wakeup" : "");
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..8017129e6b63 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -240,6 +240,18 @@ extern const char *print_tainted(void);
 extern void add_taint(unsigned);
 extern int root_mountflags;
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+#endif
+
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,

From bb577f980ef35e2b0d00aeed566724e5032aa5eb Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sun, 7 Sep 2008 01:51:33 -0700
Subject: [PATCH 078/142] x86: add periodic corruption check

Perodically check for corruption in low phusical memory.  Don't bother
checking at fault time, since it won't show anything useful.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 17 +++++++++++++++++
 arch/x86/mm/fault.c     |  2 --
 arch/x86/mm/init_32.c   |  2 ++
 arch/x86/mm/init_64.c   |  2 ++
 include/linux/kernel.h  |  1 +
 5 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ee89ebc5aabc..c239b3780973 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -623,6 +623,7 @@ static void __init setup_bios_corruption_check(void)
 }
 
 static int __read_mostly bios_corruption_check = 1;
+static struct timer_list periodic_check_timer;
 
 void check_for_bios_corruption(void)
 {
@@ -650,6 +651,22 @@ void check_for_bios_corruption(void)
 		dump_stack();
 }
 
+static void periodic_check_for_corruption(unsigned long data)
+{
+	check_for_bios_corruption();
+	mod_timer(&periodic_check_timer, jiffies + 60*HZ);
+}
+
+void start_periodic_check_for_corruption(void)
+{
+	if (!bios_corruption_check)
+		return;
+
+	init_timer(&periodic_check_timer);
+	periodic_check_timer.function = &periodic_check_for_corruption;
+	periodic_check_for_corruption(0);
+}
+
 static int set_bios_corruption_check(char *arg)
 {
 	char *end;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5140bdf03020..455f3fe67b42 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -848,8 +848,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
-	check_for_bios_corruption();
-
 #ifdef CONFIG_X86_32
 	bust_spinlocks(1);
 #else
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0c..657a16ad61ba 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -907,6 +907,8 @@ void __init mem_init(void)
 	int codesize, reservedpages, datasize, initsize;
 	int tmp;
 
+	start_periodic_check_for_corruption();
+
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d3746efb060d..f4db5276fa21 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -769,6 +769,8 @@ void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
 
+	start_periodic_check_for_corruption();
+
 	pci_iommu_alloc();
 
 	/* clear_bss() already clear the empty_zero_page */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8017129e6b63..00bb251c6451 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -246,6 +246,7 @@ extern int root_mountflags;
  * able to scatter it around anywhere in the kernel.
  */
 void check_for_bios_corruption(void);
+void start_periodic_check_for_corruption(void);
 #else
 static inline void check_for_bios_corruption(void)
 {

From 9f077871ce7237e2387fc76542b3b4033cb05e49 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 7 Sep 2008 01:51:34 -0700
Subject: [PATCH 079/142] x86: clean up memory corruption check and add more
 kernel parameters

The corruption check is enabled in Kconfig by default, but disabled at runtime.

This patch adds several kernel parameters to control the corruption
check's behaviour; these are documented in kernel-parameters.txt.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt | 28 ++++++++--
 arch/x86/Kconfig                    | 26 ++++++++--
 arch/x86/kernel/setup.c             | 80 +++++++++++++++++++++--------
 3 files changed, 106 insertions(+), 28 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index df48af505d15..6a2629d00598 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -360,11 +360,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
-	bios_corruption_check=0/1 [X86]
-			Some BIOSes seem to corrupt the first 64k of memory
-			when doing things like suspend/resume.  Setting this
-			option will scan the memory looking for corruption.
-
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
 			no delay (0).
@@ -1233,6 +1228,29 @@ and is between 256 and 4096 characters. It is defined in the file
 			         or
 			         memmap=0x10000$0x18690000
 
+	memory_corruption_check=0/1 [X86]
+			Some BIOSes seem to corrupt the first 64k of
+			memory when doing things like suspend/resume.
+			Setting this option will scan the memory
+			looking for corruption.  Enabling this will
+			both detect corruption and prevent the kernel
+			from using the memory being corrupted.
+			However, its intended as a diagnostic tool; if
+			repeatable BIOS-originated corruption always
+			affects the same memory, you can use memmap=
+			to prevent the kernel from using that memory.
+
+	memory_corruption_check_size=size [X86]
+			By default it checks for corruption in the low
+			64k, making this memory unavailable for normal
+			use.  Use this parameter to scan for
+			corruption in more or less memory.
+
+	memory_corruption_check_period=seconds [X86]
+			By default it checks for corruption every 60
+			seconds.  Use this parameter to check at some
+			other rate.  0 disables periodic checking.
+
 	memtest=	[KNL,X86] Enable memtest
 			Format: <integer>
 			range: 0,4 : pattern number
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1bb52e2ca02e..cbee4199689c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -201,9 +201,6 @@ config X86_TRAMPOLINE
 	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
-config X86_CHECK_BIOS_CORRUPTION
-        def_bool y
-
 config KTIME_SCALAR
 	def_bool X86_32
 source "init/Kconfig"
@@ -1062,6 +1059,29 @@ config HIGHPTE
 	  low memory.  Setting this option will put user-space page table
 	  entries in high memory.
 
+config X86_CHECK_BIOS_CORRUPTION
+        bool "Check for low memory corruption"
+	default y
+	help
+	 Periodically check for memory corruption in low memory, which
+	 is suspected to be caused by BIOS.  Even when enabled in the
+	 configuration, it is disabled at runtime.  Enable it by
+	 setting "memory_corruption_check=1" on the kernel command
+	 line.  By default it scans the low 64k of memory every 60
+	 seconds; see the memory_corruption_check_size and
+	 memory_corruption_check_period parameters in
+	 Documentation/kernel-parameters.txt to adjust this.
+
+	 When enabled with the default parameters, this option has
+	 almost no overhead, as it reserves a relatively small amount
+	 of memory and scans it infrequently.  It both detects corruption
+	 and prevents it from affecting the running system.
+
+	 It is, however, intended as a diagnostic tool; if repeatable
+	 BIOS-originated corruption always affects the same memory,
+	 you can use memmap= to prevent the kernel from using that
+	 memory.
+
 config MATH_EMULATION
 	bool
 	prompt "Math emulation" if X86_32
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c239b3780973..27ae91288855 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -586,22 +586,71 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
  */
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 #define MAX_SCAN_AREAS	8
+
+static int __read_mostly memory_corruption_check = 0;
+static unsigned __read_mostly corruption_check_size = 64*1024;
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
+
 static struct e820entry scan_areas[MAX_SCAN_AREAS];
 static int num_scan_areas;
 
+
+static int set_corruption_check(char *arg)
+{
+	char *end;
+
+	memory_corruption_check = simple_strtol(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check", set_corruption_check);
+
+static int set_corruption_check_period(char *arg)
+{
+	char *end;
+
+	corruption_check_period = simple_strtoul(arg, &end, 10);
+
+	return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_period", set_corruption_check_period);
+
+static int set_corruption_check_size(char *arg)
+{
+	char *end;
+	unsigned size;
+
+	size = memparse(arg, &end);
+
+	if (*end == '\0')
+		corruption_check_size = size;
+
+	return (size == corruption_check_size) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_size", set_corruption_check_size);
+
+
 static void __init setup_bios_corruption_check(void)
 {
 	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
 
-	while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
+	if (corruption_check_size == 0)
+		memory_corruption_check = 0;
+
+	if (!memory_corruption_check)
+		return;
+
+	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
+
+	while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
 		u64 size;
 		addr = find_e820_area_size(addr, &size, PAGE_SIZE);
 
 		if (addr == 0)
 			break;
 
-		if ((addr + size) > 0x10000)
-			size = 0x10000 - addr;
+		if ((addr + size) > corruption_check_size)
+			size = corruption_check_size - addr;
 
 		if (size == 0)
 			break;
@@ -617,12 +666,11 @@ static void __init setup_bios_corruption_check(void)
 		addr += size;
 	}
 
-	printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
+	printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
 	       num_scan_areas);
 	update_e820();
 }
 
-static int __read_mostly bios_corruption_check = 1;
 static struct timer_list periodic_check_timer;
 
 void check_for_bios_corruption(void)
@@ -630,7 +678,7 @@ void check_for_bios_corruption(void)
 	int i;
 	int corruption = 0;
 
-	if (!bios_corruption_check)
+	if (!memory_corruption_check)
 		return;
 
 	for(i = 0; i < num_scan_areas; i++) {
@@ -647,35 +695,27 @@ void check_for_bios_corruption(void)
 		}
 	}
 
-	if (corruption)
-		dump_stack();
+	WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
 }
 
 static void periodic_check_for_corruption(unsigned long data)
 {
 	check_for_bios_corruption();
-	mod_timer(&periodic_check_timer, jiffies + 60*HZ);
+	mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ);
 }
 
 void start_periodic_check_for_corruption(void)
 {
-	if (!bios_corruption_check)
+	if (!memory_corruption_check || corruption_check_period == 0)
 		return;
 
+	printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
+	       corruption_check_period);
+
 	init_timer(&periodic_check_timer);
 	periodic_check_timer.function = &periodic_check_for_corruption;
 	periodic_check_for_corruption(0);
 }
-
-static int set_bios_corruption_check(char *arg)
-{
-	char *end;
-
-	bios_corruption_check = simple_strtol(arg, &end, 10);
-
-	return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("bios_corruption_check", set_bios_corruption_check);
 #endif
 
 /*

From c885df50f571faf9fd9f395361cfff1b3a16e06e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 7 Sep 2008 02:37:32 -0700
Subject: [PATCH 080/142] x86: default corruption check to off, but put
 parameter default in Kconfig

Default the low memory corruption check to off, but make the default setting of
the memory_corruption_check kernel parameter a config parameter.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig        |  9 ++++++++-
 arch/x86/kernel/setup.c | 13 ++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbee4199689c..7820d447bb8d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1061,7 +1061,6 @@ config HIGHPTE
 
 config X86_CHECK_BIOS_CORRUPTION
         bool "Check for low memory corruption"
-	default y
 	help
 	 Periodically check for memory corruption in low memory, which
 	 is suspected to be caused by BIOS.  Even when enabled in the
@@ -1082,6 +1081,14 @@ config X86_CHECK_BIOS_CORRUPTION
 	 you can use memmap= to prevent the kernel from using that
 	 memory.
 
+config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+        bool "Set the default setting of memory_corruption_check"
+	depends on X86_CHECK_BIOS_CORRUPTION
+	default y
+	help
+	 Set whether the default state of memory_corruption_check is
+	 on or off.
+
 config MATH_EMULATION
 	bool
 	prompt "Math emulation" if X86_32
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 27ae91288855..ec7e56c1b984 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -587,7 +587,8 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 #define MAX_SCAN_AREAS	8
 
-static int __read_mostly memory_corruption_check = 0;
+static int __read_mostly memory_corruption_check = -1;
+
 static unsigned __read_mostly corruption_check_size = 64*1024;
 static unsigned __read_mostly corruption_check_period = 60; /* seconds */
 
@@ -634,6 +635,16 @@ static void __init setup_bios_corruption_check(void)
 {
 	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
 
+	if (memory_corruption_check == -1) {
+		memory_corruption_check =
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+			1
+#else
+			0
+#endif
+			;
+	}
+
 	if (corruption_check_size == 0)
 		memory_corruption_check = 0;
 

From 2737146b3aa2cf8b5d5ae87a18c49fe1c374528b Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Mon, 8 Sep 2008 13:43:33 +0100
Subject: [PATCH 081/142] x86, xen: fix build when !CONFIG_HOTPLUG_CPU

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index be5cbb2b7c60..bf51a466d62c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -332,6 +332,7 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
 {
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 int xen_cpu_disable(void)
 {
 	unsigned int cpu = smp_processor_id();
@@ -368,6 +369,23 @@ void xen_play_dead(void)
 	cpu_bringup();
 }
 
+#else /* !CONFIG_HOTPLUG_CPU */
+int xen_cpu_disable(void)
+{
+	return -ENOSYS;
+}
+
+void xen_cpu_die(unsigned int cpu)
+{
+	BUG();
+}
+
+void xen_play_dead(void)
+{
+	BUG();
+}
+
+#endif
 static void stop_self(void *v)
 {
 	int cpu = smp_processor_id();

From 26fd10517e810dd59ea050b052de24a75ee6dc07 Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Mon, 8 Sep 2008 13:43:34 +0100
Subject: [PATCH 082/142] xen: make CPU hotplug functions static

There's no need for these functions to be accessed from outside of xen/smp.c

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c     | 12 ++++++------
 arch/x86/xen/xen-ops.h |  4 ----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index bf51a466d62c..d77da613b1d2 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -333,7 +333,7 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-int xen_cpu_disable(void)
+static int xen_cpu_disable(void)
 {
 	unsigned int cpu = smp_processor_id();
 	if (cpu == 0)
@@ -345,7 +345,7 @@ int xen_cpu_disable(void)
 	return 0;
 }
 
-void xen_cpu_die(unsigned int cpu)
+static void xen_cpu_die(unsigned int cpu)
 {
 	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
 		current->state = TASK_UNINTERRUPTIBLE;
@@ -362,7 +362,7 @@ void xen_cpu_die(unsigned int cpu)
 		alternatives_smp_switch(0);
 }
 
-void xen_play_dead(void)
+static void xen_play_dead(void)
 {
 	play_dead_common();
 	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
@@ -370,17 +370,17 @@ void xen_play_dead(void)
 }
 
 #else /* !CONFIG_HOTPLUG_CPU */
-int xen_cpu_disable(void)
+static int xen_cpu_disable(void)
 {
 	return -ENOSYS;
 }
 
-void xen_cpu_die(unsigned int cpu)
+static void xen_cpu_die(unsigned int cpu)
 {
 	BUG();
 }
 
-void xen_play_dead(void)
+static void xen_play_dead(void)
 {
 	BUG();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 8dbd97fd7f18..d7422dc2a55c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -51,10 +51,6 @@ void xen_mark_init_mm_pinned(void);
 
 void __init xen_setup_vcpu_info_placement(void);
 
-void xen_play_dead(void);
-void xen_cpu_die(unsigned int cpu);
-int xen_cpu_disable(void);
-
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
 

From 9c3254ad42e7925c3a3907a072185273705bd7b2 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 7 Sep 2008 14:54:41 -0700
Subject: [PATCH 083/142] x86: fix compile error with corruption checking
 disabled

Fix compile error:

 arch/x86/mm/init_32.c: In function 'mem_init':
 arch/x86/mm/init_32.c:908: error: implicit declaration of function 'start_periodic_check_for_corruption'

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 00bb251c6451..50873b211788 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -251,6 +251,10 @@ void start_periodic_check_for_corruption(void);
 static inline void check_for_bios_corruption(void)
 {
 }
+
+static inline void start_periodic_check_for_corruption(void)
+{
+}
 #endif
 
 /* Values used for system_state */

From 43789e21638626d826c0e8d62e50ceb76b9d61ed Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 8 Sep 2008 14:43:11 -0700
Subject: [PATCH 084/142] kbuild: fix cc-option and cc-option-yn

David Sanders wrote:

> I'm getting this error:
> as: unrecognized option `-mtune=generic32'
> I have binutils 2.17.

Use -c instead of -S in cc-option and cc-option-yn, so we can probe
options related to the assembler.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: kbuild devel <kbuild-devel@lists.sourceforge.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 scripts/Kbuild.include | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index d64e6badc942..982dcae7bbe2 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -105,12 +105,12 @@ as-instr = $(call try-run,\
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CFLAGS) $(1) -S -xc /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CFLAGS) $(1) -S -xc /dev/null -o "$$TMP",y,n)
+	$(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign

From 28f7e66fc1da53997a545684b21b91fb3ca3f321 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 8 Sep 2008 12:01:48 -0700
Subject: [PATCH 085/142] x86: prevent binutils from being "smart" and
 generating NOPLs for us

binutils, contrary to documented behaviour, will generate long NOPs (a
P6-or-higher instruction which is broken on at least some VIA chips,
Virtual PC/Virtual Server, and some versions of Qemu) depending on the
-mtune= option, which is not supposed to change architectural
behaviour.

Pass an explicit override to the assembler, in case ends up passing
the -mtune= parameter to gas (gcc 4.3.0 does not appear to.)

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/Makefile_32.cpu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index e372b584e919..b72b4f753113 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -45,3 +45,8 @@ cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
 # cpu entries
 cflags-$(CONFIG_X86_GENERIC) 	+= $(call tune,generic,$(call tune,i686))
 
+# Bug fix for binutils: this option is required in order to keep
+# binutils from generating NOPL instructions against our will.
+ifneq ($(CONFIG_X86_P6_NOP),y)
+cflags-y			+= $(call cc-option,-Wa$(comma)-mtune=generic32,)
+endif

From b2994ef0de252d41f1511e5f87704bedda12dabc Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 9 Sep 2008 17:18:50 -0700
Subject: [PATCH 086/142] x86: signal_64.c: clean up signal_fault()

clean up and make signal_fault() same as 32bit.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 8fbdd23d5cc6..552a331313ff 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -473,12 +473,14 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 {
 	struct task_struct *me = current;
+
 	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-	       me->comm, me->pid, where, frame, regs->ip,
-		   regs->sp, regs->orig_ax);
+		printk(KERN_INFO
+		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       me->comm, me->pid, where, frame,
+		       regs->ip, regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		printk(KERN_CONT "\n");
 	}
 
 	force_sig(SIGSEGV, me);

From 0c40ed71736c20f447843b3c96b715bb9c4904d7 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 9 Sep 2008 17:21:17 -0700
Subject: [PATCH 087/142] x86: signal_64.c: arg for restore_i387_xstate() is
 void __user *

restore_i387_xstate() is declared as:

  int restore_i387_xstate(void __user *buf);

so, make the variable buf void __user *.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 552a331313ff..321da93f2fb7 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -94,7 +94,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	}
 
 	{
-		struct _fpstate __user *buf;
+		void __user *buf;
+
 		err |= __get_user(buf, &sc->fpstate);
 		err |= restore_i387_xstate(buf);
 	}

From 764e8d128f9343027cf09afe8a145e8ff186e129 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 9 Sep 2008 17:22:12 -0700
Subject: [PATCH 088/142] x86: signal_64.c: make handle_signal() similar

Make handle_signal() same as 32bit.

No change in functionality intended.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 57 +++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 321da93f2fb7..43e6862fc7a2 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -338,39 +338,40 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret == 0) {
-		/*
-		 * This has nothing to do with segment registers,
-		 * despite the name.  This magic affects uaccess.h
-		 * macros' behavior.  Reset it to the normal setting.
-		 */
-		set_fs(USER_DS);
+	if (ret)
+		return ret;
 
-		/*
-		 * Clear the direction flag as per the ABI for function entry.
-		 */
-		regs->flags &= ~X86_EFLAGS_DF;
+	/*
+	 * This has nothing to do with segment registers,
+	 * despite the name.  This magic affects uaccess.h
+	 * macros' behavior.  Reset it to the normal setting.
+	 */
+	set_fs(USER_DS);
 
-		/*
-		 * Clear TF when entering the signal handler, but
-		 * notify any tracer that was single-stepping it.
-		 * The tracer may want to single-step inside the
-		 * handler too.
-		 */
-		regs->flags &= ~X86_EFLAGS_TF;
+	/*
+	 * Clear the direction flag as per the ABI for function entry.
+	 */
+	regs->flags &= ~X86_EFLAGS_DF;
 
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-		if (!(ka->sa.sa_flags & SA_NODEFER))
-			sigaddset(&current->blocked, sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->flags &= ~X86_EFLAGS_TF;
 
-		tracehook_signal_handler(sig, info, ka, regs,
-					 test_thread_flag(TIF_SINGLESTEP));
-	}
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 
-	return ret;
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
+	return 0;
 }
 
 #define NR_restart_syscall	\

From f7d0b926ac8c8ec0c7a83ee69409bd2e6bb39f81 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 9 Sep 2008 15:43:22 -0700
Subject: [PATCH 089/142] mm: define USE_SPLIT_PTLOCKS rather than repeating
 expression

Define USE_SPLIT_PTLOCKS as a constant expression rather than repeating
"NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS" all over the place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c       |  2 +-
 include/linux/mm.h       |  6 +++---
 include/linux/mm_types.h | 10 ++++++----
 include/linux/sched.h    |  6 +++---
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aa37469da696..2e1b64088490 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -646,7 +646,7 @@ static spinlock_t *lock_pte(struct page *page)
 {
 	spinlock_t *ptl = NULL;
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 	ptl = __pte_lockptr(page);
 	spin_lock(ptl);
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 72a15dc26bbf..4194bf8e4f6c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -919,7 +919,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 }
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 /*
  * We tuck a spinlock to guard each pagetable page into its struct page,
  * at page->private, with BUILD_BUG_ON to make sure that this will not
@@ -932,14 +932,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 } while (0)
 #define pte_lock_deinit(page)	((page)->mapping = NULL)
 #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
-#else
+#else	/* !USE_SPLIT_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
 #define pte_lock_init(page)	do {} while (0)
 #define pte_lock_deinit(page)	do {} while (0)
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* USE_SPLIT_PTLOCKS */
 
 static inline void pgtable_page_ctor(struct page *page)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bf334138c7c1..9d49fa36bbef 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -21,11 +21,13 @@
 
 struct address_space;
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
+
+#if USE_SPLIT_PTLOCKS
 typedef atomic_long_t mm_counter_t;
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else  /* !USE_SPLIT_PTLOCKS */
 typedef unsigned long mm_counter_t;
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -65,7 +67,7 @@ struct page {
 						 * see PAGE_MAPPING_ANON below.
 						 */
 	    };
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 	    spinlock_t ptl;
 #endif
 	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad15..272c35309df2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 /*
  * The mm counters are not protected by its page_table_lock,
  * so must be incremented atomically.
@@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
 #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
 
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else  /* !USE_SPLIT_PTLOCKS */
 /*
  * The mm counters are protected by its page_table_lock,
  * so can be incremented directly.
@@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
 
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
 
 #define get_mm_rss(mm)					\
 	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

From 6a9e91846bf52cc70a0417de19fdfac224c435c4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 9 Sep 2008 15:43:25 -0700
Subject: [PATCH 090/142] xen: fix pinning when not using split pte locks

We only pin PTE pages when using split PTE locks, so don't do the
pin/unpin when attaching/detaching pte pages to a pinned pagetable.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b106e825d266..8ca2f88bde1e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -826,7 +826,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 
 		if (!PageHighMem(page)) {
 			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
-			if (level == PT_PTE)
+			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
 		} else
 			/* make sure there are no stray mappings of
@@ -894,7 +894,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
 
 	if (PagePinned(page)) {
 		if (!PageHighMem(page)) {
-			if (level == PT_PTE)
+			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 			make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 		}

From a0a29b62a9cac6b7d83b7514679f2ed8d33d4372 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Thu, 11 Sep 2008 23:27:52 +0200
Subject: [PATCH 091/142] x86, microcode rework, v2

this is a rework of the microcode splitup in tip/x86/microcode

(1) I think this new interface is cleaner (look at the changes
    in 'struct microcode_ops' in microcode.h);

(2) it's -64 lines of code;

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c       |  86 ++++----
 arch/x86/kernel/microcode_amd.c   | 329 ++++++++++++++----------------
 arch/x86/kernel/microcode_intel.c | 220 +++++++++-----------
 include/asm-x86/microcode.h       |  17 +-
 4 files changed, 294 insertions(+), 358 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index b2f84ce5eed3..902dada2eb6d 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -110,50 +110,28 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
-void __user *user_buffer;		/* user area microcode data buffer */
-EXPORT_SYMBOL_GPL(user_buffer);
-unsigned int user_buffer_size;		/* it's size */
-EXPORT_SYMBOL_GPL(user_buffer_size);
-
-static int do_microcode_update(void)
+static int do_microcode_update(const void __user *buf, size_t size)
 {
-	long cursor = 0;
-	int error = 0;
-	void *new_mc = NULL;
-	int cpu;
 	cpumask_t old;
+	int error = 0;
+	int cpu;
 
 	old = current->cpus_allowed;
 
-	while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) {
-		if (microcode_ops->microcode_sanity_check != NULL)
-			error = microcode_ops->microcode_sanity_check(new_mc);
-		if (error)
-			goto out;
-		/*
-		 * It's possible the data file has multiple matching ucode,
-		 * lets keep searching till the latest version
-		 */
-		for_each_online_cpu(cpu) {
-			struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	for_each_online_cpu(cpu) {
+		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-			if (!uci->valid)
-				continue;
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-			error = microcode_ops->get_matching_microcode(new_mc,
-								      cpu);
-			if (error < 0)
-				goto out;
-			if (error == 1)
-				microcode_ops->apply_microcode(cpu);
-		}
-		vfree(new_mc);
+		if (!uci->valid)
+			continue;
+
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+		error = microcode_ops->request_microcode_user(cpu, buf, size);
+		if (error < 0)
+			goto out;
+		if (!error)
+			microcode_ops->apply_microcode(cpu);
 	}
 out:
-	if (cursor > 0)
-		vfree(new_mc);
-	if (cursor < 0)
-		error = cursor;
 	set_cpus_allowed_ptr(current, &old);
 	return error;
 }
@@ -178,10 +156,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	user_buffer = (void __user *) buf;
-	user_buffer_size = (int) len;
-
-	ret = do_microcode_update();
+	ret = do_microcode_update(buf, len);
 	if (!ret)
 		ret = (ssize_t)len;
 
@@ -231,7 +206,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 
 /* fake device for request_firmware */
 struct platform_device *microcode_pdev;
-EXPORT_SYMBOL_GPL(microcode_pdev);
 
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
@@ -252,8 +226,12 @@ static ssize_t reload_store(struct sys_device *dev,
 		if (cpu_online(cpu)) {
 			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 			mutex_lock(&microcode_mutex);
-			if (uci->valid)
-				err = microcode_ops->cpu_request_microcode(cpu);
+			if (uci->valid) {
+				err = microcode_ops->request_microcode_fw(cpu,
+						&microcode_pdev->dev);
+				if (!err)
+					microcode_ops->apply_microcode(cpu);
+			}
 			mutex_unlock(&microcode_mutex);
 			set_cpus_allowed_ptr(current, &old);
 		}
@@ -315,7 +293,7 @@ static void collect_cpu_info(int cpu)
 		uci->valid = 1;
 }
 
-static void microcode_resume_cpu(int cpu)
+static int microcode_resume_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct cpu_signature nsig;
@@ -323,7 +301,7 @@ static void microcode_resume_cpu(int cpu)
 	pr_debug("microcode: CPU%d resumed\n", cpu);
 
 	if (!uci->mc.valid_mc)
-		return;
+		return 1;
 
 	/*
 	 * Let's verify that the 'cached' ucode does belong
@@ -331,21 +309,22 @@ static void microcode_resume_cpu(int cpu)
 	 */
 	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
 		microcode_fini_cpu(cpu);
-		return;
+		return -1;
 	}
 
 	if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
 		microcode_fini_cpu(cpu);
 		/* Should we look for a new ucode here? */
-		return;
+		return 1;
 	}
 
-	microcode_ops->apply_microcode(cpu);
+	return 0;
 }
 
 void microcode_update_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int err = 0;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(raw_smp_processor_id() != cpu);
@@ -356,12 +335,17 @@ void microcode_update_cpu(int cpu)
 	 * otherwise just request a firmware:
 	 */
 	if (uci->valid) {
-		microcode_resume_cpu(cpu);
+		err = microcode_resume_cpu(cpu);
 	} else {	
 		collect_cpu_info(cpu);
 		if (uci->valid && system_state == SYSTEM_RUNNING)
-			microcode_ops->cpu_request_microcode(cpu);
+			err = microcode_ops->request_microcode_fw(cpu,
+					&microcode_pdev->dev);
 	}
+
+	if (!err)
+		microcode_ops->apply_microcode(cpu);
+
 	mutex_unlock(&microcode_mutex);
 }
 
@@ -414,7 +398,7 @@ static int mc_sysdev_resume(struct sys_device *dev)
 		return 0;
 	pr_debug("microcode: CPU%d resumed\n", cpu);
 	/* only CPU 0 will apply ucode here */
-	microcode_ops->apply_microcode(0);
+	microcode_update_cpu(0);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index d606a05545ca..6815837a7753 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -59,7 +59,7 @@ MODULE_LICENSE("GPL v2");
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
-struct equiv_cpu_entry *equiv_cpu_table;
+static struct equiv_cpu_entry *equiv_cpu_table;
 
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
@@ -83,36 +83,37 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 	return 0;
 }
 
-static int get_matching_microcode_amd(void *mc, int cpu)
+static int get_matching_microcode(int cpu, void *mc, int rev)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct microcode_header_amd *mc_header = mc;
-	unsigned long total_size = get_totalsize(mc_header);
-	void *new_mc;
 	struct pci_dev *nb_pci_dev, *sb_pci_dev;
 	unsigned int current_cpu_id;
 	unsigned int equiv_cpu_id = 0x00;
 	unsigned int i = 0;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
-	/* This is a tricky part. We might be called from a write operation */
-	/* to the device file instead of the usual process of firmware */
-	/* loading. This routine needs to be able to distinguish both */
-/* cases. This is done by checking if there alread is a equivalent */
-	/* CPU table installed. If not, we're written through */
-	/* /dev/cpu/microcode. */
-/* Since we ignore all checks. The error case in which going through */
-/* firmware loading and that table is not loaded has already been */
-	/* checked earlier. */
+	/*
+	 * dimm: do we need this? Why an update via /dev/... is different
+	 * from the one via firmware?
+	 *
+	 * This is a tricky part. We might be called from a write operation
+	 * to the device file instead of the usual process of firmware
+	 * loading. This routine needs to be able to distinguish both
+	 * cases. This is done by checking if there alread is a equivalent
+	 * CPU table installed. If not, we're written through
+	 * /dev/cpu/microcode.
+	 * Since we ignore all checks. The error case in which going through
+	 * firmware loading and that table is not loaded has already been
+	 * checked earlier.
+	 */
+	BUG_ON(equiv_cpu_table == NULL);
+#if 0
 	if (equiv_cpu_table == NULL) {
 		printk(KERN_INFO "microcode: CPU%d microcode update with "
 		       "version 0x%x (current=0x%x)\n",
 		       cpu, mc_header->patch_id, uci->cpu_sig.rev);
 		goto out;
 	}
-
+#endif
 	current_cpu_id = cpuid_eax(0x00000001);
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
@@ -175,27 +176,9 @@ static int get_matching_microcode_amd(void *mc, int cpu)
 		pci_dev_put(sb_pci_dev);
 	}
 
-	if (mc_header->patch_id <= uci->cpu_sig.rev)
+	if (mc_header->patch_id <= rev)
 		return 0;
 
-	printk(KERN_INFO "microcode: CPU%d found a matching microcode "
-	       "update with version 0x%x (current=0x%x)\n",
-	       cpu, mc_header->patch_id, uci->cpu_sig.rev);
-
-out:
-	new_mc = vmalloc(UCODE_MAX_SIZE);
-	if (!new_mc) {
-		printk(KERN_ERR "microcode: error, can't allocate memory\n");
-		return -ENOMEM;
-	}
-	memset(new_mc, 0, UCODE_MAX_SIZE);
-
-	/* free previous update file */
-	vfree(uci->mc.mc_amd);
-
-	memcpy(new_mc, mc, total_size);
-
-	uci->mc.mc_amd = new_mc;
 	return 1;
 }
 
@@ -245,104 +228,65 @@ static void apply_microcode_amd(int cpu)
 	uci->cpu_sig.rev = rev;
 }
 
-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-extern void __user *user_buffer;        /* user area microcode data buffer */
-extern unsigned int user_buffer_size;   /* it's size */
-
-static long get_next_ucode_amd(void **mc, long offset)
+static void * get_next_ucode(u8 *buf, unsigned int size,
+			int (*get_ucode_data)(void *, const void *, size_t),
+			unsigned int *mc_size)
 {
-	struct microcode_header_amd mc_header;
-	unsigned long total_size;
+	unsigned int total_size;
+#define UCODE_UNKNOWN_HDR	8
+	u8 hdr[UCODE_UNKNOWN_HDR];
+	void *mc;
 
-	/* No more data */
-	if (offset >= user_buffer_size)
-		return 0;
-	if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
-		printk(KERN_ERR "microcode: error! Can not read user data\n");
-		return -EFAULT;
-	}
-	total_size = get_totalsize(&mc_header);
-	if (offset + total_size > user_buffer_size) {
-		printk(KERN_ERR "microcode: error! Bad total size in microcode "
-		       "data file\n");
-		return -EINVAL;
-	}
-	*mc = vmalloc(UCODE_MAX_SIZE);
-	if (!*mc)
-		return -ENOMEM;
-	memset(*mc, 0, UCODE_MAX_SIZE);
+	if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR))
+		return NULL;
 
-	if (copy_from_user(*mc, user_buffer + offset, total_size)) {
-		printk(KERN_ERR "microcode: error! Can not read user data\n");
-		vfree(*mc);
-		return -EFAULT;
-	}
-	return offset + total_size;
-}
-#else
-#define get_next_ucode_amd() NULL
-#endif
-
-static long get_next_ucode_from_buffer_amd(void **mc, void *buf,
-				       unsigned long size, long offset)
-{
-	struct microcode_header_amd *mc_header;
-	unsigned long total_size;
-	unsigned char *buf_pos = buf;
-
-	/* No more data */
-	if (offset >= size)
-		return 0;
-
-	if (buf_pos[offset] != UCODE_UCODE_TYPE) {
+	if (hdr[0] != UCODE_UCODE_TYPE) {
 		printk(KERN_ERR "microcode: error! "
 		       "Wrong microcode payload type field\n");
-		return -EINVAL;
+		return NULL;
 	}
 
-	mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
+	/* Why not by means of get_totalsize(hdr)? */
+	total_size = (unsigned long) (hdr[4] + (hdr[5] << 8));
 
-	total_size = (unsigned long) (buf_pos[offset+4] +
-				      (buf_pos[offset+5] << 8));
+	printk(KERN_INFO "microcode: size %u, total_size %u\n",
+		size, total_size);
 
-	printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
-		size, total_size, offset);
-
-	if (offset + total_size > size) {
+	if (total_size > size || total_size > UCODE_MAX_SIZE) {
 		printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-		return -EINVAL;
+		return NULL;
 	}
 
-	*mc = vmalloc(UCODE_MAX_SIZE);
-	if (!*mc) {
-		printk(KERN_ERR "microcode: error! "
-		       "Can not allocate memory for microcode patch\n");
-		return -ENOMEM;
+	mc = vmalloc(UCODE_MAX_SIZE);
+	if (mc) {
+		memset(mc, 0, UCODE_MAX_SIZE);
+		if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) {
+			vfree(mc);
+			mc = NULL;
+		} else
+			*mc_size = total_size + UCODE_UNKNOWN_HDR;
 	}
-
-	memset(*mc, 0, UCODE_MAX_SIZE);
-	memcpy(*mc, buf + offset + 8, total_size);
-
-	return offset + total_size + 8;
+#undef UCODE_UNKNOWN_HDR
+	return mc;
 }
 
-static long install_equiv_cpu_table(void *buf, unsigned long size, long offset)
+
+static int install_equiv_cpu_table(u8 *buf,
+		int (*get_ucode_data)(void *, const void *, size_t))
 {
-	unsigned int *buf_pos = buf;
+#define UCODE_HEADER_SIZE	12
+	u8 *hdr[UCODE_HEADER_SIZE];
+	unsigned int *buf_pos = (unsigned int *)hdr;
+	unsigned long size;
 
-	/* No more data */
-	if (offset >= size)
+	if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE))
 		return 0;
 
-	if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) {
-		printk(KERN_ERR "microcode: error! "
-		       "Wrong microcode equivalnet cpu table type field\n");
-		return 0;
-	}
+	size = buf_pos[2];
 
-	if (size == 0) {
+	if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
 		printk(KERN_ERR "microcode: error! "
-		       "Wrong microcode equivalnet cpu table length\n");
+		       "Wrong microcode equivalnet cpu table\n");
 		return 0;
 	}
 
@@ -352,79 +296,118 @@ static long install_equiv_cpu_table(void *buf, unsigned long size, long offset)
 		return 0;
 	}
 
-	memset(equiv_cpu_table, 0, size);
-	memcpy(equiv_cpu_table, &buf_pos[3], size);
+	buf += UCODE_HEADER_SIZE;
+	if (get_ucode_data(equiv_cpu_table, buf, size)) {
+		vfree(equiv_cpu_table);
+		return 0;
+	}
 
-	return size + 12; /* add header length */
+	return size + UCODE_HEADER_SIZE; /* add header length */
+#undef UCODE_HEADER_SIZE
 }
 
-/* fake device for request_firmware */
-extern struct platform_device *microcode_pdev;
-
-static int cpu_request_microcode_amd(int cpu)
+static void free_equiv_cpu_table(void)
 {
-	char name[30];
-	const struct firmware *firmware;
-	void *buf;
-	unsigned int *buf_pos;
-	unsigned long size;
-	long offset = 0;
-	int error;
-	void *mc;
-
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
-	sprintf(name, "amd-ucode/microcode_amd.bin");
-	error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin",
-				 &microcode_pdev->dev);
-	if (error) {
-		printk(KERN_ERR "microcode: ucode data file %s load failed\n",
-		       name);
-		return error;
+	if (equiv_cpu_table) {
+		vfree(equiv_cpu_table);
+		equiv_cpu_table = NULL;
 	}
+}
 
-	buf_pos = (unsigned int *)firmware->data;
-	buf = (void *)firmware->data;
-	size = firmware->size;
-
-	if (buf_pos[0] != UCODE_MAGIC) {
-		printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n");
-		return -EINVAL;
-	}
-
-	offset = install_equiv_cpu_table(buf, buf_pos[2], offset);
+static int generic_load_microcode(int cpu, void *data, size_t size,
+		int (*get_ucode_data)(void *, const void *, size_t))
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+	int new_rev = uci->cpu_sig.rev;
+	unsigned int leftover;
+	unsigned long offset;
 
+	offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
 	if (!offset) {
 		printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
 		return -EINVAL;
 	}
 
-	while ((offset =
-		get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) {
-		error = get_matching_microcode_amd(mc, cpu);
-		if (error < 0)
+	ucode_ptr += offset;
+	leftover = size - offset;
+
+	while (leftover) {
+		unsigned int mc_size;
+		struct microcode_header_amd *mc_header;
+
+		mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
+		if (!mc)
 			break;
-		/*
-		 * It's possible the data file has multiple matching ucode,
-		 * lets keep searching till the latest version
-		 */
-		if (error == 1) {
-			apply_microcode_amd(cpu);
-			error = 0;
-		}
-		vfree(mc);
+
+		mc_header = (struct microcode_header_amd *)mc;
+		if (get_matching_microcode(cpu, mc, new_rev)) {
+			new_rev = mc_header->patch_id;
+			new_mc  = mc;
+		} else 
+			vfree(mc);
+
+		ucode_ptr += mc_size;
+		leftover  -= mc_size;
 	}
-	if (offset > 0) {
-		vfree(mc);
-		vfree(equiv_cpu_table);
-		equiv_cpu_table = NULL;
+
+	if (new_mc) {
+		if (!leftover) {
+			if (uci->mc.mc_amd)
+				vfree(uci->mc.mc_amd);
+			uci->mc.mc_amd = (struct microcode_amd *)new_mc;
+			pr_debug("microcode: CPU%d found a matching microcode update with"
+				" version 0x%x (current=0x%x)\n",
+				cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev);
+		} else
+			vfree(new_mc);
 	}
-	if (offset < 0)
-		error = offset;
+
+	free_equiv_cpu_table();
+
+	return (int)leftover;
+}
+
+static int get_ucode_fw(void *to, const void *from, size_t n)
+{
+	memcpy(to, from, n);
+	return 0;
+}
+
+static int request_microcode_fw(int cpu, struct device *device)
+{
+	const char *fw_name = "amd-ucode/microcode_amd.bin";
+	const struct firmware *firmware;
+	int ret;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	ret = request_firmware(&firmware, fw_name, device);
+	if (ret) {
+		printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
+		return ret;
+	}
+
+	ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
+			&get_ucode_fw);
+
 	release_firmware(firmware);
 
-	return error;
+	return ret;
+}
+
+static int get_ucode_user(void *to, const void *from, size_t n)
+{
+	return copy_from_user(to, from, n);
+}
+
+static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
 }
 
 static void microcode_fini_cpu_amd(int cpu)
@@ -436,10 +419,8 @@ static void microcode_fini_cpu_amd(int cpu)
 }
 
 static struct microcode_ops microcode_amd_ops = {
-	.get_next_ucode                   = get_next_ucode_amd,
-	.get_matching_microcode           = get_matching_microcode_amd,
-	.microcode_sanity_check           = NULL,
-	.cpu_request_microcode            = cpu_request_microcode_amd,
+	.request_microcode_user           = request_microcode_user,
+	.request_microcode_fw             = request_microcode_fw,
 	.collect_cpu_info                 = collect_cpu_info_amd,
 	.apply_microcode                  = apply_microcode_amd,
 	.microcode_fini_cpu               = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index c9b53202ba3d..f4930b55c6a0 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -155,15 +155,15 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 	return 0;
 }
 
-static inline int microcode_update_match(int cpu_num,
-	struct microcode_header_intel *mc_header, int sig, int pf)
+static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+	return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
+}
 
-	if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf)
-		|| mc_header->rev <= uci->cpu_sig.rev)
-		return 0;
-	return 1;
+static inline int 
+update_match_revision(struct microcode_header_intel *mc_header,	int rev)
+{
+	return (mc_header->rev <= rev) ? 0 : 1;
 }
 
 static int microcode_sanity_check(void *mc)
@@ -248,51 +248,36 @@ static int microcode_sanity_check(void *mc)
 /*
  * return 0 - no update found
  * return 1 - found update
- * return < 0 - error
  */
-static int get_matching_microcode(void *mc, int cpu)
+static int
+get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header;
 	unsigned long total_size = get_totalsize(mc_header);
 	int ext_sigcount, i;
 	struct extended_signature *ext_sig;
-	void *new_mc;
 
-	if (microcode_update_match(cpu, mc_header,
-			mc_header->sig, mc_header->pf))
-		goto find;
+	if (!update_match_revision(mc_header, rev))
+		return 0;
 
+	if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
+		return 1;
+
+	/* Look for ext. headers: */
 	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
 		return 0;
 
 	ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
 	ext_sigcount = ext_header->count;
 	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
 	for (i = 0; i < ext_sigcount; i++) {
-		if (microcode_update_match(cpu, mc_header,
-				ext_sig->sig, ext_sig->pf))
-			goto find;
+		if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
+			return 1;
 		ext_sig++;
 	}
 	return 0;
-find:
-	pr_debug("microcode: CPU%d found a matching microcode update with"
-		 " version 0x%x (current=0x%x)\n",
-		 cpu, mc_header->rev, uci->cpu_sig.rev);
-	new_mc = vmalloc(total_size);
-	if (!new_mc) {
-		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
-		return -ENOMEM;
-	}
-
-	/* free previous update file */
-	vfree(uci->mc.mc_intel);
-
-	memcpy(new_mc, mc, total_size);
-	uci->mc.mc_intel = new_mc;
-	return 1;
 }
 
 static void apply_microcode(int cpu)
@@ -300,7 +285,7 @@ static void apply_microcode(int cpu)
 	unsigned long flags;
 	unsigned int val[2];
 	int cpu_num = raw_smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
@@ -338,116 +323,105 @@ static void apply_microcode(int cpu)
 	uci->cpu_sig.rev = val[1];
 }
 
-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-extern void __user *user_buffer;        /* user area microcode data buffer */
-extern unsigned int user_buffer_size;   /* it's size */
-
-static long get_next_ucode(void **mc, long offset)
+static int generic_load_microcode(int cpu, void *data, size_t size,
+		int (*get_ucode_data)(void *, const void *, size_t))
 {
-	struct microcode_header_intel mc_header;
-	unsigned long total_size;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+	int new_rev = uci->cpu_sig.rev;
+	unsigned int leftover = size;
 
-	/* No more data */
-	if (offset >= user_buffer_size)
-		return 0;
-	if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
-		printk(KERN_ERR "microcode: error! Can not read user data\n");
-		return -EFAULT;
-	}
-	total_size = get_totalsize(&mc_header);
-	if (offset + total_size > user_buffer_size) {
-		printk(KERN_ERR "microcode: error! Bad total size in microcode "
-				"data file\n");
-		return -EINVAL;
-	}
-	*mc = vmalloc(total_size);
-	if (!*mc)
-		return -ENOMEM;
-	if (copy_from_user(*mc, user_buffer + offset, total_size)) {
-		printk(KERN_ERR "microcode: error! Can not read user data\n");
-		vfree(*mc);
-		return -EFAULT;
-	}
-	return offset + total_size;
-}
-#endif
+	while (leftover) {
+		struct microcode_header_intel mc_header;
+		unsigned int mc_size;
 
-static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
-	unsigned long size, long offset)
-{
-	struct microcode_header_intel *mc_header;
-	unsigned long total_size;
+		if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
+			break;
 
-	/* No more data */
-	if (offset >= size)
-		return 0;
-	mc_header = (struct microcode_header_intel *)(buf + offset);
-	total_size = get_totalsize(mc_header);
+		mc_size = get_totalsize(&mc_header);
+		if (!mc_size || mc_size > leftover) {
+			printk(KERN_ERR "microcode: error!"
+					"Bad data in microcode data file\n");
+			break;
+		}
 
-	if (offset + total_size > size) {
-		printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-		return -EINVAL;
+		mc = vmalloc(mc_size);
+		if (!mc)
+			break;
+
+		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
+		    microcode_sanity_check(mc) < 0) {
+			vfree(mc);
+			break;
+		}
+
+		if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+			new_rev = mc_header.rev;
+			new_mc  = mc;
+		} else
+			vfree(mc);
+
+		ucode_ptr += mc_size;
+		leftover  -= mc_size;
 	}
 
-	*mc = vmalloc(total_size);
-	if (!*mc) {
-		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
-		return -ENOMEM;
+	if (new_mc) {
+		if (!leftover) {
+			if (uci->mc.mc_intel)
+				vfree(uci->mc.mc_intel);
+			uci->mc.mc_intel = (struct microcode_intel *)new_mc;
+			pr_debug("microcode: CPU%d found a matching microcode update with"
+				 " version 0x%x (current=0x%x)\n",
+				cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev);
+		} else
+			vfree(new_mc);
 	}
-	memcpy(*mc, buf + offset, total_size);
-	return offset + total_size;
+
+	return (int)leftover;
 }
 
-/* fake device for request_firmware */
-extern struct platform_device *microcode_pdev;
+static int get_ucode_fw(void *to, const void *from, size_t n)
+{
+	memcpy(to, from, n);
+	return 0;
+}
 
-static int cpu_request_microcode(int cpu)
+static int request_microcode_fw(int cpu, struct device *device)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
-	const u8 *buf;
-	unsigned long size;
-	long offset = 0;
-	int error;
-	void *mc;
+	int ret;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu != raw_smp_processor_id());
 	sprintf(name, "intel-ucode/%02x-%02x-%02x",
 		c->x86, c->x86_model, c->x86_mask);
-	error = request_firmware(&firmware, name, &microcode_pdev->dev);
-	if (error) {
+	ret = request_firmware(&firmware, name, device);
+	if (ret) {
 		pr_debug("microcode: data file %s load failed\n", name);
-		return error;
+		return ret;
 	}
-	buf = firmware->data;
-	size = firmware->size;
-	while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
-			> 0) {
-		error = microcode_sanity_check(mc);
-		if (error)
-			break;
-		error = get_matching_microcode(mc, cpu);
-		if (error < 0)
-			break;
-		/*
-		 * It's possible the data file has multiple matching ucode,
-		 * lets keep searching till the latest version
-		 */
-		if (error == 1) {
-			apply_microcode(cpu);
-			error = 0;
-		}
-		vfree(mc);
-	}
-	if (offset > 0)
-		vfree(mc);
-	if (offset < 0)
-		error = offset;
+
+	ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
+			&get_ucode_fw);
+
 	release_firmware(firmware);
 
-	return error;
+	return ret;
+}
+
+static int get_ucode_user(void *to, const void *from, size_t n)
+{
+	return copy_from_user(to, from, n);
+}
+
+static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+
+	return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
 }
 
 static void microcode_fini_cpu(int cpu)
@@ -459,10 +433,8 @@ static void microcode_fini_cpu(int cpu)
 }
 
 static struct microcode_ops microcode_intel_ops = {
-	.get_next_ucode                   = get_next_ucode,
-	.get_matching_microcode           = get_matching_microcode,
-	.microcode_sanity_check           = microcode_sanity_check,
-	.cpu_request_microcode            = cpu_request_microcode,
+	.request_microcode_user		  = request_microcode_user,
+	.request_microcode_fw             = request_microcode_fw,
 	.collect_cpu_info                 = collect_cpu_info,
 	.apply_microcode                  = apply_microcode,
 	.microcode_fini_cpu               = microcode_fini_cpu,
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index 7ceff48fa657..e2887facdb4a 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -5,17 +5,16 @@ extern int microcode_init(void *opaque, struct module *module);
 extern void microcode_exit(void);
 
 struct cpu_signature;
+struct device;
 
 struct microcode_ops {
-	long (*get_next_ucode)(void **mc, long offset);
-	long (*microcode_get_next_ucode)(void **mc, long offset);
-	int (*get_matching_microcode)(void *mc, int cpu);
-	int (*microcode_sanity_check)(void *mc);
-	int (*cpu_request_microcode)(int cpu);
-	int (*collect_cpu_info)(int cpu_num, struct cpu_signature *csig);
-	void (*apply_microcode)(int cpu);
-	void (*microcode_fini_cpu)(int cpu);
-	void (*clear_patch)(void *data);
+	int  (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
+	int  (*request_microcode_fw) (int cpu, struct device *device);
+
+	void (*apply_microcode) (int cpu);
+
+	int  (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
+	void (*microcode_fini_cpu) (int cpu);
 };
 
 struct microcode_header_intel {

From a1c75cc5018f17ff6d80ce45a13435b1536f76db Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 14:50:26 +0200
Subject: [PATCH 092/142] x86, microcode rework, v2, fix

based on patch from Dmitry Adamushko.

- add missing vfree()
- update debug printks

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode.c       | 4 +---
 arch/x86/kernel/microcode_amd.c   | 6 ++++--
 arch/x86/kernel/microcode_intel.c | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 902dada2eb6d..0c2634f4fd7c 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -70,8 +70,6 @@
  *		Fix sigmatch() macro to handle old CPUs with pf == 0.
  *		Thanks to Stuart Swales for pointing out this bug.
  */
-
-/* #define DEBUG pr_debug */
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -396,7 +394,7 @@ static int mc_sysdev_resume(struct sys_device *dev)
 
 	if (!cpu_online(cpu))
 		return 0;
-	pr_debug("microcode: CPU%d resumed\n", cpu);
+
 	/* only CPU 0 will apply ucode here */
 	microcode_update_cpu(0);
 	return 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 6815837a7753..48aec9f48e4f 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -92,7 +92,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 	unsigned int i = 0;
 
 	/*
-	 * dimm: do we need this? Why an update via /dev/... is different
+	 * FIXME! dimm: do we need this? Why an update via /dev/... is different
 	 * from the one via firmware?
 	 *
 	 * This is a tricky part. We might be called from a write operation
@@ -246,7 +246,7 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
 		return NULL;
 	}
 
-	/* Why not by means of get_totalsize(hdr)? */
+	/* FIXME! dimm: Why not by means of get_totalsize(hdr)? */
 	total_size = (unsigned long) (hdr[4] + (hdr[5] << 8));
 
 	printk(KERN_INFO "microcode: size %u, total_size %u\n",
@@ -342,6 +342,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 
 		mc_header = (struct microcode_header_amd *)mc;
 		if (get_matching_microcode(cpu, mc, new_rev)) {
+			if (new_mc)
+				vfree(new_mc);
 			new_rev = mc_header->patch_id;
 			new_mc  = mc;
 		} else 
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index f4930b55c6a0..48ed3cef58c1 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,8 +70,6 @@
  *		Fix sigmatch() macro to handle old CPUs with pf == 0.
  *		Thanks to Stuart Swales for pointing out this bug.
  */
-
-/* #define DEBUG */ /* pr_debug */
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -356,6 +354,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 		}
 
 		if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+			if (new_mc)
+				vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
 		} else

From 3d0aedd9538e6be8afec1a9d8b084bf90bc91495 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 12 Sep 2008 17:01:09 -0700
Subject: [PATCH 093/142] x86: signal: put give_sigsegv of setup frames
 together

When setup frame fails, force_sigsegv is called and returns -EFAULT.
There is similar code in ia32_setup_frame(), ia32_setup_rt_frame(),
__setup_frame() and __setup_rt_frame().

Make them identical.

No change in functionality intended.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 26 +++++++++-----------------
 arch/x86/kernel/signal_32.c | 31 ++++++++++++++-----------------
 arch/x86/kernel/signal_64.c | 17 +++++++++--------
 3 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 8d64c1bc8474..5f42cfcc1c5a 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -444,21 +444,21 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err |= __put_user(sig, &frame->sig);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	if (_COMPAT_NSIG_WORDS > 1) {
 		err |= __copy_to_user(frame->extramask, &set->sig[1],
 				      sizeof(frame->extramask));
 		if (err)
-			goto give_sigsegv;
+			return -EFAULT;
 	}
 
 	if (ka->sa.sa_flags & SA_RESTORER) {
@@ -479,7 +479,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	 */
 	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
@@ -502,10 +502,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 #endif
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -533,14 +529,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
 	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
 	err |= copy_siginfo_to_user32(&frame->info, info);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Create the ucontext.  */
 	if (cpu_has_xsave)
@@ -556,7 +552,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 				     regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
@@ -571,7 +567,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 */
 	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
@@ -599,8 +595,4 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 #endif
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b668efc18eae..1c22e0067fe7 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -349,21 +349,21 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err = __put_user(sig, &frame->sig);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	if (_NSIG_WORDS > 1) {
 		err = __copy_to_user(&frame->extramask, &set->sig[1],
 				      sizeof(frame->extramask));
 		if (err)
-			goto give_sigsegv;
+			return -EFAULT;
 	}
 
 	if (current->mm->context.vdso)
@@ -388,7 +388,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long)frame;
@@ -403,10 +403,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	regs->cs = __USER_CS;
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -420,14 +416,14 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, info);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Create the ucontext.  */
 	if (cpu_has_xsave)
@@ -443,7 +439,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 				regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up to return from userspace.  */
 	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -463,7 +459,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long)frame;
@@ -478,10 +474,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->cs = __USER_CS;
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 /*
@@ -506,6 +498,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	else
 		ret = __setup_frame(usig, ka, set, regs);
 
+	if (ret) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+
 	return ret;
 }
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 43e6862fc7a2..3b79e179ba3a 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -215,12 +215,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	if (ka->sa.sa_flags & SA_SIGINFO) {
 		err |= copy_siginfo_to_user(&frame->info, info);
 		if (err)
-			goto give_sigsegv;
+			return -EFAULT;
 	}
 
 	/* Create the ucontext.  */
@@ -248,11 +248,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
 	} else {
 		/* could use a vstub here */
-		goto give_sigsegv;
+		return -EFAULT;
 	}
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->di = sig;
@@ -272,10 +272,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->cs = __USER_CS;
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 /*
@@ -297,6 +293,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 #endif
 	ret = __setup_rt_frame(sig, ka, info, set, regs);
 
+	if (ret) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+
 	return ret;
 }
 

From 2ba48e16e78216bb5b9fd08a088bfefda478df25 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 12 Sep 2008 17:02:53 -0700
Subject: [PATCH 094/142] x86: signal: remove unneeded err handling

This patch eliminates unused or unneeded variable handling.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 11 ++++-------
 arch/x86/kernel/signal_32.c | 11 ++++-------
 arch/x86/kernel/signal_64.c |  5 ++---
 3 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 5f42cfcc1c5a..e47bed2440ee 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -446,18 +446,15 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	err |= __put_user(sig, &frame->sig);
-	if (err)
+	if (__put_user(sig, &frame->sig))
 		return -EFAULT;
 
-	err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
-	if (err)
+	if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
 		return -EFAULT;
 
 	if (_COMPAT_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-		if (err)
+		if (__copy_to_user(frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
 			return -EFAULT;
 	}
 
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 1c22e0067fe7..d433861a6599 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -351,18 +351,15 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	err = __put_user(sig, &frame->sig);
-	if (err)
+	if (__put_user(sig, &frame->sig))
 		return -EFAULT;
 
-	err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
-	if (err)
+	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
 		return -EFAULT;
 
 	if (_NSIG_WORDS > 1) {
-		err = __copy_to_user(&frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-		if (err)
+		if (__copy_to_user(&frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
 			return -EFAULT;
 	}
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 3b79e179ba3a..a21c85197295 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -210,7 +210,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
 		if (save_i387_xstate(fp) < 0)
-			err |= -1;
+			return -EFAULT;
 	} else
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
@@ -218,8 +218,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		return -EFAULT;
 
 	if (ka->sa.sa_flags & SA_SIGINFO) {
-		err |= copy_siginfo_to_user(&frame->info, info);
-		if (err)
+		if (copy_siginfo_to_user(&frame->info, info))
 			return -EFAULT;
 	}
 

From e6babb6b7fed93c93f8fc5ef8ebd3a474fc2df3e Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 12 Sep 2008 17:03:31 -0700
Subject: [PATCH 095/142] x86: signal: introduce do_rt_sigreturn()

introduce do_rt_sigreturn(), to collect common part of sys_rt_sigreturn().

No change in functionality intended.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 12 +++++++++---
 arch/x86/kernel/signal_64.c | 11 ++++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d433861a6599..da3cf3270f83 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -215,9 +215,8 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 	return 0;
 }
 
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+static long do_rt_sigreturn(struct pt_regs *regs)
 {
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
 	struct rt_sigframe __user *frame;
 	unsigned long ax;
 	sigset_t set;
@@ -243,10 +242,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 	return ax;
 
 badframe:
-	signal_fault(regs, frame, "rt sigreturn");
+	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
 }
 
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+
 /*
  * Set up a signal frame.
  */
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index a21c85197295..bf77d4789a2d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,11 +104,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	return err;
 }
 
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+static long do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	sigset_t set;
 	unsigned long ax;
+	sigset_t set;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -131,10 +131,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	return ax;
 
 badframe:
-	signal_fault(regs, frame, "sigreturn");
+	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
 }
 
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
+
 /*
  * Set up a signal frame.
  */

From 5649b7c30316a51792808422ac03ee825d26aa5e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Sep 2008 09:29:09 +0200
Subject: [PATCH 096/142] x86: add DMI quirk for AMI BIOS which corrupts
 address 0xc000 during resume

Alan Jenkins and Andy Wettstein reported a suspend/resume memory
corruption bug and extensively documented it here:

   http://bugzilla.kernel.org/show_bug.cgi?id=11237

The bug is that the BIOS overwrites 1K of memory at 0xc000 physical,
without registering it in e820 as reserved or giving the kernel any
idea about this.

Detect AMI BIOSen and reserve that 1K.

We paint this bug around with a very broad brush (reserving that 1K on all
AMI BIOS systems), as the bug was extremely hard to find and needed several
weeks and lots of debugging and patching.

The bug was found via the CONFIG_X86_CHECK_BIOS_CORRUPTION=y debug feature,
if similar bugs are suspected then this feature can be enabled on other
systems as well to scan low memory for corrupted memory.

Reported-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Reported-by: Andy Wettstein <ajw1980@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ec7e56c1b984..3109ca37a67c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -729,6 +729,29 @@ void start_periodic_check_for_corruption(void)
 }
 #endif
 
+static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
+{
+	printk(KERN_NOTICE
+		"%s detected: BIOS corrupts 0xc000, working it around.\n",
+		d->ident);
+
+	reserve_early(0xc000, 0xc400, "BIOS quirk");
+
+	return 0;
+}
+
+/* List of systems that have known low memory corruption BIOS problems */
+static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
+	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "AMI BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+		},
+	},
+	{}
+};
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -752,6 +775,8 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
+	dmi_check_system(bad_bios_dmi_table);
+
 	early_cpu_init();
 	early_ioremap_init();
 
@@ -1037,3 +1062,5 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 }
+
+

From 1e22436eba84edfec9c25e5a25d09062c4f91ca9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Sep 2008 09:58:02 +0200
Subject: [PATCH 097/142] x86: reserve low 64K on AMI and Phoenix BIOS boxen

there's multiple reports about suspend/resume related low memory
corruption in this bugzilla:

  http://bugzilla.kernel.org/show_bug.cgi?id=11237

the common pattern is that the corruption is caused by the BIOS,
and that it affects some portion of the first 64K of physical RAM.

So add a DMI quirk

This will waste 64K RAM on 'good' systems too, but without knowing
the exact nature of this BIOS memory corruption this is the safest
approach.

This might as well solve a wide range of suspend/resume breakages
under Linux.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3109ca37a67c..33719544a224 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -732,10 +732,10 @@ void start_periodic_check_for_corruption(void)
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE
-		"%s detected: BIOS corrupts 0xc000, working it around.\n",
+		"%s detected: BIOS may corrupt low RAM, working it around.\n",
 		d->ident);
 
-	reserve_early(0xc000, 0xc400, "BIOS quirk");
+	reserve_early(0x0, 0x10000, "BIOS quirk");
 
 	return 0;
 }
@@ -749,6 +749,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
 		},
 	},
+	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "Phoenix BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+		},
+	},
 	{}
 };
 

From fc38151947477596aa27df6c4306ad6008dc6711 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Sep 2008 10:07:34 +0200
Subject: [PATCH 098/142] x86: add X86_RESERVE_LOW_64K

This bugzilla:

  http://bugzilla.kernel.org/show_bug.cgi?id=11237

Documents a wide range of systems where the BIOS utilizes the first
64K of physical memory during suspend/resume and other hardware events.

Currently we reserve this memory on all AMI and Phoenix BIOS systems.
Life is too short to hunt subtle memory corruption problems like this,
so we try to be robust by default.

Still, allow this to be overriden: allow users who want that first 64K
of memory to be available to the kernel disable the quirk, via
CONFIG_X86_RESERVE_LOW_64K=n.

Also, allow the early reservation to overlap with other
early reservations.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig        | 20 ++++++++++++++++++++
 arch/x86/kernel/setup.c |  4 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7820d447bb8d..633f25dd9ee2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1089,6 +1089,26 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
 	 Set whether the default state of memory_corruption_check is
 	 on or off.
 
+config X86_RESERVE_LOW_64K
+        bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
+	default y
+	help
+	 Reserve the first 64K of physical RAM on BIOSes that are known
+	 to potentially corrupt that memory range. A numbers of BIOSes are
+	 known to utilize this area during suspend/resume, so it must not
+	 be used by the kernel.
+
+	 Set this to N if you are absolutely sure that you trust the BIOS
+	 to get all its memory reservations and usages right.
+
+	 If you have doubts about the BIOS (e.g. suspend/resume does not
+	 work or there's kernel crashes after certain hardware hotplug
+	 events) and it's not AMI or Phoenix, then you might want to enable
+	 X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
+	 corruption patterns.
+
+	 Say Y if unsure.
+
 config MATH_EMULATION
 	bool
 	prompt "Math emulation" if X86_32
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 33719544a224..786c1886ae53 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -735,13 +735,14 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 		"%s detected: BIOS may corrupt low RAM, working it around.\n",
 		d->ident);
 
-	reserve_early(0x0, 0x10000, "BIOS quirk");
+	reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk");
 
 	return 0;
 }
 
 /* List of systems that have known low memory corruption BIOS problems */
 static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
+#ifdef CONFIG_X86_RESERVE_LOW_64K
 	{
 		.callback = dmi_low_memory_corruption,
 		.ident = "AMI BIOS",
@@ -757,6 +758,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 		},
 	},
 	{}
+#endif
 };
 
 /*

From 5871c6b0a52bb052c3891a787655043b90ae18e3 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 20 Sep 2008 20:35:44 -0700
Subject: [PATCH 099/142] x86: use round_jiffies() for the corruption check
 timer

the exact timing of the corruption check isn't too important (it's once a
minute timer), use round_jiffies() to align it and avoid extra wakeups.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 786c1886ae53..161f1b33ecec 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -712,7 +712,7 @@ void check_for_bios_corruption(void)
 static void periodic_check_for_corruption(unsigned long data)
 {
 	check_for_bios_corruption();
-	mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ);
+	mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
 }
 
 void start_periodic_check_for_corruption(void)

From 2216d199b1430d1c0affb1498a9ebdbd9c0de439 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 22 Sep 2008 02:52:26 -0700
Subject: [PATCH 100/142] x86: fix CONFIG_X86_RESERVE_LOW_64K=y

The bad_bios_dmi_table() quirk never triggered because we do DMI setup
too late. Move it a bit earlier.

Also change the CONFIG_X86_RESERVE_LOW_64K quirk to operate on the e820
table directly instead of messing with early reservations - this handles
overlaps (which do occur in this low range of RAM) more gracefully.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 161f1b33ecec..d29951c11be0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -735,7 +735,8 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 		"%s detected: BIOS may corrupt low RAM, working it around.\n",
 		d->ident);
 
-	reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk");
+	e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
 	return 0;
 }
@@ -784,8 +785,6 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
-	dmi_check_system(bad_bios_dmi_table);
-
 	early_cpu_init();
 	early_ioremap_init();
 
@@ -880,6 +879,10 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	dmi_scan_machine();
+
+	dmi_check_system(bad_bios_dmi_table);
+
 #ifdef CONFIG_X86_32
 	probe_roms();
 #endif
@@ -967,8 +970,6 @@ void __init setup_arch(char **cmdline_p)
 	vsmp_init();
 #endif
 
-	dmi_scan_machine();
-
 	io_delay_init();
 
 	/*

From a8b71a2810386a5ac8f43d2095fe3355f0d8db37 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 23 Sep 2008 00:35:33 -0700
Subject: [PATCH 101/142] x86: fix macro with bad_bios_dmi_table

DMI tables need a blank NULL tail.

fixes the crash on Ingo's test box.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d29951c11be0..3ce3edfcc3cd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -758,8 +758,8 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
 		},
 	},
-	{}
 #endif
+	{}
 };
 
 /*

From 18dbc9160507dc7df998e00cd1dcd7889557307b Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Tue, 23 Sep 2008 12:08:44 +0200
Subject: [PATCH 102/142] x86: moved microcode.c to microcode_intel.c

Combine both generic and arch-specific parts of microcode into a
single module (arch-specific parts are config-dependent).

Also while we are at it, move arch-specific parts from microcode.h
into their respective arch-specific .c files.

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Cc: "Peter Oruba" <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                              | 10 +-
 arch/x86/kernel/Makefile                      |  8 +-
 arch/x86/kernel/microcode_amd.c               | 72 ++++++++------
 .../kernel/{microcode.c => microcode_core.c}  | 26 ++---
 arch/x86/kernel/microcode_intel.c             | 79 +++++++++------
 include/asm-x86/microcode.h                   | 99 +++++--------------
 6 files changed, 138 insertions(+), 156 deletions(-)
 rename arch/x86/kernel/{microcode.c => microcode_core.c} (96%)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0e5bf1eddcea..2e6080951350 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -801,7 +801,7 @@ config MICROCODE
 	  module will be called microcode.
 
 config MICROCODE_INTEL
-       tristate "Intel microcode patch loading support"
+       bool "Intel microcode patch loading support"
        depends on MICROCODE
        default MICROCODE
        select FW_LOADER
@@ -813,20 +813,14 @@ config MICROCODE_INTEL
          Intel ingredients for this driver, check:
          <http://www.urbanmyth.org/microcode/>.
 
-         This driver is only available as a module: the module
-         will be called microcode_intel.  
-
 config MICROCODE_AMD
-       tristate "AMD microcode patch loading support"
+       bool "AMD microcode patch loading support"
        depends on MICROCODE
        select FW_LOADER
        --help---
          If you select this option, microcode patch loading support for AMD
 	 processors will be enabled.
 
-	 This driver is only available as a module: the module
-	 will be called microcode_amd.
-
    config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index be454f348c3b..f891996f6849 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -51,9 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= microcode.o
-obj-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
-obj-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
@@ -101,6 +98,11 @@ scx200-y			+= scx200_32.o
 
 obj-$(CONFIG_OLPC)		+= olpc.o
 
+microcode-y				:= microcode_core.o
+microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
+microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
+obj-$(CONFIG_MICROCODE)			+= microcode.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 48aec9f48e4f..03ea4e52e87a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -46,6 +46,35 @@ MODULE_LICENSE("GPL v2");
 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
 #define UCODE_UCODE_TYPE           0x00000001
 
+struct equiv_cpu_entry {
+	unsigned int installed_cpu;
+	unsigned int fixed_errata_mask;
+	unsigned int fixed_errata_compare;
+	unsigned int equiv_cpu;
+};
+
+struct microcode_header_amd {
+	unsigned int  data_code;
+	unsigned int  patch_id;
+	unsigned char mc_patch_data_id[2];
+	unsigned char mc_patch_data_len;
+	unsigned char init_flag;
+	unsigned int  mc_patch_data_checksum;
+	unsigned int  nb_dev_id;
+	unsigned int  sb_dev_id;
+	unsigned char processor_rev_id[2];
+	unsigned char nb_rev_id;
+	unsigned char sb_rev_id;
+	unsigned char bios_api_rev;
+	unsigned char reserved1[3];
+	unsigned int  match_reg[8];
+};
+
+struct microcode_amd {
+	struct microcode_header_amd hdr;
+	unsigned int mpb[0];
+};
+
 #define UCODE_MAX_SIZE          (2048)
 #define DEFAULT_UCODE_DATASIZE	(896)
 #define MC_HEADER_SIZE		(sizeof(struct microcode_header_amd))
@@ -189,17 +218,18 @@ static void apply_microcode_amd(int cpu)
 	unsigned int rev;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+	struct microcode_amd *mc_amd = uci->mc;
 	unsigned long addr;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
 
-	if (uci->mc.mc_amd == NULL)
+	if (mc_amd == NULL)
 		return;
 
 	spin_lock_irqsave(&microcode_update_lock, flags);
 
-	addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
+	addr = (unsigned long)&mc_amd->hdr.data_code;
 	edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
 	eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
 
@@ -214,16 +244,16 @@ static void apply_microcode_amd(int cpu)
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
 	/* check current patch id and patch's id for match */
-	if (rev != uci->mc.mc_amd->hdr.patch_id) {
+	if (rev != mc_amd->hdr.patch_id) {
 		printk(KERN_ERR "microcode: CPU%d update from revision "
 		       "0x%x to 0x%x failed\n", cpu_num,
-		       uci->mc.mc_amd->hdr.patch_id, rev);
+		       mc_amd->hdr.patch_id, rev);
 		return;
 	}
 
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x \n",
-	       cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
+	       cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
 
 	uci->cpu_sig.rev = rev;
 }
@@ -355,12 +385,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 
 	if (new_mc) {
 		if (!leftover) {
-			if (uci->mc.mc_amd)
-				vfree(uci->mc.mc_amd);
-			uci->mc.mc_amd = (struct microcode_amd *)new_mc;
+			if (uci->mc)
+				vfree(uci->mc);
+			uci->mc = new_mc;
 			pr_debug("microcode: CPU%d found a matching microcode update with"
 				" version 0x%x (current=0x%x)\n",
-				cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev);
+				cpu, new_rev, uci->cpu_sig.rev);
 		} else
 			vfree(new_mc);
 	}
@@ -416,8 +446,8 @@ static void microcode_fini_cpu_amd(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	vfree(uci->mc.mc_amd);
-	uci->mc.mc_amd = NULL;
+	vfree(uci->mc);
+	uci->mc = NULL;
 }
 
 static struct microcode_ops microcode_amd_ops = {
@@ -428,23 +458,7 @@ static struct microcode_ops microcode_amd_ops = {
 	.microcode_fini_cpu               = microcode_fini_cpu_amd,
 };
 
-static int __init microcode_amd_module_init(void)
+struct microcode_ops * __init init_amd_microcode(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	equiv_cpu_table = NULL;
-	if (c->x86_vendor != X86_VENDOR_AMD) {
-		printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n");
-		return -ENODEV;
-	}
-
-	return microcode_init(&microcode_amd_ops, THIS_MODULE);
+	return &microcode_amd_ops;
 }
-
-static void __exit microcode_amd_module_exit(void)
-{
-	microcode_exit();
-}
-
-module_init(microcode_amd_module_init)
-module_exit(microcode_amd_module_exit)
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode_core.c
similarity index 96%
rename from arch/x86/kernel/microcode.c
rename to arch/x86/kernel/microcode_core.c
index 0c2634f4fd7c..ff031dbccdf6 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -298,7 +298,7 @@ static int microcode_resume_cpu(int cpu)
 
 	pr_debug("microcode: CPU%d resumed\n", cpu);
 
-	if (!uci->mc.valid_mc)
+	if (!uci->mc)
 		return 1;
 
 	/*
@@ -443,17 +443,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
 	.notifier_call = mc_cpu_callback,
 };
 
-int microcode_init(void *opaque, struct module *module)
+static int __init microcode_init(void)
 {
-	struct microcode_ops *ops = (struct microcode_ops *)opaque;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int error;
 
-	if (microcode_ops) {
-		printk(KERN_ERR "microcode: already loaded the other module\n");
-		return -EEXIST;
-	}
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		microcode_ops = init_intel_microcode();
+	else if (c->x86_vendor != X86_VENDOR_AMD)
+		microcode_ops = init_amd_microcode();
 
-	microcode_ops = ops;
+	if (!microcode_ops) {
+		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		return -ENODEV;
+	}
 
 	error = microcode_dev_init();
 	if (error)
@@ -483,9 +486,8 @@ int microcode_init(void *opaque, struct module *module)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(microcode_init);
 
-void __exit microcode_exit(void)
+static void __exit microcode_exit(void)
 {
 	microcode_dev_exit();
 
@@ -502,4 +504,6 @@ void __exit microcode_exit(void)
 	printk(KERN_INFO
 	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-EXPORT_SYMBOL_GPL(microcode_exit);
+
+module_init(microcode_init);
+module_exit(microcode_exit);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 48ed3cef58c1..622dc4a21784 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -97,6 +97,38 @@ MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_LICENSE("GPL");
 
+struct microcode_header_intel {
+	unsigned int            hdrver;
+	unsigned int            rev;
+	unsigned int            date;
+	unsigned int            sig;
+	unsigned int            cksum;
+	unsigned int            ldrver;
+	unsigned int            pf;
+	unsigned int            datasize;
+	unsigned int            totalsize;
+	unsigned int            reserved[3];
+};
+
+struct microcode_intel {
+	struct microcode_header_intel hdr;
+	unsigned int            bits[0];
+};
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+	unsigned int            sig;
+	unsigned int            pf;
+	unsigned int            cksum;
+};
+
+struct extended_sigtable {
+	unsigned int            count;
+	unsigned int            cksum;
+	unsigned int            reserved[3];
+	struct extended_signature sigs[0];
+};
+
 #define DEFAULT_UCODE_DATASIZE 	(2000)
 #define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
@@ -284,11 +316,12 @@ static void apply_microcode(int cpu)
 	unsigned int val[2];
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct microcode_intel *mc_intel = uci->mc;
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
 
-	if (uci->mc.mc_intel == NULL)
+	if (mc_intel == NULL)
 		return;
 
 	/* serialize access to the physical write to MSR 0x79 */
@@ -296,8 +329,8 @@ static void apply_microcode(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) uci->mc.mc_intel->bits,
-	      (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16);
+	      (unsigned long) mc_intel->bits,
+	      (unsigned long) mc_intel->bits >> 16 >> 16);
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 
 	/* see notes above for revision 1.07.  Apparent chip bug */
@@ -307,7 +340,7 @@ static void apply_microcode(int cpu)
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
-	if (val[1] != uci->mc.mc_intel->hdr.rev) {
+	if (val[1] != mc_intel->hdr.rev) {
 		printk(KERN_ERR "microcode: CPU%d update from revision "
 			"0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
 		return;
@@ -315,9 +348,9 @@ static void apply_microcode(int cpu)
 	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x, date = %04x-%02x-%02x \n",
 		cpu_num, uci->cpu_sig.rev, val[1],
-		uci->mc.mc_intel->hdr.date & 0xffff,
-		uci->mc.mc_intel->hdr.date >> 24,
-		(uci->mc.mc_intel->hdr.date >> 16) & 0xff);
+		mc_intel->hdr.date & 0xffff,
+		mc_intel->hdr.date >> 24,
+		(mc_intel->hdr.date >> 16) & 0xff);
 	uci->cpu_sig.rev = val[1];
 }
 
@@ -367,12 +400,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 
 	if (new_mc) {
 		if (!leftover) {
-			if (uci->mc.mc_intel)
-				vfree(uci->mc.mc_intel);
-			uci->mc.mc_intel = (struct microcode_intel *)new_mc;
+			if (uci->mc)
+				vfree(uci->mc);
+			uci->mc = (struct microcode_intel *)new_mc;
 			pr_debug("microcode: CPU%d found a matching microcode update with"
 				 " version 0x%x (current=0x%x)\n",
-				cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev);
+				cpu, new_rev, uci->cpu_sig.rev);
 		} else
 			vfree(new_mc);
 	}
@@ -428,11 +461,11 @@ static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	vfree(uci->mc.mc_intel);
-	uci->mc.mc_intel = NULL;
+	vfree(uci->mc);
+	uci->mc = NULL;
 }
 
-static struct microcode_ops microcode_intel_ops = {
+struct microcode_ops microcode_intel_ops = {
 	.request_microcode_user		  = request_microcode_user,
 	.request_microcode_fw             = request_microcode_fw,
 	.collect_cpu_info                 = collect_cpu_info,
@@ -440,22 +473,8 @@ static struct microcode_ops microcode_intel_ops = {
 	.microcode_fini_cpu               = microcode_fini_cpu,
 };
 
-static int __init microcode_intel_module_init(void)
+struct microcode_ops * __init init_intel_microcode(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_INTEL) {
-                printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n");
-		return -ENODEV;
-	}
-
-	return microcode_init(&microcode_intel_ops, THIS_MODULE);
+	return &microcode_intel_ops;
 }
 
-static void __exit microcode_intel_module_exit(void)
-{
-	microcode_exit();
-}
-
-module_init(microcode_intel_module_init)
-module_exit(microcode_intel_module_exit)
diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h
index e2887facdb4a..62c793bb70ca 100644
--- a/include/asm-x86/microcode.h
+++ b/include/asm-x86/microcode.h
@@ -1,10 +1,12 @@
 #ifndef ASM_X86__MICROCODE_H
 #define ASM_X86__MICROCODE_H
 
-extern int microcode_init(void *opaque, struct module *module);
-extern void microcode_exit(void);
+struct cpu_signature {
+	unsigned int sig;
+	unsigned int pf;
+	unsigned int rev;
+};
 
-struct cpu_signature;
 struct device;
 
 struct microcode_ops {
@@ -17,82 +19,29 @@ struct microcode_ops {
 	void (*microcode_fini_cpu) (int cpu);
 };
 
-struct microcode_header_intel {
-	unsigned int            hdrver;
-	unsigned int            rev;
-	unsigned int            date;
-	unsigned int            sig;
-	unsigned int            cksum;
-	unsigned int            ldrver;
-	unsigned int            pf;
-	unsigned int            datasize;
-	unsigned int            totalsize;
-	unsigned int            reserved[3];
-};
-
-struct microcode_intel {
-	struct microcode_header_intel hdr;
-	unsigned int            bits[0];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int            sig;
-	unsigned int            pf;
-	unsigned int            cksum;
-};
-
-struct extended_sigtable {
-	unsigned int            count;
-	unsigned int            cksum;
-	unsigned int            reserved[3];
-	struct extended_signature sigs[0];
-};
-
-struct equiv_cpu_entry {
-	unsigned int installed_cpu;
-	unsigned int fixed_errata_mask;
-	unsigned int fixed_errata_compare;
-	unsigned int equiv_cpu;
-};
-
-struct microcode_header_amd {
-	unsigned int  data_code;
-	unsigned int  patch_id;
-	unsigned char mc_patch_data_id[2];
-	unsigned char mc_patch_data_len;
-	unsigned char init_flag;
-	unsigned int  mc_patch_data_checksum;
-	unsigned int  nb_dev_id;
-	unsigned int  sb_dev_id;
-	unsigned char processor_rev_id[2];
-	unsigned char nb_rev_id;
-	unsigned char sb_rev_id;
-	unsigned char bios_api_rev;
-	unsigned char reserved1[3];
-	unsigned int  match_reg[8];
-};
-
-struct microcode_amd {
-	struct microcode_header_amd hdr;
-	unsigned int mpb[0];
-};
-
-struct cpu_signature {
-	unsigned int sig;
-	unsigned int pf;
-	unsigned int rev;
-};
-
 struct ucode_cpu_info {
 	struct cpu_signature cpu_sig;
 	int valid;
-	union {
-		struct microcode_intel *mc_intel;
-		struct microcode_amd *mc_amd;
-		void *valid_mc;
-	} mc;
+	void *mc;
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
+#ifdef CONFIG_MICROCODE_INTEL
+extern struct microcode_ops * __init init_intel_microcode(void);
+#else
+static inline struct microcode_ops * __init init_intel_microcode(void)
+{
+	return NULL;
+}
+#endif /* CONFIG_MICROCODE_INTEL */
+
+#ifdef CONFIG_MICROCODE_AMD
+extern struct microcode_ops * __init init_amd_microcode(void);
+#else
+static inline struct microcode_ops * __init init_amd_microcode(void)
+{
+	return NULL;
+}
+#endif
+
 #endif /* ASM_X86__MICROCODE_H */

From da654b74bda14c45a7d98c731bf3c1a43b6b74e2 Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Tue, 23 Sep 2008 15:23:52 +0530
Subject: [PATCH 103/142] signals: demultiplexing SIGTRAP signal

Currently a SIGTRAP can denote any one of below reasons.
	- Breakpoint hit
	- H/W debug register hit
	- Single step
	- Signal sent through kill() or rasie()

Architectures like powerpc/parisc provides infrastructure to demultiplex
SIGTRAP signal by passing down the information for receiving SIGTRAP through
si_code of siginfot_t structure. Here is an attempt is generalise this
infrastructure by extending it to x86 and x86_64 archs.

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: akpm@linux-foundation.org
Cc: paulus@samba.org
Cc: linuxppc-dev@ozlabs.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/include/asm/siginfo.h    |  5 -----
 arch/powerpc/include/asm/siginfo.h |  5 -----
 arch/x86/kernel/ptrace.c           |  7 ++++---
 arch/x86/kernel/traps_32.c         |  4 +++-
 arch/x86/kernel/traps_64.c         |  2 +-
 include/asm-generic/siginfo.h      |  2 ++
 include/asm-parisc/siginfo.h       |  5 -----
 include/asm-x86/ptrace.h           |  2 +-
 include/asm-x86/traps.h            | 10 ++++++++++
 9 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h
index 9294e4b0c8bc..118d42979003 100644
--- a/arch/ia64/include/asm/siginfo.h
+++ b/arch/ia64/include/asm/siginfo.h
@@ -113,11 +113,6 @@ typedef struct siginfo {
 #undef NSIGSEGV
 #define NSIGSEGV	3
 
-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
 #undef NSIGTRAP
 #define NSIGTRAP	4
 
diff --git a/arch/powerpc/include/asm/siginfo.h b/arch/powerpc/include/asm/siginfo.h
index 12f1bce037be..49495b0534ed 100644
--- a/arch/powerpc/include/asm/siginfo.h
+++ b/arch/powerpc/include/asm/siginfo.h
@@ -15,11 +15,6 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
 #undef NSIGTRAP
 #define NSIGTRAP	4
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 9e43a48ad6e0..bf45cdf1aaca 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1358,7 +1358,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 #endif
 }
 
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+					 int error_code, int si_code)
 {
 	struct siginfo info;
 
@@ -1367,7 +1368,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 
 	memset(&info, 0, sizeof(info));
 	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_BRKPT;
+	info.si_code = si_code;
 
 	/* User-mode ip? */
 	info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
@@ -1454,5 +1455,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 	 */
 	if (test_thread_flag(TIF_SINGLESTEP) &&
 	    tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
-		send_sigtrap(current, regs, 0);
+		send_sigtrap(current, regs, 0, TRAP_BRKPT);
 }
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index da5a5964fccb..0429c5de5ea9 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -891,6 +891,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
 	unsigned int condition;
+	int si_code;
 
 	trace_hardirqs_fixup();
 
@@ -935,8 +936,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
 			goto clear_TF_reenable;
 	}
 
+	si_code = get_si_code((unsigned long)condition);
 	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code);
+	send_sigtrap(tsk, regs, error_code, si_code);
 
 	/*
 	 * Disable additional traps. They'll be re-enabled when
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 56d6f1147785..011d8e1fac6e 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -941,7 +941,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs,
 	tsk->thread.error_code = error_code;
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
-	info.si_code = TRAP_BRKPT;
+	info.si_code = get_si_code(condition);
 	info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 	force_sig_info(SIGTRAP, &info, tsk);
 
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 8786e01e0db8..969570167e9e 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -199,6 +199,8 @@ typedef struct siginfo {
  */
 #define TRAP_BRKPT	(__SI_FAULT|1)	/* process breakpoint */
 #define TRAP_TRACE	(__SI_FAULT|2)	/* process trace trap */
+#define TRAP_BRANCH     (__SI_FAULT|3)  /* process taken branch trap */
+#define TRAP_HWBKPT     (__SI_FAULT|4)  /* hardware breakpoint/watchpoint */
 #define NSIGTRAP	2
 
 /*
diff --git a/include/asm-parisc/siginfo.h b/include/asm-parisc/siginfo.h
index d4909f55fe35..d7034728f377 100644
--- a/include/asm-parisc/siginfo.h
+++ b/include/asm-parisc/siginfo.h
@@ -3,11 +3,6 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
 #undef NSIGTRAP
 #define NSIGTRAP	4
 
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index fad807769910..c2f368273079 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -143,7 +143,7 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #ifdef CONFIG_X86_32
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
-			 int error_code);
+			 int error_code, int si_code);
 #endif
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
index 2ccebc6fb0b0..4b1e90409251 100644
--- a/include/asm-x86/traps.h
+++ b/include/asm-x86/traps.h
@@ -36,6 +36,16 @@ void do_invalid_op(struct pt_regs *, long);
 void do_general_protection(struct pt_regs *, long);
 void do_nmi(struct pt_regs *, long);
 
+static inline int get_si_code(unsigned long condition)
+{
+	if (condition & DR_STEP)
+		return TRAP_TRACE;
+	else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
+		return TRAP_HWBKPT;
+	else
+		return TRAP_BRKPT;
+}
+
 extern int panic_on_unrecovered_nmi;
 extern int kstack_depth_to_print;
 

From b6cffde1a20409f9720d5c9aba28d3efd3a4f04e Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Wed, 17 Sep 2008 15:05:52 +0200
Subject: [PATCH 104/142] x86, microcode rework, v2, renaming

Renaming based on patch from Dmitry Adamushko.

Made code more readable by renaming define and variables related
to microcode _container_file_ header to make it distinguishable from
microcode _patch_ header.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 03ea4e52e87a..62058e9c8b4e 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -304,12 +304,12 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
 static int install_equiv_cpu_table(u8 *buf,
 		int (*get_ucode_data)(void *, const void *, size_t))
 {
-#define UCODE_HEADER_SIZE	12
-	u8 *hdr[UCODE_HEADER_SIZE];
-	unsigned int *buf_pos = (unsigned int *)hdr;
+#define UCODE_CONTAINER_HEADER_SIZE	12
+	u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
+	unsigned int *buf_pos = (unsigned int *)container_hdr;
 	unsigned long size;
 
-	if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE))
+	if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
 		return 0;
 
 	size = buf_pos[2];
@@ -326,14 +326,14 @@ static int install_equiv_cpu_table(u8 *buf,
 		return 0;
 	}
 
-	buf += UCODE_HEADER_SIZE;
+	buf += UCODE_CONTAINER_HEADER_SIZE;
 	if (get_ucode_data(equiv_cpu_table, buf, size)) {
 		vfree(equiv_cpu_table);
 		return 0;
 	}
 
-	return size + UCODE_HEADER_SIZE; /* add header length */
-#undef UCODE_HEADER_SIZE
+	return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
+#undef UCODE_CONTAINER_HEADER_SIZE
 }
 
 static void free_equiv_cpu_table(void)

From d4738792fb86600b6cb7220459d9c47e819b3580 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Wed, 17 Sep 2008 15:39:18 +0200
Subject: [PATCH 105/142] x86, microcode rework, v2, renaming cont.

Renaming based on patch from Dmitry Adamushko.

Further clarification by renaming define and variable related to
microcode container file.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 62058e9c8b4e..829415208ff8 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -263,21 +263,20 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
 			unsigned int *mc_size)
 {
 	unsigned int total_size;
-#define UCODE_UNKNOWN_HDR	8
-	u8 hdr[UCODE_UNKNOWN_HDR];
+#define UCODE_CONTAINER_SECTION_HDR	8
+	u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
 	void *mc;
 
-	if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR))
+	if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
 		return NULL;
 
-	if (hdr[0] != UCODE_UCODE_TYPE) {
+	if (section_hdr[0] != UCODE_UCODE_TYPE) {
 		printk(KERN_ERR "microcode: error! "
 		       "Wrong microcode payload type field\n");
 		return NULL;
 	}
 
-	/* FIXME! dimm: Why not by means of get_totalsize(hdr)? */
-	total_size = (unsigned long) (hdr[4] + (hdr[5] << 8));
+	total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
 
 	printk(KERN_INFO "microcode: size %u, total_size %u\n",
 		size, total_size);
@@ -290,13 +289,13 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
 	mc = vmalloc(UCODE_MAX_SIZE);
 	if (mc) {
 		memset(mc, 0, UCODE_MAX_SIZE);
-		if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) {
+		if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
 			vfree(mc);
 			mc = NULL;
 		} else
-			*mc_size = total_size + UCODE_UNKNOWN_HDR;
+			*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
 	}
-#undef UCODE_UNKNOWN_HDR
+#undef UCODE_CONTAINER_SECTION_HDR
 	return mc;
 }
 

From e8d3f455de4f42d4bab2f6f1aeb2cf3bd18eb508 Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Tue, 23 Sep 2008 15:23:52 +0530
Subject: [PATCH 106/142] signals: demultiplexing SIGTRAP signal, fix

fix build breakage, missing header file.

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/traps.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
index 4b1e90409251..7a692baa51ae 100644
--- a/include/asm-x86/traps.h
+++ b/include/asm-x86/traps.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__TRAPS_H
 #define ASM_X86__TRAPS_H
 
+#include <asm/debugreg.h>
+
 /* Common in X86_32 and X86_64 */
 asmlinkage void divide_error(void);
 asmlinkage void debug(void);

From 5fd933303bd1efacbd0acbe452ba9b889440eb40 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 23 Sep 2008 17:19:44 -0700
Subject: [PATCH 107/142] x86: signal: cosmetic unification of do_signal()

Make do_signal() same.
Thia patch modifies only comments in signal_64.c.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index bf77d4789a2d..5a5fbc3b1eea 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -410,7 +410,8 @@ static void do_signal(struct pt_regs *regs)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		/* Re-enable any watchpoints before delivering the
+		/*
+		 * Re-enable any watchpoints before delivering the
 		 * signal to user space. The processor register will
 		 * have been cleared if the watchpoint triggered
 		 * inside the kernel.
@@ -418,7 +419,7 @@ static void do_signal(struct pt_regs *regs)
 		if (current->thread.debugreg7)
 			set_debugreg(current->thread.debugreg7, 7);
 
-		/* Whee!  Actually deliver the signal.  */
+		/* Whee! Actually deliver the signal.  */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
@@ -441,6 +442,7 @@ static void do_signal(struct pt_regs *regs)
 			regs->ax = regs->orig_ax;
 			regs->ip -= 2;
 			break;
+
 		case -ERESTART_RESTARTBLOCK:
 			regs->ax = NR_restart_syscall;
 			regs->ip -= 2;

From ee847c54ba7fc09f85f13a5bf18f45ea6c19aa83 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 23 Sep 2008 17:21:45 -0700
Subject: [PATCH 108/142] x86: signal: cosmetic unification of
 do_notify_resume()

Make do_notify_resume() same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c |  8 ++++++++
 arch/x86/kernel/signal_64.c | 16 ++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index da3cf3270f83..b94463f264b4 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -663,6 +663,12 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+	/* notify userspace of pending MCEs */
+	if (thread_info_flags & _TIF_MCE_NOTIFY)
+		mce_notify_user();
+#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
@@ -672,7 +678,9 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 	}
 
+#ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
+#endif /* CONFIG_X86_32 */
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 5a5fbc3b1eea..9087752f4109 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -460,14 +460,18 @@ static void do_signal(struct pt_regs *regs)
 	}
 }
 
-void do_notify_resume(struct pt_regs *regs, void *unused,
-		      __u32 thread_info_flags)
+/*
+ * notification of userspace execution resumption
+ * - triggered by the TIF_WORK_MASK flags
+ */
+void
+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#ifdef CONFIG_X86_MCE
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
 		mce_notify_user();
-#endif /* CONFIG_X86_MCE */
+#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
@@ -477,6 +481,10 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
 	}
+
+#ifdef CONFIG_X86_32
+	clear_thread_flag(TIF_IRET);
+#endif /* CONFIG_X86_32 */
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)

From 86d3237cd1a09136b4fb3a1d73d3c3fd6331cb14 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 23 Sep 2008 17:22:32 -0700
Subject: [PATCH 109/142] x86: signal: cosmetic unification of handle_signal()

Make handle_signal() same.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 9 +++++++++
 arch/x86/kernel/signal_64.c | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b94463f264b4..bb05917f232c 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -550,6 +550,15 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * This has nothing to do with segment registers,
+	 * despite the name.  This magic affects uaccess.h
+	 * macros' behavior.  Reset it to the normal setting.
+	 */
+	set_fs(USER_DS);
+#endif
+
 	/*
 	 * Clear the direction flag as per the ABI for function entry.
 	 */
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 9087752f4109..963236f2c3c1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -346,12 +346,14 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_X86_64
 	/*
 	 * This has nothing to do with segment registers,
 	 * despite the name.  This magic affects uaccess.h
 	 * macros' behavior.  Reset it to the normal setting.
 	 */
 	set_fs(USER_DS);
+#endif
 
 	/*
 	 * Clear the direction flag as per the ABI for function entry.

From 2f9284e4e3be7b0b4d8d0638f9805603069a762d Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Tue, 23 Sep 2008 22:56:35 +0200
Subject: [PATCH 110/142] x86, microcode_amd: cleanup, mark
 request_microcode_user() as unsupported

(1) mark mc_size in generic_load_microcode() as unitialized_var to avoid
     gcc's (false) warning;

(2) mark request_microcode_user() as unsupported. The required changes
    can be added later. Note, we don't break any user-space interfaces
    here, as there were no kernels with support for AMD-specific ucode
    update yet. The ucode has to be updated via 'firmware'.

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 36 ++++-----------------------------
 1 file changed, 4 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 829415208ff8..7a1f8eeac2c7 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -120,29 +120,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 	unsigned int equiv_cpu_id = 0x00;
 	unsigned int i = 0;
 
-	/*
-	 * FIXME! dimm: do we need this? Why an update via /dev/... is different
-	 * from the one via firmware?
-	 *
-	 * This is a tricky part. We might be called from a write operation
-	 * to the device file instead of the usual process of firmware
-	 * loading. This routine needs to be able to distinguish both
-	 * cases. This is done by checking if there alread is a equivalent
-	 * CPU table installed. If not, we're written through
-	 * /dev/cpu/microcode.
-	 * Since we ignore all checks. The error case in which going through
-	 * firmware loading and that table is not loaded has already been
-	 * checked earlier.
-	 */
 	BUG_ON(equiv_cpu_table == NULL);
-#if 0
-	if (equiv_cpu_table == NULL) {
-		printk(KERN_INFO "microcode: CPU%d microcode update with "
-		       "version 0x%x (current=0x%x)\n",
-		       cpu, mc_header->patch_id, uci->cpu_sig.rev);
-		goto out;
-	}
-#endif
 	current_cpu_id = cpuid_eax(0x00000001);
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
@@ -362,7 +340,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 	leftover = size - offset;
 
 	while (leftover) {
-		unsigned int mc_size;
+		unsigned int uninitialized_var(mc_size);
 		struct microcode_header_amd *mc_header;
 
 		mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
@@ -428,17 +406,11 @@ static int request_microcode_fw(int cpu, struct device *device)
 	return ret;
 }
 
-static int get_ucode_user(void *to, const void *from, size_t n)
-{
-	return copy_from_user(to, from, n);
-}
-
 static int request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
-	return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
+	printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode"
+			"is not supported\n");
+	return -1;
 }
 
 static void microcode_fini_cpu_amd(int cpu)

From 82b078659ed04e1ecdebf8326e189cf76ed361af Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Wed, 24 Sep 2008 11:50:35 +0200
Subject: [PATCH 111/142] x86: microcode patch loader bugfix

Corrected CPU vendor check condition for AMD microcode patch loader
initialization.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index ff031dbccdf6..8db2eb55e9e2 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -450,7 +450,7 @@ static int __init microcode_init(void)
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		microcode_ops = init_intel_microcode();
-	else if (c->x86_vendor != X86_VENDOR_AMD)
+	else if (c->x86_vendor == X86_VENDOR_AMD)
 		microcode_ops = init_amd_microcode();
 
 	if (!microcode_ops) {

From 8d8c13bdb53977319593d9efc4971a4cacc8bd03 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 24 Sep 2008 19:10:29 -0700
Subject: [PATCH 112/142] x86: signal_32.c: introduce signr_convert()

Introduce signr_convert().
This function will help unification of setup_rt_frame().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index bb05917f232c..b1bc90f19b9a 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -482,18 +482,21 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 /*
  * OK, we're invoking a handler:
  */
+static int signr_convert(int sig)
+{
+	struct thread_info *info = current_thread_info();
+
+	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+		return info->exec_domain->signal_invmap[sig];
+	return sig;
+}
+
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
 {
+	int usig = signr_convert(sig);
 	int ret;
-	int usig;
-
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
 
 	/* Set up the stack frame */
 	if (ka->sa.sa_flags & SA_SIGINFO)

From b94fd69827b2104a2a4d9d0bc05a8e908a937ae8 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 24 Sep 2008 19:12:54 -0700
Subject: [PATCH 113/142] x86: signal_64.c: introduce helper function
 signr_convert()

This helper function is for unification of setup_rt_frame().
No effect in binary.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_64.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 963236f2c3c1..c5d80024a8f3 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -281,18 +281,24 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 /*
  * OK, we're invoking a handler
  */
+static int signr_convert(int sig)
+{
+	return sig;
+}
+
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
 {
+	int usig = signr_convert(sig);
 	int ret;
 
 #ifdef CONFIG_IA32_EMULATION
 	if (test_thread_flag(TIF_IA32)) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(sig, ka, info, set, regs);
+			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
-			ret = ia32_setup_frame(sig, ka, set, regs);
+			ret = ia32_setup_frame(usig, ka, set, regs);
 	} else
 #endif
 	ret = __setup_rt_frame(sig, ka, info, set, regs);

From 455edbc423db282bb64dec2d7c8968498ea8e619 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 24 Sep 2008 19:13:11 -0700
Subject: [PATCH 114/142] x86: signal: introduce helper macro is_ia32

Introduce new macro is_ia32 for unification of setup_rt_frame().
No effect in binary, compiler will optimize.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 13 +++++++++----
 arch/x86/kernel/signal_64.c | 13 +++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b1bc90f19b9a..cf62f70cc2a6 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -491,6 +491,8 @@ static int signr_convert(int sig)
 	return sig;
 }
 
+#define is_ia32	1
+
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
@@ -499,10 +501,13 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	int ret;
 
 	/* Set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		ret = __setup_rt_frame(usig, ka, info, set, regs);
-	else
-		ret = __setup_frame(usig, ka, set, regs);
+	if (is_ia32) {
+		if (ka->sa.sa_flags & SA_SIGINFO)
+			ret = __setup_rt_frame(usig, ka, info, set, regs);
+		else
+			ret = __setup_frame(usig, ka, set, regs);
+	} else
+		ret = __setup_rt_frame(sig, ka, info, set, regs);
 
 	if (ret) {
 		force_sigsegv(sig, current);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index c5d80024a8f3..53f86d95985b 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -286,6 +286,12 @@ static int signr_convert(int sig)
 	return sig;
 }
 
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32	test_thread_flag(TIF_IA32)
+#else
+#define is_ia32	0
+#endif
+
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       sigset_t *set, struct pt_regs *regs)
@@ -293,15 +299,14 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	int usig = signr_convert(sig);
 	int ret;
 
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32)) {
+	/* Set up the stack frame */
+	if (is_ia32) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
 			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
 			ret = ia32_setup_frame(usig, ka, set, regs);
 	} else
-#endif
-	ret = __setup_rt_frame(sig, ka, info, set, regs);
+		ret = __setup_rt_frame(sig, ka, info, set, regs);
 
 	if (ret) {
 		force_sigsegv(sig, current);

From 4694d2391221e14fe671602fe34ea7f24f5a561f Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 24 Sep 2008 19:13:29 -0700
Subject: [PATCH 115/142] x86: signal_32.c: introduce macro ia32_setup_frame
 and ia32_setup_rt_frame

Make 32-bit setup_rt_frame() look like 64-bit version for unification.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index cf62f70cc2a6..4337cd510f0a 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -492,6 +492,8 @@ static int signr_convert(int sig)
 }
 
 #define is_ia32	1
+#define ia32_setup_frame	__setup_frame
+#define ia32_setup_rt_frame	__setup_rt_frame
 
 static int
 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -503,9 +505,9 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	/* Set up the stack frame */
 	if (is_ia32) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = __setup_rt_frame(usig, ka, info, set, regs);
+			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
-			ret = __setup_frame(usig, ka, set, regs);
+			ret = ia32_setup_frame(usig, ka, set, regs);
 	} else
 		ret = __setup_rt_frame(sig, ka, info, set, regs);
 

From 08115ab4d98cb577a83971ebd57cdfbcc6f50b68 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Mon, 29 Sep 2008 18:24:23 -0400
Subject: [PATCH 116/142] xen: make CONFIG_XEN_SAVE_RESTORE depend on
 CONFIG_XEN

Xen options need to depend on XEN.
Also, add newline at end of file.

Without this patch you need to disable CONFIG_PM in order to
disable CPU hotplugging.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index d3e68465ace9..87b9ab166423 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -26,7 +26,7 @@ config XEN_MAX_DOMAIN_MEMORY
 
 config XEN_SAVE_RESTORE
        bool
-       depends on PM
+       depends on XEN && PM
        default y
 
 config XEN_DEBUG_FS
@@ -35,4 +35,4 @@ config XEN_DEBUG_FS
 	default n
 	help
 	  Enable statistics output and various tuning options in debugfs.
-	  Enabling this option may incur a significant performance overhead.
\ No newline at end of file
+	  Enabling this option may incur a significant performance overhead.

From bff0aa4b8f8a1c8492e99c522d044bb98a6ec098 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 30 Sep 2008 20:28:20 -0700
Subject: [PATCH 117/142] x86: ia32_signal.c: remove unnecessary cast to u32

__put_user() looks type of the 2nd parameter, so casting the 1st parameter
is not necessary.

   text	   data	    bss	    dec	    hex	filename
   6227	      0	      8	   6235	   185b	ia32_signal.o.new
   6227	      0	      8	   6235	   185b	ia32_signal.o.old

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e47bed2440ee..690a480c68c5 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -351,21 +351,21 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	savesegment(es, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
-	err |= __put_user((u32)regs->di, &sc->di);
-	err |= __put_user((u32)regs->si, &sc->si);
-	err |= __put_user((u32)regs->bp, &sc->bp);
-	err |= __put_user((u32)regs->sp, &sc->sp);
-	err |= __put_user((u32)regs->bx, &sc->bx);
-	err |= __put_user((u32)regs->dx, &sc->dx);
-	err |= __put_user((u32)regs->cx, &sc->cx);
-	err |= __put_user((u32)regs->ax, &sc->ax);
-	err |= __put_user((u32)regs->cs, &sc->cs);
-	err |= __put_user((u32)regs->ss, &sc->ss);
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(regs->ss, &sc->ss);
 	err |= __put_user(current->thread.trap_no, &sc->trapno);
 	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user((u32)regs->ip, &sc->ip);
-	err |= __put_user((u32)regs->flags, &sc->flags);
-	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ip, &sc->ip);
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);
 
 	tmp = save_i387_xstate_ia32(fpstate);
 	if (tmp < 0)

From 7b9cee16ffb495558c1e3ada55cba906e520006e Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 30 Sep 2008 20:30:51 -0700
Subject: [PATCH 118/142] x86: ia32_signal.c remove unnecessary function calls

the below 2 functions are called in save_i387_xstate_ia32()
- clear_used_math();
- stts();

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 690a480c68c5..4bc02b23674b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -370,12 +370,9 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	tmp = save_i387_xstate_ia32(fpstate);
 	if (tmp < 0)
 		err = -EFAULT;
-	else {
-		clear_used_math();
-		stts();
+	else
 		err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
 					&sc->fpstate);
-	}
 
 	/* non-iBCS2 extensions.. */
 	err |= __put_user(mask, &sc->oldmask);

From fd1452ebf257317f24e0e285a17a2ec2ce3e6df7 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Thu, 2 Oct 2008 16:56:19 +0300
Subject: [PATCH 119/142] x86/microcode: fix sleeping function called from
 invalid context at kernel/mutex.c

Fix the following "sleeping function called from invalid context" bug:

...
__might_sleep
mutex_lock_nested
microcode_update_cpu
mc_sysdev_resume
__sysdev_resume
sysdev_resume
device_power_up
...

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_core.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 8db2eb55e9e2..936d8d55f230 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -324,10 +324,6 @@ void microcode_update_cpu(int cpu)
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(raw_smp_processor_id() != cpu);
-
-	mutex_lock(&microcode_mutex);
 	/*
 	 * Check if the system resume is in progress (uci->valid != NULL),
 	 * otherwise just request a firmware:
@@ -340,11 +336,8 @@ void microcode_update_cpu(int cpu)
 			err = microcode_ops->request_microcode_fw(cpu,
 					&microcode_pdev->dev);
 	}
-
 	if (!err)
 		microcode_ops->apply_microcode(cpu);
-
-	mutex_unlock(&microcode_mutex);
 }
 
 static void microcode_init_cpu(int cpu)
@@ -352,7 +345,13 @@ static void microcode_init_cpu(int cpu)
 	cpumask_t old = current->cpus_allowed;
 
 	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	/* We should bind the task to the CPU */
+	BUG_ON(raw_smp_processor_id() != cpu);
+
+	mutex_lock(&microcode_mutex);
 	microcode_update_cpu(cpu);
+	mutex_unlock(&microcode_mutex);
+
 	set_cpus_allowed_ptr(current, &old);
 }
 

From db053b86f4b1ec790da2dafe2acb93be76288bb9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 2 Oct 2008 16:41:31 -0700
Subject: [PATCH 120/142] xen: clean up x86-64 warnings

There are a couple of Xen features which rely on directly accessing
per-cpu data via a segment register, which is not yet available on
x86-64.  In the meantime, just disable direct access to the vcpu info
structure; this leaves some of the code as dead, but it will come to
life in time, and the warnings are suppressed.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c  | 61 +++++++--------------------------------
 arch/x86/xen/xen-asm_64.S | 20 +++++++++++--
 2 files changed, 27 insertions(+), 54 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8ca2f88bde1e..85692c9f6496 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -112,7 +112,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  *
  * 0: not available, 1: available
  */
-static int have_vcpu_info_placement = 1;
+static int have_vcpu_info_placement =
+#ifdef CONFIG_X86_32
+	1
+#else
+	0
+#endif
+	;
+
 
 static void xen_vcpu_setup(int cpu)
 {
@@ -941,6 +948,7 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
+#ifdef CONFIG_X86_32
 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
@@ -959,6 +967,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 	xen_set_pte(ptep, pte);
 }
+#endif
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
@@ -1025,7 +1034,6 @@ void xen_setup_vcpu_info_placement(void)
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
-#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -1035,7 +1043,6 @@ void xen_setup_vcpu_info_placement(void)
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
-#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1056,12 +1063,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	goto patch_site
 
 	switch (type) {
-#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
-#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1399,48 +1404,11 @@ static void *m2v(phys_addr_t maddr)
 	return __ka(m2p(maddr));
 }
 
-#ifdef CONFIG_X86_64
-static void walk(pgd_t *pgd, unsigned long addr)
-{
-	unsigned l4idx = pgd_index(addr);
-	unsigned l3idx = pud_index(addr);
-	unsigned l2idx = pmd_index(addr);
-	unsigned l1idx = pte_index(addr);
-	pgd_t l4;
-	pud_t l3;
-	pmd_t l2;
-	pte_t l1;
-
-	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
-		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
-
-	l4 = pgd[l4idx];
-	xen_raw_printk("  l4: %016lx\n", l4.pgd);
-	xen_raw_printk("      %016lx\n", pgd_val(l4));
-
-	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
-	xen_raw_printk("  l3: %016lx\n", l3.pud);
-	xen_raw_printk("      %016lx\n", pud_val(l3));
-
-	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
-	xen_raw_printk("  l2: %016lx\n", l2.pmd);
-	xen_raw_printk("      %016lx\n", pmd_val(l2));
-
-	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
-	xen_raw_printk("  l1: %016lx\n", l1.pte);
-	xen_raw_printk("      %016lx\n", pte_val(l1));
-}
-#endif
-
 static void set_page_prot(void *addr, pgprot_t prot)
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
-	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
-		       addr, pfn, get_phys_to_machine(pfn),
-		       pgprot_val(prot), pte.pte);
-
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
@@ -1698,15 +1666,6 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_raw_console_write("about to get started...\n");
 
-#if 0
-	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
-		       &boot_params, __pa_symbol(&boot_params),
-		       __va(__pa_symbol(&boot_params)));
-
-	walk(pgd, &boot_params);
-	walk(pgd, __va(__pa(&boot_params)));
-#endif
-
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 3b9bda46487a..05794c566e87 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -26,8 +26,15 @@
 /* Pseudo-flag used for virtual NMI, which we don't implement yet */
 #define XEN_EFLAGS_NMI	0x80000000
 
-#if 0
-#include <asm/percpu.h>
+#if 1
+/*
+	x86-64 does not yet support direct access to percpu variables
+	via a segment override, so we just need to make sure this code
+	never gets used
+ */
+#define BUG			ud2a
+#define PER_CPU_VAR(var, off)	0xdeadbeef
+#endif
 
 /*
 	Enable events.  This clears the event mask and tests the pending
@@ -35,6 +42,8 @@
 	events, then enter the hypervisor to get them handled.
  */
 ENTRY(xen_irq_enable_direct)
+	BUG
+
 	/* Unmask events */
 	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 
@@ -58,6 +67,8 @@ ENDPATCH(xen_irq_enable_direct)
 	non-zero.
  */
 ENTRY(xen_irq_disable_direct)
+	BUG
+
 	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 ENDPATCH(xen_irq_disable_direct)
 	ret
@@ -74,6 +85,8 @@ ENDPATCH(xen_irq_disable_direct)
 	Xen and x86 use opposite senses (mask vs enable).
  */
 ENTRY(xen_save_fl_direct)
+	BUG
+
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 	setz %ah
 	addb %ah,%ah
@@ -91,6 +104,8 @@ ENDPATCH(xen_save_fl_direct)
 	if so.
  */
 ENTRY(xen_restore_fl_direct)
+	BUG
+
 	testb $X86_EFLAGS_IF>>8, %ah
 	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 	/* Preempt here doesn't matter because that will deal with
@@ -133,7 +148,6 @@ check_events:
 	pop %rcx
 	pop %rax
 	ret
-#endif
 
 ENTRY(xen_adjust_exception_frame)
 	mov 8+0(%rsp),%rcx

From d19c8e516e0a17e049bcfbe96f86e040254ddf14 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 2 Oct 2008 16:42:35 -0700
Subject: [PATCH 121/142] xen: remove unused balloon.h

The balloon driver doesn't have any externally callable functions at
the moment, so remove the (effectively empty) header.  We can add it
back if we need to.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/xen/balloon.c |  1 -
 include/xen/balloon.h | 61 -------------------------------------------
 2 files changed, 62 deletions(-)
 delete mode 100644 include/xen/balloon.h

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a51f3e17a5fd..8c83abc73400 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -53,7 +53,6 @@
 #include <asm/tlb.h>
 
 #include <xen/interface/memory.h>
-#include <xen/balloon.h>
 #include <xen/xenbus.h>
 #include <xen/features.h>
 #include <xen/page.h>
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
deleted file mode 100644
index fe43b0f3c86a..000000000000
--- a/include/xen/balloon.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************************************
- * balloon.h
- *
- * Xen balloon driver - enables returning/claiming memory to/from Xen.
- *
- * Copyright (c) 2003, B Dragovic
- * Copyright (c) 2003-2004, M Williamson, K Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __XEN_BALLOON_H__
-#define __XEN_BALLOON_H__
-
-#include <linux/spinlock.h>
-
-#if 0
-/*
- * Inform the balloon driver that it should allow some slop for device-driver
- * memory activities.
- */
-void balloon_update_driver_allowance(long delta);
-
-/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
-struct page **alloc_empty_pages_and_pagevec(int nr_pages);
-void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
-
-void balloon_release_driver_page(struct page *page);
-
-/*
- * Prevent the balloon driver from changing the memory reservation during
- * a driver critical region.
- */
-extern spinlock_t balloon_lock;
-#define balloon_lock(__flags)   spin_lock_irqsave(&balloon_lock, __flags)
-#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
-#endif
-
-#endif /* __XEN_BALLOON_H__ */

From a2e8d3dcfd420177aaa0c53aca60a869bad75f4b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 2 Oct 2008 22:09:20 -0700
Subject: [PATCH 122/142] x86: signal: move macros out from
 restore_sigcontext()

move macros, COPY, COPY_SEG*, GET_SEG, out from restore_sigcontext().
x86_64: introduce COPY_SEG_STRICT for cs.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 38 ++++++++++++++++++++-----------------
 arch/x86/kernel/signal_64.c | 18 +++++++++++-------
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 4337cd510f0a..545448b7aeba 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -113,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
+#define COPY(x)			{		\
+	err |= __get_user(regs->x, &sc->x);	\
+}
+
+#define COPY_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp;			\
+}
+
+#define COPY_SEG_STRICT(seg)	{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp | 3;			\
+}
+
+#define GET_SEG(seg)		{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		loadsegment(seg, tmp);			\
+}
 
 /*
  * Do a signal return; undo the signal stack.
@@ -126,23 +147,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
-
-#define COPY_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
-	  regs->seg = tmp; }
-
-#define COPY_SEG_STRICT(seg)						\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
-	  regs->seg = tmp|3; }
-
-#define GET_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
-	  loadsegment(seg, tmp); }
-
 	GET_SEG(gs);
 	COPY_SEG(fs);
 	COPY_SEG(es);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 53f86d95985b..feff4a91d095 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
+#define COPY(x)			{		\
+	err |= __get_user(regs->x, &sc->x);	\
+}
+
+#define COPY_SEG_STRICT(seg)	{			\
+		unsigned short tmp;			\
+		err |= __get_user(tmp, &sc->seg);	\
+		regs->seg = tmp | 3;			\
+}
+
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -64,8 +74,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		(err |= __get_user(regs->x, &sc->x))
-
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
 	COPY(r8);
@@ -80,11 +88,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Kernel saves and restores only the CS segment register on signals,
 	 * which is the bare minimum needed to allow mixed 32/64-bit code.
 	 * App's signal handler can save/restore other segments if needed. */
-	{
-		unsigned cs;
-		err |= __get_user(cs, &sc->cs);
-		regs->cs = cs | 3;	/* Force into user mode */
-	}
+	COPY_SEG_STRICT(cs);
 
 	{
 		unsigned int tmpflags;

From 69e13ad56f9e2cd81c4f8bfd6267211c10c14c08 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 2 Oct 2008 22:18:47 -0700
Subject: [PATCH 123/142] x86: signal: remove indent in restore_sigcontext()

remove braces and indent for flags and fpstate in restore_sigcontext().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 21 +++++++--------------
 arch/x86/kernel/signal_64.c | 19 +++++++------------
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 545448b7aeba..d6dd057d0f22 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -142,6 +142,8 @@ static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		   unsigned long *pax)
 {
+	void __user *buf;
+	unsigned int tmpflags;
 	unsigned int err = 0;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -156,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	COPY_SEG_STRICT(cs);
 	COPY_SEG_STRICT(ss);
 
-	{
-		unsigned int tmpflags;
+	err |= __get_user(tmpflags, &sc->flags);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+	regs->orig_ax = -1;		/* disable syscall checks */
 
-		err |= __get_user(tmpflags, &sc->flags);
-		regs->flags = (regs->flags & ~FIX_EFLAGS) |
-						(tmpflags & FIX_EFLAGS);
-		regs->orig_ax = -1;		/* disable syscall checks */
-	}
-
-	{
-		void __user *buf;
-
-		err |= __get_user(buf, &sc->fpstate);
-		err |= restore_i387_xstate(buf);
-	}
+	err |= __get_user(buf, &sc->fpstate);
+	err |= restore_i387_xstate(buf);
 
 	err |= __get_user(*pax, &sc->ax);
 	return err;
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index feff4a91d095..a5c9627f4db9 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -69,6 +69,8 @@ static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		   unsigned long *pax)
 {
+	void __user *buf;
+	unsigned int tmpflags;
 	unsigned int err = 0;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -90,19 +92,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	 * App's signal handler can save/restore other segments if needed. */
 	COPY_SEG_STRICT(cs);
 
-	{
-		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->flags);
-		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-		regs->orig_ax = -1;		/* disable syscall checks */
-	}
+	err |= __get_user(tmpflags, &sc->flags);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+	regs->orig_ax = -1;		/* disable syscall checks */
 
-	{
-		void __user *buf;
-
-		err |= __get_user(buf, &sc->fpstate);
-		err |= restore_i387_xstate(buf);
-	}
+	err |= __get_user(buf, &sc->fpstate);
+	err |= restore_i387_xstate(buf);
 
 	err |= __get_user(*pax, &sc->ax);
 	return err;

From 6cdcdb99cf7c2e1835fc5b471864d21161c3e679 Mon Sep 17 00:00:00 2001
From: Andrey Borzenkov <arvidjaar@newmail.ru>
Date: Fri, 3 Oct 2008 21:08:49 +0400
Subject: [PATCH 124/142] x86 setup: fix ghost entries under /sys/firmware/edd
 take 3

Some BIOSes do not indicate error when trying to read from non-
existing device. Zero buffer before reading and check that we
possibly have valid MBR by looking for MBR magic.

This was fixed in different way for edd.S in
http://marc.info/?l=linux-kernel&m=114087765422490&w=2, but lost
again when edd.S was rewritten in C.

Signed-off-by: Andrey Borzenkov < arvidjaar@mail.ru>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/edd.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index bf4ae6ff518e..067e28cd3c5f 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -43,6 +43,7 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
 	char *mbrbuf_ptr, *mbrbuf_end;
 	u32 buf_base, mbr_base;
 	extern char _end[];
+	u16 mbr_magic;
 
 	sector_size = ei->params.bytes_per_sector;
 	if (!sector_size)
@@ -60,11 +61,15 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
 	if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
 		return -1;
 
+	memset(mbrbuf_ptr, 0, sector_size);
 	if (read_mbr(devno, mbrbuf_ptr))
 		return -1;
 
 	*mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
-	return 0;
+	mbr_magic = *(u16 *)&mbrbuf_ptr[510];
+
+	/* check for valid MBR magic */
+	return mbr_magic == 0xAA55 ? 0 : -1;
 }
 
 static int get_edd_info(u8 devno, struct edd_info *ei)

From 98920dc3d1113b883cbc73e3293446d3525c6042 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 3 Oct 2008 10:22:33 -0700
Subject: [PATCH 125/142] Revert "x86: fix ghost EDD devices in /sys again"

This reverts commit 464f04c9e9b3b1c4f5ffb89c51d8ba2a2034c846.
Obsoleted by commit 6cdcdb99cf7c2e1835fc5b471864d21161c3e679.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/edd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 067e28cd3c5f..1aae8f3e5ca1 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -32,9 +32,7 @@ static int read_mbr(u8 devno, void *buf)
 		     : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
 		     : : "esi", "edi", "memory");
 
-	/* Some BIOSes do not set carry flag on error but still return
-	 * error in AH. The condition below is expected to catch both */
-	return -!!ax;		/* 0 or -1 */
+	return -(u8)ax;		/* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)

From ba31a5f88b6f907e715ff43db06403e12465b703 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sat, 4 Oct 2008 21:21:44 +0200
Subject: [PATCH 126/142] x86 setup: remove DEF_INITSEG and DEF_SETUPSEG

Since v.2.6.23 DEF_INITSEG and DEF_SETUPSEG are unused. Commit c397368
("Remove old i386 setup code") dropped their usage for i386. They did not
return in the x86 tree. (Something similar must have happened for x86_64.)
Remove these.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/boot.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h
index 2faed7ecb092..7b287df4ab55 100644
--- a/include/asm-x86/boot.h
+++ b/include/asm-x86/boot.h
@@ -2,9 +2,7 @@
 #define _ASM_BOOT_H
 
 /* Don't touch these, unless you really know what you're doing. */
-#define DEF_INITSEG	0x9000
 #define DEF_SYSSEG	0x1000
-#define DEF_SETUPSEG	0x9020
 #define DEF_SYSSIZE	0x7F00
 
 /* Internal svga startup constants */

From d960c9ce47c5360b39e47c064818e91c89df0e21 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sat, 4 Oct 2008 21:18:51 +0200
Subject: [PATCH 127/142] x86 setup: remove IMAGE_OFFSET

After commit 968de4f ("i386: Relocatable kernel support") IMAGE_OFFSET wasn't
actually used anymore in the (current) X86 build system. Now remove its last
traces.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/Makefile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index cceba1f46365..cd48c7210016 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -72,9 +72,7 @@ KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 KBUILD_CFLAGS +=   $(call cc-option,-m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
-$(obj)/zImage:  IMAGE_OFFSET := 0x1000
 $(obj)/zImage:  asflags-y := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: IMAGE_OFFSET := 0x100000
 $(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
 $(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
 $(obj)/bzImage: BUILDFLAGS   := -b
@@ -117,7 +115,7 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
 	$(call if_changed,objcopy)
 
 $(obj)/compressed/vmlinux: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
 # Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
 FDARGS =

From 2407390bd20de38740eef87eab4fe3d1deafdbdd Mon Sep 17 00:00:00 2001
From: Michal Januszewski <spock@gentoo.org>
Date: Sun, 5 Oct 2008 12:16:04 +0200
Subject: [PATCH 128/142] x86: replace a magic number with a named constant in
 the VESA boot code

Replace a magic number with a named constant in the VESA boot code.

Signed-off-by: Michal Januszewski <spock@gentoo.org>
Cc: linux-fbdev-devel@lists.sourceforge.net
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/video-vesa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 401ad998ad08..1e6fe0214c85 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -224,7 +224,7 @@ static void vesa_store_pm_info(void)
 static void vesa_store_mode_params_graphics(void)
 {
 	/* Tell the kernel we're in VESA graphics mode */
-	boot_params.screen_info.orig_video_isVGA = 0x23;
+	boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
 
 	/* Mode parameters */
 	boot_params.screen_info.vesa_attributes = vminfo.mode_attr;

From 33fb0e4eb53f16af312f9698f974e2e64af39c12 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 7 Oct 2008 00:11:22 +0200
Subject: [PATCH 129/142] x86: SB450: skip IRQ0 override if it is not routed to
 INT2 of IOAPIC

On some HP nx6... laptops (e.g. nx6325) BIOS reports an IRQ0 override
but the SB450 chipset is configured such that timer interrupts goe to
INT0 of IOAPIC.

Check IRQ0 routing and if it is routed to INT0 of IOAPIC skip the
timer override.

[ This more generic PCI ID based quirk should alleviate the need for
  dmi_ignore_irq0_timer_override DMI quirks. ]

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Tested-by: Dmitry Torokhov <dtor@mail.ru>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early-quirks.c | 48 ++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 4353cf5e6fac..6b839b147644 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func)
 
 }
 
+static u32 ati_ixp4x0_rev(int num, int slot, int func)
+{
+	u32 d;
+	u8  b;
+
+	b = read_pci_config_byte(num, slot, func, 0xac);
+	b &= ~(1<<5);
+	write_pci_config_byte(num, slot, func, 0xac, b);
+
+	d = read_pci_config(num, slot, func, 0x70);
+	d |= 1<<8;
+	write_pci_config(num, slot, func, 0x70, d);
+
+	d = read_pci_config(num, slot, func, 0x8);
+	d &= 0xff;
+	return d;
+}
+
+static void __init ati_bugs(int num, int slot, int func)
+{
+#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC)
+	u32 d;
+	u8  b;
+
+	if (acpi_use_timer_override)
+		return;
+
+	d = ati_ixp4x0_rev(num, slot, func);
+	if (d  < 0x82)
+		acpi_skip_timer_override = 1;
+	else {
+		/* check for IRQ0 interrupt swap */
+		outb(0x72, 0xcd6); b = inb(0xcd7);
+		if (!(b & 0x2))
+			acpi_skip_timer_override = 1;
+	}
+
+	if (acpi_skip_timer_override) {
+		printk(KERN_INFO "SB4X0 revision 0x%x\n", d);
+		printk(KERN_INFO "Ignoring ACPI timer override.\n");
+		printk(KERN_INFO "If you got timer trouble "
+		       "try acpi_use_timer_override\n");
+	}
+#endif
+}
+
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -114,6 +160,8 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
 	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
 	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
+	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
+	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{}
 };
 

From 8d89adf44cf750e49691ba5b744b2ad77a05e997 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 7 Oct 2008 06:47:52 +0200
Subject: [PATCH 130/142] x86: SB450: deprioritize DMI quirks

This PCI ID based quick should be a full solution for the IRQ0 override
related slowdown problem on SB450 based systems:

  33fb0e4: x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC

Emit a warning in those cases where the DMI quirk triggers but
the PCI ID based quirk didnt.

If this warning does not trigger then we can phase out the DMI quirks.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c102af85df9c..096102d9b24e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1421,8 +1421,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
  */
 static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 {
-	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
-	acpi_skip_timer_override = 1;
+	/*
+	 * The ati_ixp4x0_rev() early PCI quirk should have set
+	 * the acpi_skip_timer_override flag already:
+	 */
+	if (!acpi_skip_timer_override) {
+		WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
+		pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+			d->ident);
+		acpi_skip_timer_override = 1;
+	}
 	return 0;
 }
 

From f364eadab59b316ea0bd9f9bc01af0ad89065569 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 7 Oct 2008 14:04:27 -0700
Subject: [PATCH 131/142] x86: xsave: fix error condition in save_i387_xstate()

Actually return failure on error.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/xsave.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ed5274a5bb0f..448fde96963c 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -121,6 +121,8 @@ int save_i387_xstate(void __user *buf)
 		err |= __put_user(FP_XSTATE_MAGIC2,
 				  (__u32 __user *) (buf + sig_xstate_size
 						    - FP_XSTATE_MAGIC2_SIZE));
+		if (err)
+			return err;
 	}
 
 	return 1;

From 04944b793e18ece23f63c0252646b310c1845940 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 7 Oct 2008 14:04:28 -0700
Subject: [PATCH 132/142] x86: xsave: set FP, SSE bits in the xsave header in
 the user sigcontext

If a processor implementation discern that a processor state component is in
its initialized state, it may modify the corresponding bit in the
xsave header.xstate_bv as '0'. State in the memory layout setup by 'xsave'
will be consistent with the bit values in the header.

During signal handling, legacy applications may change the FP/SSE bits
in the sigcontext memory layout without touching the FP/SSE header bits
in the xsave header. So always set FP/SSE bits in the xsave header
while saving the sigcontext state to the user space. During signal return,
this will enable the kernel to capture any changes to the FP/SSE bits by the
legacy applications which don't touch xsave headers.

xsave aware apps can change the xstate_bv in the xsave header aswell
as change any contents in the memory layout. xrestor as part of sigreturn
will capture all the changes.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/i387.c  | 14 ++++++++++++++
 arch/x86/kernel/xsave.c | 25 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 45723f1fe198..1f20608d4ca8 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 
 static int save_i387_xsave(void __user *buf)
 {
+	struct task_struct *tsk = current;
 	struct _fpstate_ia32 __user *fx = buf;
 	int err = 0;
 
+	/*
+	 * For legacy compatible, we always set FP/SSE bits in the bit
+	 * vector while saving the state to the user context.
+	 * This will enable us capturing any changes(during sigreturn) to
+	 * the FP/SSE bits by the legacy applications which don't touch
+	 * xstate_bv in the xsave header.
+	 *
+	 * xsave aware applications can change the xstate_bv in the xsave
+	 * header as well as change any contents in the memory layout.
+	 * xrestore as part of sigreturn will capture all the changes.
+	 */
+	tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
 	if (save_i387_fxsave(fx) < 0)
 		return -1;
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 448fde96963c..2f98323716d9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf)
 
 	if (task_thread_info(tsk)->status & TS_XSAVE) {
 		struct _fpstate __user *fx = buf;
+		struct _xstate __user *x = buf;
+		u64 xstate_bv;
 
 		err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
 				     sizeof(struct _fpx_sw_bytes));
@@ -121,6 +123,29 @@ int save_i387_xstate(void __user *buf)
 		err |= __put_user(FP_XSTATE_MAGIC2,
 				  (__u32 __user *) (buf + sig_xstate_size
 						    - FP_XSTATE_MAGIC2_SIZE));
+
+		/*
+		 * Read the xstate_bv which we copied (directly from the cpu or
+		 * from the state in task struct) to the user buffers and
+		 * set the FP/SSE bits.
+		 */
+		err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+
+		/*
+		 * For legacy compatible, we always set FP/SSE bits in the bit
+		 * vector while saving the state to the user context. This will
+		 * enable us capturing any changes(during sigreturn) to
+		 * the FP/SSE bits by the legacy applications which don't touch
+		 * xstate_bv in the xsave header.
+		 *
+		 * xsave aware apps can change the xstate_bv in the xsave
+		 * header as well as change any contents in the memory layout.
+		 * xrestore as part of sigreturn will capture all the changes.
+		 */
+		xstate_bv |= XSTATE_FPSSE;
+
+		err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+
 		if (err)
 			return err;
 	}

From eefb47f6a1e855653d275cb90592a3587ea93a09 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 8 Oct 2008 13:01:39 -0700
Subject: [PATCH 133/142] xen: use spin_lock_nest_lock when pinning a pagetable

When pinning/unpinning a pagetable with split pte locks, we can end up
holding multiple pte locks at once (we need to hold the locks while
there's a pending batched hypercall affecting the pte page).  Because
all the pte locks are in the same lock class, lockdep thinks that
we're potentially taking a lock recursively.

This warning is spurious because we always take the pte locks while
holding mm->page_table_lock.  lockdep now has spin_lock_nest_lock to
express this kind of dominant lock use, so use it here so that lockdep
knows what's going on.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 74 ++++++++++++++++++++++++++++++----------------
 arch/x86/xen/mmu.h |  3 --
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 64e58681767e..ae173f6edd8b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -651,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
+static int xen_pgd_walk(struct mm_struct *mm,
+			int (*func)(struct mm_struct *mm, struct page *,
+				    enum pt_level),
 			unsigned long limit)
 {
+	pgd_t *pgd = mm->pgd;
 	int flush = 0;
 	unsigned hole_low, hole_high;
 	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -698,7 +701,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= (*func)(virt_to_page(pud), PT_PUD);
+			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
 
 		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
@@ -713,7 +716,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= (*func)(virt_to_page(pmd), PT_PMD);
+				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
 
 			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
 				struct page *pte;
@@ -727,7 +730,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 					continue;
 
 				pte = pmd_page(pmd[pmdidx]);
-				flush |= (*func)(pte, PT_PTE);
+				flush |= (*func)(mm, pte, PT_PTE);
 			}
 		}
 	}
@@ -735,20 +738,20 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 out:
 	/* Do the top level last, so that the callbacks can use it as
 	   a cue to do final things like tlb flushes. */
-	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
 
 	return flush;
 }
 
 /* If we're using split pte locks, then take the page's lock and
    return a pointer to it.  Otherwise return NULL. */
-static spinlock_t *xen_pte_lock(struct page *page)
+static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
 {
 	spinlock_t *ptl = NULL;
 
 #if USE_SPLIT_PTLOCKS
 	ptl = __pte_lockptr(page);
-	spin_lock(ptl);
+	spin_lock_nest_lock(ptl, &mm->page_table_lock);
 #endif
 
 	return ptl;
@@ -772,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 }
 
-static int xen_pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct mm_struct *mm, struct page *page,
+			enum pt_level level)
 {
 	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
@@ -813,7 +817,7 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 		 */
 		ptl = NULL;
 		if (level == PT_PTE)
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 					pfn_pte(pfn, PAGE_KERNEL_RO),
@@ -834,11 +838,11 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 /* This is called just after a mm has been created, but it has not
    been used yet.  We need to make sure that its pagetable is all
    read-only, and can be pinned. */
-void xen_pgd_pin(pgd_t *pgd)
+static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) {
+	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
@@ -852,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
 
 		if (user_pgd) {
-			xen_pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
 			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
 		}
 	}
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		     PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 #endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_pin(struct mm_struct *mm)
+{
+	__xen_pgd_pin(mm, mm->pgd);
+}
+
 /*
  * On save, we need to pin all pagetables to make sure they get their
  * mfns turned into pfns.  Search the list for any unpinned pgds and pin
  * them (unpinned pgds are not currently in use, probably because the
  * process is under construction or destruction).
+ *
+ * Expected to be called in stop_machine() ("equivalent to taking
+ * every spinlock in the system"), so the locking doesn't really
+ * matter all that much.
  */
 void xen_mm_pin_all(void)
 {
@@ -881,7 +895,7 @@ void xen_mm_pin_all(void)
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
-			xen_pgd_pin((pgd_t *)page_address(page));
+			__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
 			SetPageSavePinned(page);
 		}
 	}
@@ -894,7 +908,8 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+				  enum pt_level level)
 {
 	SetPagePinned(page);
 	return 0;
@@ -902,10 +917,11 @@ static __init int xen_mark_pinned(struct page *page, enum pt_level level)
 
 void __init xen_mark_init_mm_pinned(void)
 {
-	xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP);
+	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
-static int xen_unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct mm_struct *mm, struct page *page,
+			  enum pt_level level)
 {
 	unsigned pgfl = TestClearPagePinned(page);
 
@@ -923,7 +939,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 		 * partially-pinned state.
 		 */
 		if (level == PT_PTE) {
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 			if (ptl)
 				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
@@ -945,7 +961,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 }
 
 /* Release a pagetables pages back as normal RW */
-static void xen_pgd_unpin(pgd_t *pgd)
+static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
@@ -957,21 +973,27 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 		if (user_pgd) {
 			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-			xen_unpin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
 		}
 	}
 #endif
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		       PT_PMD);
 #endif
 
-	xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT);
+	xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_unpin(struct mm_struct *mm)
+{
+	__xen_pgd_unpin(mm, mm->pgd);
+}
+
 /*
  * On resume, undo any pinning done at save, so that the rest of the
  * kernel doesn't see any unexpected pinned pagetables.
@@ -986,7 +1008,7 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			xen_pgd_unpin((pgd_t *)page_address(page));
+			__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
 	}
@@ -997,14 +1019,14 @@ void xen_mm_unpin_all(void)
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
-	xen_pgd_pin(next->pgd);
+	xen_pgd_pin(next);
 	spin_unlock(&next->page_table_lock);
 }
 
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	spin_lock(&mm->page_table_lock);
-	xen_pgd_pin(mm->pgd);
+	xen_pgd_pin(mm);
 	spin_unlock(&mm->page_table_lock);
 }
 
@@ -1095,7 +1117,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 
 	/* pgd may not be pinned in the error exit path of execve */
 	if (xen_page_pinned(mm->pgd))
-		xen_pgd_unpin(mm->pgd);
+		xen_pgd_unpin(mm);
 
 	spin_unlock(&mm->page_table_lock);
 }
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 0f59bd03f9e3..98d71659da5a 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -18,9 +18,6 @@ void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
 void xen_exit_mmap(struct mm_struct *mm);
 
-void xen_pgd_pin(pgd_t *pgd);
-//void xen_pgd_unpin(pgd_t *pgd);
-
 pteval_t xen_pte_val(pte_t);
 pmdval_t xen_pmd_val(pmd_t);
 pgdval_t xen_pgd_val(pgd_t);

From 5dc64a3442b98eaa0e3730c35fcf00cf962a93e7 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Fri, 10 Oct 2008 11:27:38 +0100
Subject: [PATCH 134/142] xen: do not reserve 2 pages of padding between
 hypervisor and fixmap.

When reserving space for the hypervisor the Xen paravirt backend adds
an extra two pages (this was carried forward from the 2.6.18-xen tree
which had them "for safety"). Depending on various CONFIG options this
can cause the boot time fixmaps to span multiple PMDs which is not
supported and triggers a WARN in early_ioremap_init().

This was exposed by 2216d199b1430d1c0affb1498a9ebdbd9c0de439 which
moved the dmi table parsing earlier.
    x86: fix CONFIG_X86_RESERVE_LOW_64K=y

    The bad_bios_dmi_table() quirk never triggered because we do DMI setup
    too late. Move it a bit earlier.

There is no real reason to reserve these two extra pages and the
fixmap already incorporates FIX_HOLE which serves the same
purpose. None of the other callers of reserve_top_address do this.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 85692c9f6496..977a54255fb4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1370,7 +1370,7 @@ static void __init xen_reserve_top(void)
 	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
 		top = pp.virt_start;
 
-	reserve_top_address(-top + 2 * PAGE_SIZE);
+	reserve_top_address(-top);
 #endif	/* CONFIG_X86_32 */
 }
 

From 325af5fb1418c79953db0954556de048e061d8b6 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 8 Aug 2008 15:58:39 -0700
Subject: [PATCH 135/142] x86: ioperm user_regset

This adds a user_regset type for the x86 io permissions bitmap.
This makes it appear in core dumps (when ioperm has been used).
It will also make it visible to debuggers in the future.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
[conflict resolutions: Signed-off-by: Ingo Molnar <mingo@elte.hu> ]
---
 arch/x86/kernel/ptrace.c | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/elf.h      |  1 +
 2 files changed, 38 insertions(+)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e375b658efc3..4e1ef66c2ea4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -40,7 +40,9 @@ enum x86_regset {
 	REGSET_GENERAL,
 	REGSET_FP,
 	REGSET_XFP,
+	REGSET_IOPERM64 = REGSET_XFP,
 	REGSET_TLS,
+	REGSET_IOPERM32,
 };
 
 /*
@@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child,
 	return 0;
 }
 
+/*
+ * These access the current or another (stopped) task's io permission
+ * bitmap for debugging or core dump.
+ */
+static int ioperm_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	return target->thread.io_bitmap_max / regset->size;
+}
+
+static int ioperm_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	if (!target->thread.io_bitmap_ptr)
+		return -ENXIO;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   target->thread.io_bitmap_ptr,
+				   0, IO_BITMAP_BYTES);
+}
+
 #ifdef CONFIG_X86_PTRACE_BTS
 /*
  * The configuration for a particular BTS hardware implementation.
@@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = {
 		.size = sizeof(long), .align = sizeof(long),
 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
 	},
+	[REGSET_IOPERM64] = {
+		.core_note_type = NT_386_IOPERM,
+		.n = IO_BITMAP_LONGS,
+		.size = sizeof(long), .align = sizeof(long),
+		.active = ioperm_active, .get = ioperm_get
+	},
 };
 
 static const struct user_regset_view user_x86_64_view = {
@@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = {
 		.active = regset_tls_active,
 		.get = regset_tls_get, .set = regset_tls_set
 	},
+	[REGSET_IOPERM32] = {
+		.core_note_type = NT_386_IOPERM,
+		.n = IO_BITMAP_BYTES / sizeof(u32),
+		.size = sizeof(u32), .align = sizeof(u32),
+		.active = ioperm_active, .get = ioperm_get
+	},
 };
 
 static const struct user_regset_view user_x86_32_view = {
diff --git a/include/linux/elf.h b/include/linux/elf.h
index edc3dac3f02f..0b61ca41a044 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -360,6 +360,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_SPE	0x101		/* PowerPC SPE/EVR registers */
 #define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
 
 
 /* Note header in a PT_NOTE section */

From 46eaa6702016e3ac9a188172a2c309d6ca1be1cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 15:06:29 +0200
Subject: [PATCH 136/142] x86: memory corruption check - cleanup

Move the prototypes from the generic kernel.h header to the more
appropriate include/asm-x86/bios_ebda.h header file.

Also, remove the check from the power management code - this is a
pure x86 matter for now.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c       |  1 +
 arch/x86/mm/init_64.c       |  1 +
 drivers/base/power/main.c   |  1 -
 include/asm-x86/bios_ebda.h | 17 +++++++++++++++++
 include/linux/kernel.h      | 17 -----------------
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7e05462ffb11..bbe044dbe014 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
 #include <linux/cpumask.h>
 
 #include <asm/asm.h>
+#include <asm/bios_ebda.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d84d3e91d348..3e10054c5731 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -31,6 +31,7 @@
 #include <linux/nmi.h>
 
 #include <asm/processor.h>
+#include <asm/bios_ebda.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index bf6d3554e506..273a944d4040 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -254,7 +254,6 @@ static char *pm_verb(int event)
 
 static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
 {
-	check_for_bios_corruption();
 	dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
 		((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
 		", may wakeup" : "");
diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index ec42ed874591..79b4b88505d7 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h
@@ -16,4 +16,21 @@ static inline unsigned int get_bios_ebda(void)
 
 void reserve_ebda_region(void);
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+void start_periodic_check_for_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+
+static inline void start_periodic_check_for_corruption(void)
+{
+}
+#endif
+
 #endif /* ASM_X86__BIOS_EBDA_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 50873b211788..2651f805ba6d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -240,23 +240,6 @@ extern const char *print_tainted(void);
 extern void add_taint(unsigned);
 extern int root_mountflags;
 
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-/*
- * This is obviously not a great place for this, but we want to be
- * able to scatter it around anywhere in the kernel.
- */
-void check_for_bios_corruption(void);
-void start_periodic_check_for_corruption(void);
-#else
-static inline void check_for_bios_corruption(void)
-{
-}
-
-static inline void start_periodic_check_for_corruption(void)
-{
-}
-#endif
-
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,

From 9f482807a6bd7e2aa1ed0d8cfc48463ec4ca3568 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 18 Aug 2008 12:59:32 +0200
Subject: [PATCH 137/142] x86, fpu: check __clear_user() return value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix warning:

  arch/x86/kernel/xsave.c: In function ‘save_i387_xstate’:
  arch/x86/kernel/xsave.c:98: warning: ignoring return value of ‘__clear_user’, declared with attribute warn_unused_result

check the return value and act on it. We should not be ignoring faults
at this point.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/xsave.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 2f98323716d9..9abac8a9d823 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf)
 	 	 * Start with clearing the user buffer. This will present a
 	 	 * clean context for the bytes not touched by the fxsave/xsave.
 		 */
-		__clear_user(buf, sig_xstate_size);
+		err = __clear_user(buf, sig_xstate_size);
+		if (err)
+			return err;
 
 		if (task_thread_info(tsk)->status & TS_XSAVE)
 			err = xsave_user(buf);

From 45e96f26f257bd873017c6244a6cafd27f6f5439 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 Aug 2008 10:37:14 +0200
Subject: [PATCH 138/142] warnings: fix arch/x86/kernel/early_printk.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix warning:

  arch/x86/kernel/early_printk.c:993: warning: ‘enable_debug_console’ defined but not used

Eliminate dead code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early_printk.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 02fc5b40c14b..34ad997d3834 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -989,22 +989,4 @@ static int __init setup_early_printk(char *buf)
 	return 0;
 }
 
-static void __init enable_debug_console(char *buf)
-{
-#ifdef DBGP_DEBUG
-	struct console *old_early_console = NULL;
-
-	if (early_console_initialized && early_console) {
-		old_early_console = early_console;
-		unregister_console(early_console);
-		early_console_initialized = 0;
-	}
-
-	setup_early_printk(buf);
-
-	if (early_console == old_early_console && old_early_console)
-		register_console(old_early_console);
-#endif
-}
-
 early_param("earlyprintk", setup_early_printk);

From d562353a4533c4671af683499191707c9a77c406 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 15:22:22 +0200
Subject: [PATCH 139/142] warnings: fix arch/x86/kernel/io_apic_64.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 arch/x86/kernel/io_apic_64.c: In function ‘print_local_APIC’:
 arch/x86/kernel/io_apic_64.c:1284: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’
 arch/x86/kernel/io_apic_64.c:1285: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’

We want to print the two halves of 'icr' at 32 bit width.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index a1bec2969c6a..02063ae042f7 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1281,8 +1281,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
 	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);

From 8a66712ba0969aea5580b9e312badfc2490fc614 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 15:24:53 +0200
Subject: [PATCH 140/142] x86, amd-iommu: propagate PCI device enabling error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

propagate an error in enabling the PCI device.

Also eliminates this warning:

 arch/x86/kernel/amd_iommu_init.c: In function ‘init_iommu_one’:
 arch/x86/kernel/amd_iommu_init.c:726: warning: ignoring return value of ‘pci_enable_device’, declared with attribute warn_unused_result

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 148fcfe22f17..4cd8083c58be 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -723,9 +723,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	init_iommu_from_acpi(iommu, h);
 	init_iommu_devices(iommu);
 
-	pci_enable_device(iommu->dev);
-
-	return 0;
+	return pci_enable_device(iommu->dev);
 }
 
 /*

From b7b3a42533e1e41297fe517533375f9f8f7b92d0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 15:36:24 +0200
Subject: [PATCH 141/142] x86: extend processor type select help text

extend the help text of the CONFIG_CPU_SUP_* config options to
express what it does and what effects it has.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.cpu | 63 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f8843c3ae77d..a2ea1488b37e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -430,48 +430,97 @@ config CPU_SUP_INTEL
 	default y
 	bool "Support Intel processors" if PROCESSOR_SELECT
 	help
-	  This enables extended support for Intel processors
+	  This enables detection, tunings and quirks for Intel processors
+
+	  You need this enabled if you want your kernel to run on an
+	  Intel CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on an Intel
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_CYRIX_32
 	default y
 	bool "Support Cyrix processors" if PROCESSOR_SELECT
 	depends on !64BIT
 	help
-	  This enables extended support for Cyrix processors
+	  This enables detection, tunings and quirks for Cyrix processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Cyrix CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Cyrix
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_AMD
 	default y
 	bool "Support AMD processors" if PROCESSOR_SELECT
 	help
-	  This enables extended support for AMD processors
+	  This enables detection, tunings and quirks for AMD processors
+
+	  You need this enabled if you want your kernel to run on an
+	  AMD CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on an AMD
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_CENTAUR_32
 	default y
 	bool "Support Centaur processors" if PROCESSOR_SELECT
 	depends on !64BIT
 	help
-	  This enables extended support for Centaur processors
+	  This enables detection, tunings and quirks for Centaur processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Centaur CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_CENTAUR_64
 	default y
 	bool "Support Centaur processors" if PROCESSOR_SELECT
 	depends on 64BIT
 	help
-	  This enables extended support for Centaur processors
+	  This enables detection, tunings and quirks for Centaur processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Centaur CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_TRANSMETA_32
 	default y
 	bool "Support Transmeta processors" if PROCESSOR_SELECT
 	depends on !64BIT
 	help
-	  This enables extended support for Transmeta processors
+	  This enables detection, tunings and quirks for Transmeta processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Transmeta CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Transmeta
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config CPU_SUP_UMC_32
 	default y
 	bool "Support UMC processors" if PROCESSOR_SELECT
 	depends on !64BIT
 	help
-	  This enables extended support for UMC processors
+	  This enables detection, tunings and quirks for UMC processors
+
+	  You need this enabled if you want your kernel to run on a
+	  UMC CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a UMC
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
 
 config X86_DS
 	bool "Debug Store support"

From 1db5fff9aeab18566eb380e354629fdbbe7792f0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 15:40:45 +0200
Subject: [PATCH 142/142] x86: make processor type select depend on
 CONFIG_EMBEDDED

deselecting one of the CPU type CONFIG_CPU_SUP_* config options
can render a kernel unbootable. Make sure this option is only
available if CONFIG_EMBEDDED is enabled.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.cpu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a2ea1488b37e..c5f101360520 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -420,7 +420,6 @@ config X86_DEBUGCTLMSR
 	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
 
 menuconfig PROCESSOR_SELECT
-	default y
 	bool "Supported processor vendors" if EMBEDDED
 	help
 	  This lets you choose what x86 vendor support code your kernel