From 853d9b18f1e861d37e9b271742329f8c1176eabe Mon Sep 17 00:00:00 2001
From: Levente Kurusa <levex@linux.com>
Date: Fri, 29 Nov 2013 21:28:48 +0100
Subject: [PATCH 1/5] x86, mce: Call put_device on device_register failure

This patch adds a call to put_device() when the device_register() call
has failed. This is required so that the last reference to the device is
given up.

Signed-off-by: Levente Kurusa <levex@linux.com>
Link: http://lkml.kernel.org/r/5298F900.9000208@linux.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..a389c1d859ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
 	dev->release = &mce_device_release;
 
 	err = device_register(dev);
-	if (err)
+	if (err) {
+		put_device(dev);
 		return err;
+	}
 
 	for (i = 0; mce_device_attrs[i]; i++) {
 		err = device_create_file(dev, mce_device_attrs[i]);

From 545104dd2ba0feb62544284ee1d6c98fe6ef8245 Mon Sep 17 00:00:00 2001
From: Rui Wang <ruiv.wang@gmail.com>
Date: Sun, 8 Dec 2013 12:17:53 +0800
Subject: [PATCH 2/5] PCI, AER: Fix severity usage in aer trace event

There's inconsistency between dmesg and the trace event output.
When dmesg says "severity=Corrected", the trace event says
"severity=Fatal". What happens is that HW_EVENT_ERR_CORRECTED is
defined in edac.h:

enum hw_event_mc_err_type {
        HW_EVENT_ERR_CORRECTED,
        HW_EVENT_ERR_UNCORRECTED,
        HW_EVENT_ERR_FATAL,
        HW_EVENT_ERR_INFO,
};

while aer_print_error() uses aer_error_severity_string[] defined as:

static const char *aer_error_severity_string[] = {
        "Uncorrected (Non-Fatal)",
        "Uncorrected (Fatal)",
        "Corrected"
};

In this case dmesg is correct because info->severity is assigned in
aer_isr_one_error() using the definitions in include/linux/ras.h:

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Acked-by: Ethan Zhao <ethan.kernel@gmail.com>
Link: http://lkml.kernel.org/r/CANVTcTaP18CiGOSEcX5Ch_wPw9mEhkgokfp+d+ZOMFD+Ce4juA@mail.gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 include/trace/events/ras.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
index 88b878383797..1c875ad1ee5f 100644
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -5,7 +5,7 @@
 #define _TRACE_AER_H
 
 #include <linux/tracepoint.h>
-#include <linux/edac.h>
+#include <linux/aer.h>
 
 
 /*
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event,
 
 	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
 		__get_str(dev_name),
-		__entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
-			__entry->severity == HW_EVENT_ERR_FATAL ?
-			"Fatal" : "Uncorrected",
-		__entry->severity == HW_EVENT_ERR_CORRECTED ?
+		__entry->severity == AER_CORRECTABLE ? "Corrected" :
+			__entry->severity == AER_FATAL ?
+			"Fatal" : "Uncorrected, non-fatal",
+		__entry->severity == AER_CORRECTABLE ?
 		__print_flags(__entry->status, "|", aer_correctable_errors) :
 		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
 );

From c700f013adb0ec57518a7fe0163e3117659ce249 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:08 -0500
Subject: [PATCH 3/5] EDAC: Add an edac_report parameter to EDAC

This new parameter is used to control how to report HW error reporting,
especially for newer Intel platform, like Ivybridge-EX, which contains
an enhanced error decoding functionality in the firmware, i.e. eMCA.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-2-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 Documentation/kernel-parameters.txt |  8 ++++++++
 drivers/edac/edac_stub.c            | 19 +++++++++++++++++++
 include/linux/edac.h                | 28 ++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..453092c822f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 			The xen output can only be used by Xen PV guests.
 
+	edac_report=	[HW,EDAC] Control how to report EDAC event
+			Format: {"on" | "off" | "force"}
+			on: enable EDAC to report H/W event. May be overridden
+			by other higher priority error reporting module.
+			off: disable H/W event reporting through EDAC.
+			force: enforce the use of EDAC to report H/W event.
+			default: on.
+
 	ekgdboc=	[X86,KGDB] Allow early kernel console debugging
 			ekgdboc=kbd
 
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
 
 static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
 
+int edac_report_status = EDAC_REPORTING_ENABLED;
+EXPORT_SYMBOL_GPL(edac_report_status);
+
+static int __init edac_report_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strncmp(str, "on", 2))
+		set_edac_report_status(EDAC_REPORTING_ENABLED);
+	else if (!strncmp(str, "off", 3))
+		set_edac_report_status(EDAC_REPORTING_DISABLED);
+	else if (!strncmp(str, "force", 5))
+		set_edac_report_status(EDAC_REPORTING_FORCE);
+
+	return 0;
+}
+__setup("edac_report=", edac_report_setup);
+
 /*
  * called to determine if there is an EDAC driver interested in
  * knowing an event (such as NMI) occurred
diff --git a/include/linux/edac.h b/include/linux/edac.h
index dbdffe8d4469..8e6c20af11a2 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void);
 extern struct bus_type *edac_get_sysfs_subsys(void);
 extern void edac_put_sysfs_subsys(void);
 
+enum {
+	EDAC_REPORTING_ENABLED,
+	EDAC_REPORTING_DISABLED,
+	EDAC_REPORTING_FORCE
+};
+
+extern int edac_report_status;
+#ifdef CONFIG_EDAC
+static inline int get_edac_report_status(void)
+{
+	return edac_report_status;
+}
+
+static inline void set_edac_report_status(int new)
+{
+	edac_report_status = new;
+}
+#else
+static inline int get_edac_report_status(void)
+{
+	return EDAC_REPORTING_DISABLED;
+}
+
+static inline void set_edac_report_status(int new)
+{
+}
+#endif
+
 static inline void opstate_init(void)
 {
 	switch (edac_op_state) {

From fd521039666529a4674b9822f1cc873672f57ee6 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:09 -0500
Subject: [PATCH 4/5] EDAC, sb_edac: Modify H/W event reporting policy

Newer Intel platforms support more than one method to report H/W event.
On this kind of platform, H/W event report can adopt new method and
traditional EDAC method should be disabled. Moreover, if EDAC event
report method is set to *force*, it means event must be reported via
EDAC interface. IOW, it overrides the default event report policy.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-3-git-send-email-gong.chen@linux.intel.com
[ Boris: massage commit and error messages ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/sb_edac.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 8472405c5586..e2e2cb365764 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 	struct sbridge_pvt *pvt;
 
+	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+		return NOTIFY_DONE;
+
 	mci = get_mci_for_node_id(mce->socketid);
 	if (!mci)
 		return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
 	opstate_init();
 
 	pci_rc = pci_register_driver(&sbridge_driver);
-
 	if (pci_rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
+		if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
 		return 0;
 	}
 

From 42139eb356e3384759ca143ae04d82376346eb4c Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 6 Dec 2013 01:17:10 -0500
Subject: [PATCH 5/5] ACPI, eMCA: Combine eMCA/EDAC event reporting priority

eMCA has higher H/W event reporting priority. Once it is
loaded, EDAC event reporting should be disabled, unless EDAC
overrides eMCA priority via command line parameter "edac_report=force".

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1386310630-12529-4-git-send-email-gong.chen@linux.intel.com
[ Boris: massage printk message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/acpi/acpi_extlog.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
 #include <acpi/acpi_bus.h>
 #include <linux/cper.h>
 #include <linux/ratelimit.h>
+#include <linux/edac.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 
@@ -43,6 +44,8 @@ struct extlog_l1_head {
 	u8  rev1[12];
 };
 
+static int old_edac_report_status;
+
 static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
 
 /* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 
 	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
 
-	return NOTIFY_DONE;
+	return NOTIFY_STOP;
 }
 
 static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
 	u64 cap;
 	int rc;
 
-	rc = -ENODEV;
+	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
+		return -EPERM;
+	}
 
+	rc = -ENODEV;
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (!(cap & MCG_ELOG_P))
 		return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
 	if (elog_buf == NULL)
 		goto err_release_elog;
 
+	/*
+	 * eMCA event report method has higher priority than EDAC method,
+	 * unless EDAC event report method is mandatory.
+	 */
+	old_edac_report_status = get_edac_report_status();
+	set_edac_report_status(EDAC_REPORTING_DISABLED);
 	mce_register_decode_chain(&extlog_mce_dec);
 	/* enable OS to be involved to take over management from BIOS */
 	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ static int __init extlog_init(void)
 
 static void __exit extlog_exit(void)
 {
+	set_edac_report_status(old_edac_report_status);
 	mce_unregister_decode_chain(&extlog_mce_dec);
 	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
 	if (extlog_l1_addr)