Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "MCE handling updates, but also some generic drivers/edac/ changes to
  better organize the Kconfig space"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras: Move AMD MCE injector to arch/x86/ras/
  x86/mce: Add a wrapper around mce_log() for injection
  x86/mce: Rename rcu_dereference_check_mce() to mce_log_get_idx_check()
  RAS: Add a menuconfig option with descriptive text
  x86/mce: Reenable CMCI banks when swiching back to interrupt mode
  x86/mce: Clear Local MCE opt-in before kexec
  x86/mce: Remove unused function declarations
  x86/mce: Kill drain_mcelog_buffer()
  x86/mce: Avoid potential deadlock due to printk() in MCE context
  x86/mce: Remove the MCE ring for Action Optional errors
  x86/mce: Don't use percpu workqueues
  x86/mce: Provide a lockless memory pool to save error records
  x86/mce: Reuse one of the u16 padding fields in 'struct mce'

commit 3959df1dfb
18 changed files with 329 additions and 164 deletions
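
The centerpiece of the series is the lockless record pool: mce_log() and machine_check_poll() push records from #MC context via mce_gen_pool_add(), and a plain workqueue later drains them via mce_gen_pool_process(), so the logging side needs neither a lock nor printk(). The following sketch is not kernel code: it imitates the llist_add()/llist_del_all() push/drain-all pattern in userspace with C11 atomics over a fixed buffer, purely to illustrate the idea; every name in it is invented for the example.

/* Userspace sketch of the llist push / del_all pattern used by the MCE pool.
 * Not kernel code: C11 atomics stand in for llist_add()/llist_del_all(). */
#include <stdatomic.h>
#include <stdio.h>

struct node {
        struct node *next;
        int payload;                    /* stands in for struct mce */
};

static _Atomic(struct node *) head;     /* lock-free list head */
static struct node pool[16];            /* fixed pool, like gen_pool_buf */
static atomic_uint alloc_idx;           /* bump allocator, for the sketch only */

/* Safe in any context: one CAS loop, no locks, no printf. */
static int log_event(int payload)
{
        unsigned int i = atomic_fetch_add(&alloc_idx, 1);
        struct node *n, *old;

        if (i >= 16)
                return -1;              /* pool full: drop the record */
        n = &pool[i];
        n->payload = payload;
        old = atomic_load(&head);
        do {
                n->next = old;          /* llist_add(): push onto the stack */
        } while (!atomic_compare_exchange_weak(&head, &old, n));
        return 0;
}

/* Process context: detach the whole list at once, like llist_del_all(). */
static void drain(void)
{
        struct node *n = atomic_exchange(&head, NULL);

        for (; n; n = n->next)          /* newest first; the kernel reverses it */
                printf("event %d\n", n->payload);
}

int main(void)
{
        log_event(1);
        log_event(2);
        drain();                        /* prints 2, then 1 */
        return 0;
}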

arch/x86/Kconfig

@@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	select GENERIC_ALLOCATOR
 	default y
 	---help---
 	  Machine Check support allows the processor to notify the

arch/x86/Makefile

@@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/
 
 drivers-$(CONFIG_FB) += arch/x86/video/
 
+drivers-$(CONFIG_RAS) += arch/x86/ras/
+
 ####
 # boot loader support. Several targets are kept for legacy purposes

arch/x86/include/asm/mce.h

@@ -151,10 +151,12 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
 
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_clear(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
-void lmce_clear(void);
-void lmce_enable(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
-static inline void lmce_clear(void) {}
-static inline void lmce_enable(void) {}
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD

arch/x86/include/uapi/asm/mce.h

@@ -15,7 +15,8 @@ struct mce {
 	__u64 time;	/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
 	__u8  inject_flags;	/* software inject flags */
-	__u16 pad;
+	__u8  severity;
+	__u8  usable_addr;
 	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;	/* code segment */
 	__u8  bank;	/* machine check bank */

arch/x86/kernel/cpu/mcheck/Makefile

@@ -1,4 +1,4 @@
-obj-y = mce.o mce-severity.o
+obj-y = mce.o mce-severity.o mce-genpool.o
 
 obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o

arch/x86/kernel/cpu/mcheck/mce-apei.c

@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
-	mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 

arch/x86/kernel/cpu/mcheck/mce-genpool.c (new file, 99 lines)

@@ -0,0 +1,99 @@
+/*
+ * MCE event pool management in MCE context
+ *
+ * Copyright (C) 2015 Intel Corp.
+ * Author: Chen, Gong <gong.chen@linux.intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/genalloc.h>
+#include <linux/llist.h>
+#include "mce-internal.h"
+
+/*
+ * printk() is not safe in MCE context. This is a lock-less memory allocator
+ * used to save error information organized in a lock-less list.
+ *
+ * This memory pool is only to be used to save MCE records in MCE context.
+ * MCE events are rare, so a fixed size memory pool should be enough. Use
+ * 2 pages to save MCE events for now (~80 MCE records at most).
+ */
+#define MCE_POOLSZ	(2 * PAGE_SIZE)
+
+static struct gen_pool *mce_evt_pool;
+static LLIST_HEAD(mce_event_llist);
+static char gen_pool_buf[MCE_POOLSZ];
+
+void mce_gen_pool_process(void)
+{
+	struct llist_node *head;
+	struct mce_evt_llist *node;
+	struct mce *mce;
+
+	head = llist_del_all(&mce_event_llist);
+	if (!head)
+		return;
+
+	head = llist_reverse_order(head);
+	llist_for_each_entry(node, head, llnode) {
+		mce = &node->mce;
+		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+	}
+}
+
+bool mce_gen_pool_empty(void)
+{
+	return llist_empty(&mce_event_llist);
+}
+
+int mce_gen_pool_add(struct mce *mce)
+{
+	struct mce_evt_llist *node;
+
+	if (!mce_evt_pool)
+		return -EINVAL;
+
+	node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
+	if (!node) {
+		pr_warn_ratelimited("MCE records pool full!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(&node->mce, mce, sizeof(*mce));
+	llist_add(&node->llnode, &mce_event_llist);
+
+	return 0;
+}
+
+static int mce_gen_pool_create(void)
+{
+	struct gen_pool *tmpp;
+	int ret = -ENOMEM;
+
+	tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
+	if (!tmpp)
+		goto out;
+
+	ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
+	if (ret) {
+		gen_pool_destroy(tmpp);
+		goto out;
+	}
+
+	mce_evt_pool = tmpp;
+
+out:
+	return ret;
+}
+
+int mce_gen_pool_init(void)
+{
+	/* Just init mce_gen_pool once. */
+	if (mce_evt_pool)
+		return 0;
+
+	return mce_gen_pool_create();
+}
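
The "~80 MCE records" figure in the comment above is just MCE_POOLSZ divided by the per-record node size. A throwaway userspace check of that arithmetic, assuming 4 KiB pages and a struct mce_evt_llist of roughly 96 bytes on x86-64 (both numbers are assumptions for illustration; gen_pool's minimum-allocation-order rounding can make the real capacity somewhat lower):

/* Reproduce the "~80 MCE records" estimate; all sizes are assumptions. */
#include <stdio.h>

int main(void)
{
        const unsigned long page_size = 4096;           /* x86 PAGE_SIZE */
        const unsigned long poolsz = 2 * page_size;     /* MCE_POOLSZ */
        const unsigned long node_size = 96;             /* ~sizeof(struct mce_evt_llist) */

        printf("capacity: ~%lu records\n", poolsz / node_size);  /* ~85 */
        return 0;
}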

arch/x86/kernel/cpu/mcheck/mce-internal.h

@@ -13,6 +13,8 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60	/* 5 minutes */
 
@@ -24,6 +26,16 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
+struct mce_evt_llist {
+	struct llist_node llnode;
+	struct mce mce;
+};
+
+void mce_gen_pool_process(void);
+bool mce_gen_pool_empty(void);
+int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_init(void);
+
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
 	return -EINVAL;
 }
 #endif
+
+void mce_inject_log(struct mce *m);

arch/x86/kernel/cpu/mcheck/mce.c

@@ -52,11 +52,11 @@
 
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
 			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious rcu_dereference_check_mce() usage"); \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })
 
@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;
 
-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);
 
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);
 
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);
 
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
 
 			/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
+
+static struct notifier_block mce_srao_nb;
 
 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }
 
-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();
@@ -551,9 +472,30 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}
 
-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
 }
 
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
+}
+static struct notifier_block mce_srao_nb = {
+	.notifier_call = srao_decode_notifier,
+	.priority = INT_MAX,
+};
+
 /*
  * Read ADDR and MISC registers.
  */
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}
 
@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 		mce_read_aux(&m, i);
 
-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);
 
 		mce_log(&m);
 
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }
 
 #ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
+
 }
 
 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}
 
-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);
 
 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();
 
+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }
 
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)
 
 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);

arch/x86/kernel/cpu/mcheck/mce_intel.c

@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-			cmci_reenable();
+			cmci_toggle_interrupt_mode(true);
 			cmci_recheck();
 		}
 		return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	}
 }
 
-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;
 
-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_STORM_INTERVAL);
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
 		return;
 
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-	mce_notify_irq();
 }
 
 /*
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
-void intel_init_lmce(void)
+static void intel_init_lmce(void)
 {
 	u64 val;
 
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }
 
+static void intel_clear_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+	val &= ~MCG_EXT_CTL_LMCE_EN;
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 	intel_init_cmci();
 	intel_init_lmce();
 }
+
+void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}

arch/x86/kernel/process.c

@@ -29,6 +29,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/mce.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy)
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
 	for (;;)
 		halt();

arch/x86/kernel/smp.c

@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
 finish:
 	local_irq_save(flags);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 	local_irq_restore(flags);
 }
 

arch/x86/ras/Kconfig (new file, 11 lines)

@@ -0,0 +1,11 @@
+config AMD_MCE_INJ
+	tristate "Simple MCE injection interface for AMD processors"
+	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
+	default n
+	help
+	  This is a simple debugfs interface to inject MCEs and test different
+	  aspects of the MCE handling code.
+
+	  WARNING: Do not even assume this interface is staying stable!
+
+

arch/x86/ras/Makefile (new file, 2 lines)

@@ -0,0 +1,2 @@
+obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o
+

arch/x86/ras/mce_amd_inj.c (renamed from drivers/edac/mce_amd_inj.c)

@@ -6,7 +6,7 @@
  * This file may be distributed under the terms of the GNU General Public
  * License version 2.
  *
- * Copyright (c) 2010-14: Borislav Petkov <bp@alien8.de>
+ * Copyright (c) 2010-15: Borislav Petkov <bp@alien8.de>
  *			Advanced Micro Devices Inc.
  */
 
@@ -19,7 +19,7 @@
 #include <linux/uaccess.h>
 #include <asm/mce.h>
 
-#include "mce_amd.h"
+#include "../kernel/cpu/mcheck/mce-internal.h"
 
 /*
  * Collect all the MCi_XXX settings
@@ -195,7 +195,7 @@ static void do_inject(void)
 		i_mce.status |= MCI_STATUS_MISCV;
 
 	if (inj_type == SW_INJ) {
-		amd_decode_mce(NULL, 0, &i_mce);
+		mce_inject_log(&i_mce);
 		return;
 	}
 

drivers/edac/Kconfig

@@ -61,16 +61,6 @@ config EDAC_DECODE_MCE
 	  which occur really early upon boot, before the module infrastructure
 	  has been initialized.
 
-config EDAC_MCE_INJ
-	tristate "Simple MCE injection interface"
-	depends on EDAC_DECODE_MCE && DEBUG_FS
-	default n
-	help
-	  This is a simple debugfs interface to inject MCEs and test different
-	  aspects of the MCE handling code.
-
-	  WARNING: Do not even assume this interface is staying stable!
-
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
 	select RAS

drivers/edac/Makefile

@@ -17,7 +17,6 @@ edac_core-y += edac_pci.o edac_pci_sysfs.o
 endif
 
 obj-$(CONFIG_EDAC_GHES) += ghes_edac.o
-obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
 
 edac_mce_amd-y := mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o

drivers/ras/Kconfig

@@ -1,2 +1,35 @@
-config RAS
-	bool
+menuconfig RAS
+	bool "Reliability, Availability and Serviceability (RAS) features"
+	help
+	  Reliability, availability and serviceability (RAS) is a computer
+	  hardware engineering term. Computers designed with higher levels
+	  of RAS have a multitude of features that protect data integrity
+	  and help them stay available for long periods of time without
+	  failure.
+
+	  Reliability can be defined as the probability that the system will
+	  produce correct outputs up to some given time. Reliability is
+	  enhanced by features that help to avoid, detect and repair hardware
+	  faults.
+
+	  Availability is the probability a system is operational at a given
+	  time, i.e. the amount of time a device is actually operating as the
+	  percentage of total time it should be operating.
+
+	  Serviceability or maintainability is the simplicity and speed with
+	  which a system can be repaired or maintained; if the time to repair
+	  a failed system increases, then availability will decrease.
+
+	  Note that Reliability and Availability are distinct concepts:
+	  Reliability is a measure of the ability of a system to function
+	  correctly, including avoiding data corruption, whereas Availability
+	  measures how often it is available for use, even though it may not
+	  be functioning correctly. For example, a server may run forever and
+	  so have ideal availability, but may be unreliable, with frequent
+	  data corruption.
+
+if RAS
+
+source arch/x86/ras/Kconfig
+
+endif
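
With RAS promoted to a visible menuconfig that sources arch/x86/ras/Kconfig, the relocated injector is now enabled under it. A plausible .config fragment, assuming the EDAC_DECODE_MCE and DEBUG_FS dependencies are satisfied elsewhere in the configuration:

CONFIG_RAS=y
CONFIG_EDAC_DECODE_MCE=y
CONFIG_DEBUG_FS=y
CONFIG_AMD_MCE_INJ=m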