Merge branch 'apei-release' into release
This commit is contained in:
commit
25076246e8
11 changed files with 420 additions and 151 deletions
|
@ -92,6 +92,11 @@ vendor_id: <integer>, device_id: <integer>
|
|||
class_code: <integer>]
|
||||
[serial number: <integer>, <integer>]
|
||||
[bridge: secondary_status: <integer>, control: <integer>]
|
||||
[aer_status: <integer>, aer_mask: <integer>
|
||||
<aer status string>
|
||||
[aer_uncor_severity: <integer>]
|
||||
aer_layer=<aer layer string>, aer_agent=<aer agent string>
|
||||
aer_tlp_header: <integer> <integer> <integer> <integer>]
|
||||
|
||||
<pcie port type string>* := PCIe end point | legacy PCI end point | \
|
||||
unknown | unknown | root port | upstream switch port | \
|
||||
|
@ -99,6 +104,26 @@ downstream switch port | PCIe to PCI/PCI-X bridge | \
|
|||
PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
|
||||
root complex event collector
|
||||
|
||||
if section severity is fatal or recoverable
|
||||
<aer status string># :=
|
||||
unknown | unknown | unknown | unknown | Data Link Protocol | \
|
||||
unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
|
||||
Poisoned TLP | Flow Control Protocol | Completion Timeout | \
|
||||
Completer Abort | Unexpected Completion | Receiver Overflow | \
|
||||
Malformed TLP | ECRC | Unsupported Request
|
||||
else
|
||||
<aer status string># :=
|
||||
Receiver Error | unknown | unknown | unknown | unknown | unknown | \
|
||||
Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
|
||||
Replay Timer Timeout | Advisory Non-Fatal
|
||||
fi
|
||||
|
||||
<aer layer string> :=
|
||||
Physical Layer | Data Link Layer | Transaction Layer
|
||||
|
||||
<aer agent string> :=
|
||||
Receiver ID | Requester ID | Completer ID | Transmitter ID
|
||||
|
||||
Where [] designates that the corresponding content is optional
|
||||
|
||||
All <field string> description with * has the following format:
|
||||
|
|
|
@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
|
|||
ssize_t apei_read_mce(struct mce *m, u64 *record_id)
|
||||
{
|
||||
struct cper_mce_record rcd;
|
||||
ssize_t len;
|
||||
|
||||
len = erst_read_next(&rcd.hdr, sizeof(rcd));
|
||||
if (len <= 0)
|
||||
return len;
|
||||
/* Can not skip other records in storage via ERST unless clear them */
|
||||
else if (len != sizeof(rcd) ||
|
||||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
|
||||
if (printk_ratelimit())
|
||||
pr_warning(
|
||||
"MCE-APEI: Can not skip the unknown record in ERST");
|
||||
return -EIO;
|
||||
}
|
||||
int rc, pos;
|
||||
|
||||
rc = erst_get_record_id_begin(&pos);
|
||||
if (rc)
|
||||
return rc;
|
||||
retry:
|
||||
rc = erst_get_record_id_next(&pos, record_id);
|
||||
if (rc)
|
||||
goto out;
|
||||
/* no more record */
|
||||
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
|
||||
goto out;
|
||||
rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
|
||||
/* someone else has cleared the record, try next one */
|
||||
if (rc == -ENOENT)
|
||||
goto retry;
|
||||
else if (rc < 0)
|
||||
goto out;
|
||||
/* try to skip other type records in storage */
|
||||
else if (rc != sizeof(rcd) ||
|
||||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
|
||||
goto retry;
|
||||
memcpy(m, &rcd.mce, sizeof(*m));
|
||||
*record_id = rcd.hdr.record_id;
|
||||
rc = sizeof(*m);
|
||||
out:
|
||||
erst_get_record_id_end();
|
||||
|
||||
return sizeof(*m);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Check whether there is record in ERST */
|
||||
|
|
|
@ -21,6 +21,13 @@ config ACPI_APEI_GHES
|
|||
by firmware to produce more valuable hardware error
|
||||
information for Linux.
|
||||
|
||||
config ACPI_APEI_PCIEAER
|
||||
bool "APEI PCIe AER logging/recovering support"
|
||||
depends on ACPI_APEI && PCIEAER
|
||||
help
|
||||
PCIe AER errors may be reported via APEI firmware first mode.
|
||||
Turn on this option to enable the corresponding support.
|
||||
|
||||
config ACPI_APEI_EINJ
|
||||
tristate "APEI Error INJection (EINJ)"
|
||||
depends on ACPI_APEI && DEBUG_FS
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <linux/time.h>
|
||||
#include <linux/cper.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/aer.h>
|
||||
|
||||
/*
|
||||
* CPER record ID need to be unique even after reboot, because record
|
||||
|
@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity)
|
|||
* If the output length is longer than 80, multiple lines will be
|
||||
* printed, with @pfx printed at the beginning of each line.
|
||||
*/
|
||||
static void cper_print_bits(const char *pfx, unsigned int bits,
|
||||
const char *strs[], unsigned int strs_size)
|
||||
void cper_print_bits(const char *pfx, unsigned int bits,
|
||||
const char *strs[], unsigned int strs_size)
|
||||
{
|
||||
int i, len = 0;
|
||||
const char *str;
|
||||
|
@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits,
|
|||
if (!(bits & (1U << i)))
|
||||
continue;
|
||||
str = strs[i];
|
||||
if (!str)
|
||||
continue;
|
||||
if (len && len + strlen(str) + 2 > 80) {
|
||||
printk("%s\n", buf);
|
||||
len = 0;
|
||||
|
@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = {
|
|||
"root complex event collector",
|
||||
};
|
||||
|
||||
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
|
||||
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
|
||||
const struct acpi_hest_generic_data *gdata)
|
||||
{
|
||||
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
|
||||
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
|
||||
|
@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
|
|||
printk(
|
||||
"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
|
||||
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
|
||||
#ifdef CONFIG_ACPI_APEI_PCIEAER
|
||||
if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
|
||||
struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
|
||||
cper_print_aer(pfx, gdata->error_severity, aer_regs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static const char *apei_estatus_section_flag_strs[] = {
|
||||
|
@ -322,7 +332,7 @@ static void apei_estatus_print_section(
|
|||
struct cper_sec_pcie *pcie = (void *)(gdata + 1);
|
||||
printk("%s""section_type: PCIe error\n", pfx);
|
||||
if (gdata->error_data_length >= sizeof(*pcie))
|
||||
cper_print_pcie(pfx, pcie);
|
||||
cper_print_pcie(pfx, pcie, gdata);
|
||||
else
|
||||
goto err_section_too_small;
|
||||
} else
|
||||
|
|
|
@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
|
|||
|
||||
static int erst_dbg_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int rc, *pos;
|
||||
|
||||
if (erst_disable)
|
||||
return -ENODEV;
|
||||
|
||||
pos = (int *)&file->private_data;
|
||||
|
||||
rc = erst_get_record_id_begin(pos);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return nonseekable_open(inode, file);
|
||||
}
|
||||
|
||||
/*
 * Release handler of the ERST debug device: ends the record ID
 * iteration that erst_dbg_open() started.  Always returns 0.
 */
static int erst_dbg_release(struct inode *inode, struct file *file)
{
	erst_get_record_id_end();
	return 0;
}
|
||||
|
||||
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
int rc;
|
||||
|
@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
|
|||
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
|
||||
size_t usize, loff_t *off)
|
||||
{
|
||||
int rc;
|
||||
int rc, *pos;
|
||||
ssize_t len = 0;
|
||||
u64 id;
|
||||
|
||||
if (*off != 0)
|
||||
if (*off)
|
||||
return -EINVAL;
|
||||
|
||||
if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
|
||||
return -EINTR;
|
||||
|
||||
pos = (int *)&filp->private_data;
|
||||
|
||||
retry_next:
|
||||
rc = erst_get_next_record_id(&id);
|
||||
rc = erst_get_record_id_next(pos, &id);
|
||||
if (rc)
|
||||
goto out;
|
||||
/* no more record */
|
||||
|
@ -181,6 +198,7 @@ static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
|
|||
static const struct file_operations erst_dbg_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = erst_dbg_open,
|
||||
.release = erst_dbg_release,
|
||||
.read = erst_dbg_read,
|
||||
.write = erst_dbg_write,
|
||||
.unlocked_ioctl = erst_dbg_ioctl,
|
||||
|
|
|
@ -429,6 +429,22 @@ ssize_t erst_get_record_count(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(erst_get_record_count);
|
||||
|
||||
#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
|
||||
#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
|
||||
|
||||
/*
 * Cache of the record IDs that have been read from ERST so far, shared
 * by all erst_get_record_id_begin/next/end iterations.
 */
struct erst_record_id_cache {
|
||||
/* must be held while the fields below are read or modified */
struct mutex lock;
|
||||
/* array of cached IDs; cleared slots hold APEI_ERST_INVALID_RECORD_ID */
u64 *entries;
|
||||
/* number of slots of @entries currently in use */
int len;
|
||||
/* number of slots allocated for @entries (elements, not bytes) */
int size;
|
||||
/* number of iterations begun but not yet ended */
int refcount;
|
||||
};
|
||||
|
||||
static struct erst_record_id_cache erst_record_id_cache = {
|
||||
.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
|
||||
.refcount = 0,
|
||||
};
|
||||
|
||||
static int __erst_get_next_record_id(u64 *record_id)
|
||||
{
|
||||
struct apei_exec_context ctx;
|
||||
|
@ -443,26 +459,179 @@ static int __erst_get_next_record_id(u64 *record_id)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int erst_get_record_id_begin(int *pos)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (erst_disable)
|
||||
return -ENODEV;
|
||||
|
||||
rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
|
||||
if (rc)
|
||||
return rc;
|
||||
erst_record_id_cache.refcount++;
|
||||
mutex_unlock(&erst_record_id_cache.lock);
|
||||
|
||||
*pos = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
|
||||
|
||||
/* erst_record_id_cache.lock must be held by caller */
|
||||
static int __erst_record_id_cache_add_one(void)
|
||||
{
|
||||
u64 id, prev_id, first_id;
|
||||
int i, rc;
|
||||
u64 *entries;
|
||||
unsigned long flags;
|
||||
|
||||
id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
|
||||
retry:
|
||||
raw_spin_lock_irqsave(&erst_lock, flags);
|
||||
rc = __erst_get_next_record_id(&id);
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
if (rc == -ENOENT)
|
||||
return 0;
|
||||
if (rc)
|
||||
return rc;
|
||||
if (id == APEI_ERST_INVALID_RECORD_ID)
|
||||
return 0;
|
||||
/* can not skip current ID, or loop back to first ID */
|
||||
if (id == prev_id || id == first_id)
|
||||
return 0;
|
||||
if (first_id == APEI_ERST_INVALID_RECORD_ID)
|
||||
first_id = id;
|
||||
prev_id = id;
|
||||
|
||||
entries = erst_record_id_cache.entries;
|
||||
for (i = 0; i < erst_record_id_cache.len; i++) {
|
||||
if (entries[i] == id)
|
||||
break;
|
||||
}
|
||||
/* record id already in cache, try next */
|
||||
if (i < erst_record_id_cache.len)
|
||||
goto retry;
|
||||
if (erst_record_id_cache.len >= erst_record_id_cache.size) {
|
||||
int new_size, alloc_size;
|
||||
u64 *new_entries;
|
||||
|
||||
new_size = erst_record_id_cache.size * 2;
|
||||
new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
|
||||
ERST_RECORD_ID_CACHE_SIZE_MAX);
|
||||
if (new_size <= erst_record_id_cache.size) {
|
||||
if (printk_ratelimit())
|
||||
pr_warning(FW_WARN ERST_PFX
|
||||
"too many record ID!\n");
|
||||
return 0;
|
||||
}
|
||||
alloc_size = new_size * sizeof(entries[0]);
|
||||
if (alloc_size < PAGE_SIZE)
|
||||
new_entries = kmalloc(alloc_size, GFP_KERNEL);
|
||||
else
|
||||
new_entries = vmalloc(alloc_size);
|
||||
if (!new_entries)
|
||||
return -ENOMEM;
|
||||
memcpy(new_entries, entries,
|
||||
erst_record_id_cache.len * sizeof(entries[0]));
|
||||
if (erst_record_id_cache.size < PAGE_SIZE)
|
||||
kfree(entries);
|
||||
else
|
||||
vfree(entries);
|
||||
erst_record_id_cache.entries = entries = new_entries;
|
||||
erst_record_id_cache.size = new_size;
|
||||
}
|
||||
entries[i] = id;
|
||||
erst_record_id_cache.len++;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the record ID of an existing error record on the persistent
|
||||
* storage. If there is no error record on the persistent storage, the
|
||||
* returned record_id is APEI_ERST_INVALID_RECORD_ID.
|
||||
*/
|
||||
int erst_get_next_record_id(u64 *record_id)
|
||||
int erst_get_record_id_next(int *pos, u64 *record_id)
|
||||
{
|
||||
int rc;
|
||||
unsigned long flags;
|
||||
int rc = 0;
|
||||
u64 *entries;
|
||||
|
||||
if (erst_disable)
|
||||
return -ENODEV;
|
||||
|
||||
raw_spin_lock_irqsave(&erst_lock, flags);
|
||||
rc = __erst_get_next_record_id(record_id);
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
/* must be enclosed by erst_get_record_id_begin/end */
|
||||
BUG_ON(!erst_record_id_cache.refcount);
|
||||
BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
|
||||
|
||||
mutex_lock(&erst_record_id_cache.lock);
|
||||
entries = erst_record_id_cache.entries;
|
||||
for (; *pos < erst_record_id_cache.len; (*pos)++)
|
||||
if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
|
||||
break;
|
||||
/* found next record id in cache */
|
||||
if (*pos < erst_record_id_cache.len) {
|
||||
*record_id = entries[*pos];
|
||||
(*pos)++;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Try to add one more record ID to cache */
|
||||
rc = __erst_record_id_cache_add_one();
|
||||
if (rc < 0)
|
||||
goto out_unlock;
|
||||
/* successfully add one new ID */
|
||||
if (rc == 1) {
|
||||
*record_id = erst_record_id_cache.entries[*pos];
|
||||
(*pos)++;
|
||||
rc = 0;
|
||||
} else {
|
||||
*pos = -1;
|
||||
*record_id = APEI_ERST_INVALID_RECORD_ID;
|
||||
}
|
||||
out_unlock:
|
||||
mutex_unlock(&erst_record_id_cache.lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(erst_get_next_record_id);
|
||||
EXPORT_SYMBOL_GPL(erst_get_record_id_next);
|
||||
|
||||
/* erst_record_id_cache.lock must be held by caller */
|
||||
static void __erst_record_id_cache_compact(void)
|
||||
{
|
||||
int i, wpos = 0;
|
||||
u64 *entries;
|
||||
|
||||
if (erst_record_id_cache.refcount)
|
||||
return;
|
||||
|
||||
entries = erst_record_id_cache.entries;
|
||||
for (i = 0; i < erst_record_id_cache.len; i++) {
|
||||
if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
|
||||
continue;
|
||||
if (wpos != i)
|
||||
memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
|
||||
wpos++;
|
||||
}
|
||||
erst_record_id_cache.len = wpos;
|
||||
}
|
||||
|
||||
void erst_get_record_id_end(void)
|
||||
{
|
||||
/*
|
||||
* erst_disable != 0 should be detected by invoker via the
|
||||
* return value of erst_get_record_id_begin/next, so this
|
||||
* function should not be called for erst_disable != 0.
|
||||
*/
|
||||
BUG_ON(erst_disable);
|
||||
|
||||
mutex_lock(&erst_record_id_cache.lock);
|
||||
erst_record_id_cache.refcount--;
|
||||
BUG_ON(erst_record_id_cache.refcount < 0);
|
||||
__erst_record_id_cache_compact();
|
||||
mutex_unlock(&erst_record_id_cache.lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(erst_get_record_id_end);
|
||||
|
||||
static int __erst_write_to_storage(u64 offset)
|
||||
{
|
||||
|
@ -703,56 +872,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(erst_read);
|
||||
|
||||
/*
|
||||
* If return value > buflen, the buffer size is not big enough,
|
||||
* else if return value = 0, there is no more record to read,
|
||||
* else if return value < 0, something goes wrong,
|
||||
* else everything is OK, and return value is record length
|
||||
*/
|
||||
ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
|
||||
{
|
||||
int rc;
|
||||
ssize_t len;
|
||||
unsigned long flags;
|
||||
u64 record_id;
|
||||
|
||||
if (erst_disable)
|
||||
return -ENODEV;
|
||||
|
||||
raw_spin_lock_irqsave(&erst_lock, flags);
|
||||
rc = __erst_get_next_record_id(&record_id);
|
||||
if (rc) {
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
return rc;
|
||||
}
|
||||
/* no more record */
|
||||
if (record_id == APEI_ERST_INVALID_RECORD_ID) {
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
len = __erst_read(record_id, record, buflen);
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(erst_read_next);
|
||||
|
||||
int erst_clear(u64 record_id)
|
||||
{
|
||||
int rc;
|
||||
int rc, i;
|
||||
unsigned long flags;
|
||||
u64 *entries;
|
||||
|
||||
if (erst_disable)
|
||||
return -ENODEV;
|
||||
|
||||
rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
|
||||
if (rc)
|
||||
return rc;
|
||||
raw_spin_lock_irqsave(&erst_lock, flags);
|
||||
if (erst_erange.attr & ERST_RANGE_NVRAM)
|
||||
rc = __erst_clear_from_nvram(record_id);
|
||||
else
|
||||
rc = __erst_clear_from_storage(record_id);
|
||||
raw_spin_unlock_irqrestore(&erst_lock, flags);
|
||||
|
||||
if (rc)
|
||||
goto out;
|
||||
entries = erst_record_id_cache.entries;
|
||||
for (i = 0; i < erst_record_id_cache.len; i++) {
|
||||
if (entries[i] == record_id)
|
||||
entries[i] = APEI_ERST_INVALID_RECORD_ID;
|
||||
}
|
||||
__erst_record_id_cache_compact();
|
||||
out:
|
||||
mutex_unlock(&erst_record_id_cache.lock);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(erst_clear);
|
||||
|
|
|
@ -35,13 +35,6 @@
|
|||
PCI_ERR_UNC_UNX_COMP| \
|
||||
PCI_ERR_UNC_MALF_TLP)
|
||||
|
||||
struct header_log_regs {
|
||||
unsigned int dw0;
|
||||
unsigned int dw1;
|
||||
unsigned int dw2;
|
||||
unsigned int dw3;
|
||||
};
|
||||
|
||||
#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
|
||||
struct aer_err_info {
|
||||
struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
|
||||
|
@ -59,7 +52,7 @@ struct aer_err_info {
|
|||
|
||||
unsigned int status; /* COR/UNCOR Error Status */
|
||||
unsigned int mask; /* COR/UNCOR Error Mask */
|
||||
struct header_log_regs tlp; /* TLP Header */
|
||||
struct aer_header_log_regs tlp; /* TLP Header */
|
||||
};
|
||||
|
||||
struct aer_err_source {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <linux/errno.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/cper.h>
|
||||
|
||||
#include "aerdrv.h"
|
||||
|
||||
|
@ -57,86 +58,44 @@
|
|||
(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
|
||||
AER_TRANSACTION_LAYER_ERROR)
|
||||
|
||||
#define AER_PR(info, pdev, fmt, args...) \
|
||||
printk("%s%s %s: " fmt, (info->severity == AER_CORRECTABLE) ? \
|
||||
KERN_WARNING : KERN_ERR, dev_driver_string(&pdev->dev), \
|
||||
dev_name(&pdev->dev), ## args)
|
||||
|
||||
/*
|
||||
* AER error strings
|
||||
*/
|
||||
static char *aer_error_severity_string[] = {
|
||||
static const char *aer_error_severity_string[] = {
|
||||
"Uncorrected (Non-Fatal)",
|
||||
"Uncorrected (Fatal)",
|
||||
"Corrected"
|
||||
};
|
||||
|
||||
static char *aer_error_layer[] = {
|
||||
static const char *aer_error_layer[] = {
|
||||
"Physical Layer",
|
||||
"Data Link Layer",
|
||||
"Transaction Layer"
|
||||
};
|
||||
static char *aer_correctable_error_string[] = {
|
||||
"Receiver Error ", /* Bit Position 0 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Bad TLP ", /* Bit Position 6 */
|
||||
"Bad DLLP ", /* Bit Position 7 */
|
||||
"RELAY_NUM Rollover ", /* Bit Position 8 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Replay Timer Timeout ", /* Bit Position 12 */
|
||||
"Advisory Non-Fatal ", /* Bit Position 13 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
static const char *aer_correctable_error_string[] = {
|
||||
"Receiver Error", /* Bit Position 0 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Bad TLP", /* Bit Position 6 */
|
||||
"Bad DLLP", /* Bit Position 7 */
|
||||
"RELAY_NUM Rollover", /* Bit Position 8 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Replay Timer Timeout", /* Bit Position 12 */
|
||||
"Advisory Non-Fatal", /* Bit Position 13 */
|
||||
};
|
||||
|
||||
static char *aer_uncorrectable_error_string[] = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Data Link Protocol ", /* Bit Position 4 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Poisoned TLP ", /* Bit Position 12 */
|
||||
"Flow Control Protocol ", /* Bit Position 13 */
|
||||
"Completion Timeout ", /* Bit Position 14 */
|
||||
"Completer Abort ", /* Bit Position 15 */
|
||||
"Unexpected Completion ", /* Bit Position 16 */
|
||||
"Receiver Overflow ", /* Bit Position 17 */
|
||||
"Malformed TLP ", /* Bit Position 18 */
|
||||
"ECRC ", /* Bit Position 19 */
|
||||
"Unsupported Request ", /* Bit Position 20 */
|
||||
static const char *aer_uncorrectable_error_string[] = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Data Link Protocol", /* Bit Position 4 */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
@ -144,19 +103,29 @@ static char *aer_uncorrectable_error_string[] = {
|
|||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"Poisoned TLP", /* Bit Position 12 */
|
||||
"Flow Control Protocol", /* Bit Position 13 */
|
||||
"Completion Timeout", /* Bit Position 14 */
|
||||
"Completer Abort", /* Bit Position 15 */
|
||||
"Unexpected Completion", /* Bit Position 16 */
|
||||
"Receiver Overflow", /* Bit Position 17 */
|
||||
"Malformed TLP", /* Bit Position 18 */
|
||||
"ECRC", /* Bit Position 19 */
|
||||
"Unsupported Request", /* Bit Position 20 */
|
||||
};
|
||||
|
||||
static char *aer_agent_string[] = {
|
||||
static const char *aer_agent_string[] = {
|
||||
"Receiver ID",
|
||||
"Requester ID",
|
||||
"Completer ID",
|
||||
"Transmitter ID"
|
||||
};
|
||||
|
||||
static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
|
||||
static void __aer_print_error(const char *prefix,
|
||||
struct aer_err_info *info)
|
||||
{
|
||||
int i, status;
|
||||
char *errmsg = NULL;
|
||||
const char *errmsg = NULL;
|
||||
|
||||
status = (info->status & ~info->mask);
|
||||
|
||||
|
@ -165,15 +134,17 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
|
|||
continue;
|
||||
|
||||
if (info->severity == AER_CORRECTABLE)
|
||||
errmsg = aer_correctable_error_string[i];
|
||||
errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
|
||||
aer_correctable_error_string[i] : NULL;
|
||||
else
|
||||
errmsg = aer_uncorrectable_error_string[i];
|
||||
errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
|
||||
aer_uncorrectable_error_string[i] : NULL;
|
||||
|
||||
if (errmsg)
|
||||
AER_PR(info, dev, " [%2d] %s%s\n", i, errmsg,
|
||||
printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg,
|
||||
info->first_error == i ? " (First)" : "");
|
||||
else
|
||||
AER_PR(info, dev, " [%2d] Unknown Error Bit%s\n", i,
|
||||
printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i,
|
||||
info->first_error == i ? " (First)" : "");
|
||||
}
|
||||
}
|
||||
|
@ -181,11 +152,15 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
|
|||
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
|
||||
{
|
||||
int id = ((dev->bus->number << 8) | dev->devfn);
|
||||
char prefix[44];
|
||||
|
||||
snprintf(prefix, sizeof(prefix), "%s%s %s: ",
|
||||
(info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
|
||||
dev_driver_string(&dev->dev), dev_name(&dev->dev));
|
||||
|
||||
if (info->status == 0) {
|
||||
AER_PR(info, dev,
|
||||
"PCIe Bus Error: severity=%s, type=Unaccessible, "
|
||||
"id=%04x(Unregistered Agent ID)\n",
|
||||
printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
|
||||
"id=%04x(Unregistered Agent ID)\n", prefix,
|
||||
aer_error_severity_string[info->severity], id);
|
||||
} else {
|
||||
int layer, agent;
|
||||
|
@ -193,23 +168,22 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
|
|||
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
|
||||
agent = AER_GET_AGENT(info->severity, info->status);
|
||||
|
||||
AER_PR(info, dev,
|
||||
"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
|
||||
aer_error_severity_string[info->severity],
|
||||
printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
|
||||
prefix, aer_error_severity_string[info->severity],
|
||||
aer_error_layer[layer], id, aer_agent_string[agent]);
|
||||
|
||||
AER_PR(info, dev,
|
||||
" device [%04x:%04x] error status/mask=%08x/%08x\n",
|
||||
dev->vendor, dev->device, info->status, info->mask);
|
||||
printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n",
|
||||
prefix, dev->vendor, dev->device,
|
||||
info->status, info->mask);
|
||||
|
||||
__aer_print_error(info, dev);
|
||||
__aer_print_error(prefix, info);
|
||||
|
||||
if (info->tlp_header_valid) {
|
||||
unsigned char *tlp = (unsigned char *) &info->tlp;
|
||||
AER_PR(info, dev, " TLP Header:"
|
||||
printk("%s"" TLP Header:"
|
||||
" %02x%02x%02x%02x %02x%02x%02x%02x"
|
||||
" %02x%02x%02x%02x %02x%02x%02x%02x\n",
|
||||
*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
|
||||
prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
|
||||
*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
|
||||
*(tlp + 11), *(tlp + 10), *(tlp + 9),
|
||||
*(tlp + 8), *(tlp + 15), *(tlp + 14),
|
||||
|
@ -218,8 +192,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
|
|||
}
|
||||
|
||||
if (info->id && info->error_dev_num > 1 && info->id == id)
|
||||
AER_PR(info, dev,
|
||||
" Error of this Agent(%04x) is reported first\n", id);
|
||||
printk("%s"" Error of this Agent(%04x) is reported first\n",
|
||||
prefix, id);
|
||||
}
|
||||
|
||||
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
|
||||
|
@ -228,3 +202,61 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
|
|||
info->multi_error_valid ? "Multiple " : "",
|
||||
aer_error_severity_string[info->severity], info->id);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_PCIEAER
|
||||
static int cper_severity_to_aer(int cper_severity)
|
||||
{
|
||||
switch (cper_severity) {
|
||||
case CPER_SEV_RECOVERABLE:
|
||||
return AER_NONFATAL;
|
||||
case CPER_SEV_FATAL:
|
||||
return AER_FATAL;
|
||||
default:
|
||||
return AER_CORRECTABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void cper_print_aer(const char *prefix, int cper_severity,
|
||||
struct aer_capability_regs *aer)
|
||||
{
|
||||
int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
|
||||
u32 status, mask;
|
||||
const char **status_strs;
|
||||
|
||||
aer_severity = cper_severity_to_aer(cper_severity);
|
||||
if (aer_severity == AER_CORRECTABLE) {
|
||||
status = aer->cor_status;
|
||||
mask = aer->cor_mask;
|
||||
status_strs = aer_correctable_error_string;
|
||||
status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
|
||||
} else {
|
||||
status = aer->uncor_status;
|
||||
mask = aer->uncor_mask;
|
||||
status_strs = aer_uncorrectable_error_string;
|
||||
status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
|
||||
tlp_header_valid = status & AER_LOG_TLP_MASKS;
|
||||
}
|
||||
layer = AER_GET_LAYER_ERROR(aer_severity, status);
|
||||
agent = AER_GET_AGENT(aer_severity, status);
|
||||
printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
|
||||
prefix, status, mask);
|
||||
cper_print_bits(prefix, status, status_strs, status_strs_size);
|
||||
printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
|
||||
aer_error_layer[layer], aer_agent_string[agent]);
|
||||
if (aer_severity != AER_CORRECTABLE)
|
||||
printk("%s""aer_uncor_severity: 0x%08x\n",
|
||||
prefix, aer->uncor_severity);
|
||||
if (tlp_header_valid) {
|
||||
const unsigned char *tlp;
|
||||
tlp = (const unsigned char *)&aer->header_log;
|
||||
printk("%s""aer_tlp_header:"
|
||||
" %02x%02x%02x%02x %02x%02x%02x%02x"
|
||||
" %02x%02x%02x%02x %02x%02x%02x%02x\n",
|
||||
prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
|
||||
*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
|
||||
*(tlp + 11), *(tlp + 10), *(tlp + 9),
|
||||
*(tlp + 8), *(tlp + 15), *(tlp + 14),
|
||||
*(tlp + 13), *(tlp + 12));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -30,10 +30,11 @@ int apei_hest_parse(apei_hest_func_t func, void *data);
|
|||
|
||||
int erst_write(const struct cper_record_header *record);
|
||||
ssize_t erst_get_record_count(void);
|
||||
int erst_get_next_record_id(u64 *record_id);
|
||||
int erst_get_record_id_begin(int *pos);
|
||||
int erst_get_record_id_next(int *pos, u64 *record_id);
|
||||
void erst_get_record_id_end(void);
|
||||
ssize_t erst_read(u64 record_id, struct cper_record_header *record,
|
||||
size_t buflen);
|
||||
ssize_t erst_read_next(struct cper_record_header *record, size_t buflen);
|
||||
int erst_clear(u64 record_id);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -7,6 +7,28 @@
|
|||
#ifndef _AER_H_
|
||||
#define _AER_H_
|
||||
|
||||
/*
 * Logged TLP header, stored as four words dw0..dw3.  The printing code
 * in this change dumps it as 16 bytes, so unsigned int is presumably
 * expected to be 32 bits wide here -- TODO confirm.
 */
struct aer_header_log_regs {
|
||||
unsigned int dw0;
|
||||
unsigned int dw1;
|
||||
unsigned int dw2;
|
||||
unsigned int dw3;
|
||||
};
|
||||
|
||||
/*
 * AER register values as passed to cper_print_aer().  The CPER PCIe
 * printing code obtains it by casting the section's raw aer_info bytes
 * to this struct, so the field order and sizes must match that data.
 * NOTE(review): the field names suggest this mirrors the PCIe AER
 * extended capability register layout -- confirm against the spec.
 */
struct aer_capability_regs {
|
||||
u32 header;
|
||||
/* uncorrectable status, mask and severity; printed by cper_print_aer() */
u32 uncor_status;
|
||||
u32 uncor_mask;
|
||||
u32 uncor_severity;
|
||||
/* correctable status and mask; printed by cper_print_aer() */
u32 cor_status;
|
||||
u32 cor_mask;
|
||||
u32 cap_control;
|
||||
/* logged TLP header; printed by cper_print_aer() as aer_tlp_header */
struct aer_header_log_regs header_log;
|
||||
u32 root_command;
|
||||
u32 root_status;
|
||||
u16 cor_err_source;
|
||||
u16 uncor_err_source;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_PCIEAER)
|
||||
/* pci-e port driver needs this function to enable aer */
|
||||
extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
|
||||
|
@ -27,5 +49,7 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
|
|||
}
|
||||
#endif
|
||||
|
||||
extern void cper_print_aer(const char *prefix, int cper_severity,
|
||||
struct aer_capability_regs *aer);
|
||||
#endif //_AER_H_
|
||||
|
||||
|
|
|
@ -388,5 +388,7 @@ struct cper_sec_pcie {
|
|||
#pragma pack()
|
||||
|
||||
u64 cper_next_record_id(void);
|
||||
void cper_print_bits(const char *prefix, unsigned int bits,
|
||||
const char *strs[], unsigned int strs_size);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue