amd64_edac: Carve out ECC-related hw settings
This is in preparation for the init path reorganization, where we want to 1) test whether a particular node supports ECC and 2) check whether it can be enabled, and only then do the necessary allocation/initialization. For that, we need to decouple the ECC settings of the node from the instance's descriptor. There should be no functional change introduced by this patch. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
This commit is contained in:
parent
f1db274e1b
commit
ae7bb7c679
2 changed files with 49 additions and 24 deletions
|
@ -18,6 +18,7 @@ static struct msr __percpu *msrs;
|
||||||
/* Per-node driver instances */
|
/* Per-node driver instances */
|
||||||
static struct mem_ctl_info **mcis;
|
static struct mem_ctl_info **mcis;
|
||||||
static struct amd64_pvt **pvts;
|
static struct amd64_pvt **pvts;
|
||||||
|
static struct ecc_settings **ecc_stngs;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
|
* Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
|
||||||
|
@ -2293,7 +2294,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
|
static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
|
||||||
{
|
{
|
||||||
cpumask_var_t cmask;
|
cpumask_var_t cmask;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
@ -2303,7 +2304,7 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
|
get_cpus_on_this_dct_cpumask(cmask, nid);
|
||||||
|
|
||||||
rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
|
rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
|
||||||
|
|
||||||
|
@ -2313,14 +2314,14 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
|
||||||
|
|
||||||
if (on) {
|
if (on) {
|
||||||
if (reg->l & K8_MSR_MCGCTL_NBE)
|
if (reg->l & K8_MSR_MCGCTL_NBE)
|
||||||
pvt->flags.nb_mce_enable = 1;
|
s->flags.nb_mce_enable = 1;
|
||||||
|
|
||||||
reg->l |= K8_MSR_MCGCTL_NBE;
|
reg->l |= K8_MSR_MCGCTL_NBE;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Turn off NB MCE reporting only when it was off before
|
* Turn off NB MCE reporting only when it was off before
|
||||||
*/
|
*/
|
||||||
if (!pvt->flags.nb_mce_enable)
|
if (!s->flags.nb_mce_enable)
|
||||||
reg->l &= ~K8_MSR_MCGCTL_NBE;
|
reg->l &= ~K8_MSR_MCGCTL_NBE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2334,18 +2335,20 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
|
||||||
static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
|
static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
|
||||||
{
|
{
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
|
u8 nid = pvt->mc_node_id;
|
||||||
|
struct ecc_settings *s = ecc_stngs[nid];
|
||||||
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
|
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
|
||||||
|
|
||||||
amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
|
amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
|
||||||
|
|
||||||
/* turn on UECCn and CECCEn bits */
|
/* turn on UECCEn and CECCEn bits */
|
||||||
pvt->old_nbctl = value & mask;
|
s->old_nbctl = value & mask;
|
||||||
pvt->nbctl_mcgctl_saved = 1;
|
s->nbctl_valid = true;
|
||||||
|
|
||||||
value |= mask;
|
value |= mask;
|
||||||
pci_write_config_dword(pvt->F3, K8_NBCTL, value);
|
pci_write_config_dword(pvt->F3, K8_NBCTL, value);
|
||||||
|
|
||||||
if (amd64_toggle_ecc_err_reporting(pvt, ON))
|
if (amd64_toggle_ecc_err_reporting(s, nid, ON))
|
||||||
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
|
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
|
||||||
|
|
||||||
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
|
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
|
||||||
|
@ -2357,7 +2360,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
|
||||||
if (!(value & K8_NBCFG_ECC_ENABLE)) {
|
if (!(value & K8_NBCFG_ECC_ENABLE)) {
|
||||||
amd64_warn("DRAM ECC disabled on this node, enabling...\n");
|
amd64_warn("DRAM ECC disabled on this node, enabling...\n");
|
||||||
|
|
||||||
pvt->flags.nb_ecc_prev = 0;
|
s->flags.nb_ecc_prev = 0;
|
||||||
|
|
||||||
/* Attempt to turn on DRAM ECC Enable */
|
/* Attempt to turn on DRAM ECC Enable */
|
||||||
value |= K8_NBCFG_ECC_ENABLE;
|
value |= K8_NBCFG_ECC_ENABLE;
|
||||||
|
@ -2372,7 +2375,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
|
||||||
amd64_info("Hardware accepted DRAM ECC Enable\n");
|
amd64_info("Hardware accepted DRAM ECC Enable\n");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pvt->flags.nb_ecc_prev = 1;
|
s->flags.nb_ecc_prev = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
|
debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
|
||||||
|
@ -2384,26 +2387,28 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
|
||||||
|
|
||||||
static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
|
static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
|
||||||
{
|
{
|
||||||
|
u8 nid = pvt->mc_node_id;
|
||||||
|
struct ecc_settings *s = ecc_stngs[nid];
|
||||||
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
|
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
|
||||||
|
|
||||||
if (!pvt->nbctl_mcgctl_saved)
|
if (!s->nbctl_valid)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
|
amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
|
||||||
value &= ~mask;
|
value &= ~mask;
|
||||||
value |= pvt->old_nbctl;
|
value |= s->old_nbctl;
|
||||||
|
|
||||||
pci_write_config_dword(pvt->F3, K8_NBCTL, value);
|
pci_write_config_dword(pvt->F3, K8_NBCTL, value);
|
||||||
|
|
||||||
/* restore previous BIOS DRAM ECC "off" setting which we force-enabled */
|
/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
|
||||||
if (!pvt->flags.nb_ecc_prev) {
|
if (!s->flags.nb_ecc_prev) {
|
||||||
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
|
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
|
||||||
value &= ~K8_NBCFG_ECC_ENABLE;
|
value &= ~K8_NBCFG_ECC_ENABLE;
|
||||||
pci_write_config_dword(pvt->F3, K8_NBCFG, value);
|
pci_write_config_dword(pvt->F3, K8_NBCFG, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* restore the NB Enable MCGCTL bit */
|
/* restore the NB Enable MCGCTL bit */
|
||||||
if (amd64_toggle_ecc_err_reporting(pvt, OFF))
|
if (amd64_toggle_ecc_err_reporting(s, nid, OFF))
|
||||||
amd64_warn("Error restoring NB MCGCTL settings!\n");
|
amd64_warn("Error restoring NB MCGCTL settings!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2654,6 +2659,8 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
|
||||||
const struct pci_device_id *mc_type)
|
const struct pci_device_id *mc_type)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
u8 nid = get_node_id(pdev);
|
||||||
|
struct ecc_settings *s;
|
||||||
|
|
||||||
ret = pci_enable_device(pdev);
|
ret = pci_enable_device(pdev);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
|
@ -2661,9 +2668,16 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = -ENOMEM;
|
||||||
|
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
|
||||||
|
if (!s)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ecc_stngs[nid] = s;
|
||||||
|
|
||||||
ret = amd64_probe_one_instance(pdev);
|
ret = amd64_probe_one_instance(pdev);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
amd64_err("Error probing instance: %d\n", get_node_id(pdev));
|
amd64_err("Error probing instance: %d\n", nid);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -2688,6 +2702,9 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
|
||||||
amd_report_gart_errors(false);
|
amd_report_gart_errors(false);
|
||||||
amd_unregister_ecc_decoder(amd64_decode_bus_error);
|
amd_unregister_ecc_decoder(amd64_decode_bus_error);
|
||||||
|
|
||||||
|
kfree(ecc_stngs[pvt->mc_node_id]);
|
||||||
|
ecc_stngs[pvt->mc_node_id] = NULL;
|
||||||
|
|
||||||
/* Free the EDAC CORE resources */
|
/* Free the EDAC CORE resources */
|
||||||
mci->pvt_info = NULL;
|
mci->pvt_info = NULL;
|
||||||
mcis[pvt->mc_node_id] = NULL;
|
mcis[pvt->mc_node_id] = NULL;
|
||||||
|
@ -2769,7 +2786,8 @@ static int __init amd64_edac_init(void)
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
pvts = kzalloc(amd_nb_num() * sizeof(pvts[0]), GFP_KERNEL);
|
pvts = kzalloc(amd_nb_num() * sizeof(pvts[0]), GFP_KERNEL);
|
||||||
mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
|
mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
|
||||||
if (!(pvts && mcis))
|
ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
|
||||||
|
if (!(pvts && mcis && ecc_stngs))
|
||||||
goto err_ret;
|
goto err_ret;
|
||||||
|
|
||||||
msrs = msrs_alloc();
|
msrs = msrs_alloc();
|
||||||
|
@ -2820,6 +2838,9 @@ static void __exit amd64_edac_exit(void)
|
||||||
|
|
||||||
pci_unregister_driver(&amd64_pci_driver);
|
pci_unregister_driver(&amd64_pci_driver);
|
||||||
|
|
||||||
|
kfree(ecc_stngs);
|
||||||
|
ecc_stngs = NULL;
|
||||||
|
|
||||||
kfree(mcis);
|
kfree(mcis);
|
||||||
mcis = NULL;
|
mcis = NULL;
|
||||||
|
|
||||||
|
|
|
@ -461,17 +461,21 @@ struct amd64_pvt {
|
||||||
/* place to store error injection parameters prior to issue */
|
/* place to store error injection parameters prior to issue */
|
||||||
struct error_injection injection;
|
struct error_injection injection;
|
||||||
|
|
||||||
/* Save old hw registers' values before we modified them */
|
|
||||||
u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */
|
|
||||||
u32 old_nbctl;
|
|
||||||
|
|
||||||
/* DCT per-family scrubrate setting */
|
/* DCT per-family scrubrate setting */
|
||||||
u32 min_scrubrate;
|
u32 min_scrubrate;
|
||||||
|
|
||||||
/* family name this instance is running on */
|
/* family name this instance is running on */
|
||||||
const char *ctl_name;
|
const char *ctl_name;
|
||||||
|
|
||||||
/* misc settings */
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* per-node ECC settings descriptor
|
||||||
|
*/
|
||||||
|
struct ecc_settings {
|
||||||
|
u32 old_nbctl;
|
||||||
|
bool nbctl_valid;
|
||||||
|
|
||||||
struct flags {
|
struct flags {
|
||||||
unsigned long nb_mce_enable:1;
|
unsigned long nb_mce_enable:1;
|
||||||
unsigned long nb_ecc_prev:1;
|
unsigned long nb_ecc_prev:1;
|
||||||
|
|
Loading…
Reference in a new issue