Message ID | 20190913221344.13055-2-tony.luck@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | EDAC, skx: Provide more machine specific location detail | expand |
Em Fri, 13 Sep 2019 15:13:43 -0700 Tony Luck <tony.luck@intel.com> escreveu: > Simplifies the code a little. > > Signed-off-by: Tony Luck <tony.luck@intel.com> Patch itself looks good... > --- > drivers/edac/skx_common.c | 48 +++++++++++++++++++-------------------- > 1 file changed, 23 insertions(+), 25 deletions(-) > > diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c > index d8ff63d91b86..58b8348d0f71 100644 > --- a/drivers/edac/skx_common.c > +++ b/drivers/edac/skx_common.c > @@ -100,6 +100,7 @@ void __exit skx_adxl_put(void) > > static bool skx_adxl_decode(struct decoded_addr *res) > { > + struct skx_dev *d; > int i, len = 0; > > if (res->addr >= skx_tohm || (res->addr >= skx_tolm && > @@ -118,6 +119,24 @@ static bool skx_adxl_decode(struct decoded_addr *res) > res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; > res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; > > + if (res->imc > NUM_IMC - 1) { > + skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); I would report this via EDAC as well. > + return false; > + } > + > + list_for_each_entry(d, &dev_edac_list, list) { > + if (d->imc[0].src_id == res->socket) { > + res->dev = d; > + break; > + } > + } > + > + if (!res->dev) { > + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", > + res->socket, res->imc); I would report this via EDAC as well. 
> + return false; > + } > + > for (i = 0; i < adxl_component_count; i++) { > if (adxl_values[i] == ~0x0ull) > continue; > @@ -452,24 +471,6 @@ static void skx_unregister_mci(struct skx_imc *imc) > edac_mc_free(mci); > } > > -static struct mem_ctl_info *get_mci(int src_id, int lmc) > -{ > - struct skx_dev *d; > - > - if (lmc > NUM_IMC - 1) { > - skx_printk(KERN_ERR, "Bad lmc %d\n", lmc); > - return NULL; > - } > - > - list_for_each_entry(d, &dev_edac_list, list) { > - if (d->imc[0].src_id == src_id) > - return d->imc[lmc].mci; > - } > - > - skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc); > - return NULL; > -} > - > static void skx_mce_output_error(struct mem_ctl_info *mci, > const struct mce *m, > struct decoded_addr *res) > @@ -583,15 +584,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, > if (adxl_component_count) { > if (!skx_adxl_decode(&res)) > return NOTIFY_DONE; > - > - mci = get_mci(res.socket, res.imc); > - } else { > - if (!skx_decode || !skx_decode(&res)) > - return NOTIFY_DONE; > - > - mci = res.dev->imc[res.imc].mci; > + } else if (!skx_decode || !skx_decode(&res)) { > + return NOTIFY_DONE; > } > > + mci = res.dev->imc[res.imc].mci; > + > if (!mci) > return NOTIFY_DONE; > Thanks, Mauro
>> + if (res->imc > NUM_IMC - 1) {
>> + skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
>
> I would report this via EDAC as well.

It would be nice, but I don't see how. This function is trying to figure out which memory controller (and thus which EDAC struct mem_ctl_info) is connected to this error. If it fails, then we don't know where to report it.

On the plus side, this error (and the other one you flagged) "can't happen"(TM), so we shouldn't expend too much effort to solve this. The code must give up here rather than trigger out-of-bounds array accesses later.

If we did want to solve this, we could invent a mechanism for EDAC drivers to log errors not related to a particular memory controller (by passing NULL to edac_mc_handle_error()???).

-Tony
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index d8ff63d91b86..58b8348d0f71 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -100,6 +100,7 @@ void __exit skx_adxl_put(void) static bool skx_adxl_decode(struct decoded_addr *res) { + struct skx_dev *d; int i, len = 0; if (res->addr >= skx_tohm || (res->addr >= skx_tolm && @@ -118,6 +119,24 @@ static bool skx_adxl_decode(struct decoded_addr *res) res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + if (res->imc > NUM_IMC - 1) { + skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); + return false; + } + + list_for_each_entry(d, &dev_edac_list, list) { + if (d->imc[0].src_id == res->socket) { + res->dev = d; + break; + } + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + res->socket, res->imc); + return false; + } + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -452,24 +471,6 @@ static void skx_unregister_mci(struct skx_imc *imc) edac_mc_free(mci); } -static struct mem_ctl_info *get_mci(int src_id, int lmc) -{ - struct skx_dev *d; - - if (lmc > NUM_IMC - 1) { - skx_printk(KERN_ERR, "Bad lmc %d\n", lmc); - return NULL; - } - - list_for_each_entry(d, &dev_edac_list, list) { - if (d->imc[0].src_id == src_id) - return d->imc[lmc].mci; - } - - skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc); - return NULL; -} - static void skx_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, struct decoded_addr *res) @@ -583,15 +584,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (adxl_component_count) { if (!skx_adxl_decode(&res)) return NOTIFY_DONE; - - mci = get_mci(res.socket, res.imc); - } else { - if (!skx_decode || !skx_decode(&res)) - return NOTIFY_DONE; - - mci = res.dev->imc[res.imc].mci; + } else if (!skx_decode || !skx_decode(&res)) { + return NOTIFY_DONE; } + mci = 
res.dev->imc[res.imc].mci; + if (!mci) return NOTIFY_DONE;
Simplifies the code a little. Signed-off-by: Tony Luck <tony.luck@intel.com> --- drivers/edac/skx_common.c | 48 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 25 deletions(-)