Message ID | 20250319084050.366718-2-pandoh@google.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Rate limit AER logs | expand |
Hi, On 3/19/25 1:40 AM, Jon Pan-Doh wrote: > From: Karolina Stolarek <karolina.stolarek@oracle.com> > > When reporting an AER error, we check its type multiple times > to determine the log level for each message. Do this check only > in the top-level functions (aer_isr_one_error(), pci_print_aer()) and > propagate the result down the call chain. > > Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com> > Signed-off-by: Jon Pan-Doh <pandoh@google.com> > Reviewed-by: Jon Pan-Doh <pandoh@google.com> > --- With my comments fixed, you can add Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > drivers/pci/pci.h | 2 +- > drivers/pci/pcie/aer.c | 37 ++++++++++++++++++++----------------- > drivers/pci/pcie/dpc.c | 2 +- > 3 files changed, 22 insertions(+), 19 deletions(-) > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index b8911d1e10dc..75985b96ecc1 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -551,7 +551,7 @@ struct aer_err_info { > }; > > int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); > -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); > +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, const char *level); > > int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, > unsigned int tlp_len, bool flit, > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index 9cff7069577e..cc9c80cd88f3 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -670,20 +670,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, > } > > static void __aer_print_error(struct pci_dev *dev, > - struct aer_err_info *info) > + struct aer_err_info *info, > + const char *level) > { > const char **strings; > unsigned long status = info->status & ~info->mask; > - const char *level, *errmsg; > + const char *errmsg; > int i; > > - if (info->severity == AER_CORRECTABLE) { > + if (info->severity == AER_CORRECTABLE) > strings = aer_correctable_error_string; > - level = KERN_WARNING; > - } else { > + else > strings = aer_uncorrectable_error_string; > - level = KERN_ERR; > - } > > for_each_set_bit(i, &status, 32) { > errmsg = strings[i]; > @@ -696,11 +694,11 @@ static void __aer_print_error(struct pci_dev *dev, > pci_dev_aer_stats_incr(dev, info); > } > > -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, > + const char *level) > { > int layer, agent; > int id = pci_dev_id(dev); > - const char *level; > > if (!info->status) { > pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", > @@ -711,8 +709,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > layer = AER_GET_LAYER_ERROR(info->severity, info->status); > agent = AER_GET_AGENT(info->severity, info->status); > > - level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; > - > aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", > aer_error_severity_string[info->severity], > aer_error_layer[layer], aer_agent_string[agent]); > @@ -720,7 +716,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", > dev->vendor, dev->device, info->status, info->mask); > > - __aer_print_error(dev, info); > + __aer_print_error(dev, info, level); > > if (info->tlp_header_valid) > pcie_print_tlp_log(dev, &info->tlp, dev_fmt(" ")); > @@ -765,15 +761,18 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, > { > int layer, agent, tlp_header_valid = 0; > u32 status, mask; > + const char *level; > struct aer_err_info info; > > if (aer_severity == AER_CORRECTABLE) { > status = aer->cor_status; > mask = aer->cor_mask; > + level = KERN_WARNING; > } else { > status = aer->uncor_status; > mask = aer->uncor_mask; > tlp_header_valid = status & AER_LOG_TLP_MASKS; > + level = KERN_ERR; > } > > layer = AER_GET_LAYER_ERROR(aer_severity, status); > @@ -786,7 +785,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, > info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); > > pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); > - __aer_print_error(dev, &info); > + __aer_print_error(dev, &info, level); > pci_err(dev, "aer_layer=%s, aer_agent=%s\n", > aer_error_layer[layer], aer_agent_string[agent]); > > @@ -1257,14 +1256,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) > return 1; > } > > -static inline void aer_process_err_devices(struct aer_err_info *e_info) > +static inline void aer_process_err_devices(struct aer_err_info *e_info, > + const char *level) > { > int i; > > /* Report all before handle them, not to lost records by reset etc. */ > for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { > if (aer_get_device_error_info(e_info->dev[i], e_info)) > - aer_print_error(e_info->dev[i], e_info); > + aer_print_error(e_info->dev[i], e_info, level); > } > for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { > if (aer_get_device_error_info(e_info->dev[i], e_info)) > @@ -1282,6 +1282,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, > { > struct pci_dev *pdev = rpc->rpd; > struct aer_err_info e_info; > + const char *level; > > pci_rootport_aer_stats_incr(pdev, e_src); > > @@ -1292,6 +1293,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, > if (e_src->status & PCI_ERR_ROOT_COR_RCV) { > e_info.id = ERR_COR_ID(e_src->id); > e_info.severity = AER_CORRECTABLE; > + level = KERN_WARNING; > > if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) > e_info.multi_error_valid = 1; > @@ -1300,11 +1302,12 @@ static void aer_isr_one_error(struct aer_rpc *rpc, > aer_print_port_info(pdev, &e_info); > > if (find_source_device(pdev, &e_info)) > - aer_process_err_devices(&e_info); > + aer_process_err_devices(&e_info, level); Since this path is only taken for correctable error, you can directly use aer_process_err_devices(&e_info, KERN_WARNING) > } > > if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { > e_info.id = ERR_UNCOR_ID(e_src->id); > + level = KERN_ERR; > > if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) > e_info.severity = AER_FATAL; > @@ -1319,7 +1322,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, > aer_print_port_info(pdev, &e_info); > > if (find_source_device(pdev, &e_info)) > - aer_process_err_devices(&e_info); > + aer_process_err_devices(&e_info, level); Same as above. aer_process_err_devices(&e_info, KERN_ERR) > } > } > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index df42f15c9829..9e4c9ac737a7 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -289,7 +289,7 @@ void dpc_process_error(struct pci_dev *pdev) > else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && > dpc_get_aer_uncorrect_severity(pdev, &info) && > aer_get_device_error_info(pdev, &info)) { > - aer_print_error(pdev, &info); > + aer_print_error(pdev, &info, KERN_ERR); > pci_aer_clear_nonfatal_status(pdev); > pci_aer_clear_fatal_status(pdev); > }
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b8911d1e10dc..75985b96ecc1 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -551,7 +551,7 @@ struct aer_err_info { }; int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, const char *level); int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 9cff7069577e..cc9c80cd88f3 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -670,20 +670,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, } static void __aer_print_error(struct pci_dev *dev, - struct aer_err_info *info) + struct aer_err_info *info, + const char *level) { const char **strings; unsigned long status = info->status & ~info->mask; - const char *level, *errmsg; + const char *errmsg; int i; - if (info->severity == AER_CORRECTABLE) { + if (info->severity == AER_CORRECTABLE) strings = aer_correctable_error_string; - level = KERN_WARNING; - } else { + else strings = aer_uncorrectable_error_string; - level = KERN_ERR; - } for_each_set_bit(i, &status, 32) { errmsg = strings[i]; @@ -696,11 +694,11 @@ static void __aer_print_error(struct pci_dev *dev, pci_dev_aer_stats_incr(dev, info); } -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, + const char *level) { int layer, agent; int id = pci_dev_id(dev); - const char *level; if (!info->status) { pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", @@ -711,8 +709,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); @@ -720,7 +716,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", dev->vendor, dev->device, info->status, info->mask); - __aer_print_error(dev, info); + __aer_print_error(dev, info, level); if (info->tlp_header_valid) pcie_print_tlp_log(dev, &info->tlp, dev_fmt(" ")); @@ -765,15 +761,18 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, { int layer, agent, tlp_header_valid = 0; u32 status, mask; + const char *level; struct aer_err_info info; if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; + level = KERN_WARNING; } else { status = aer->uncor_status; mask = aer->uncor_mask; tlp_header_valid = status & AER_LOG_TLP_MASKS; + level = KERN_ERR; } layer = AER_GET_LAYER_ERROR(aer_severity, status); @@ -786,7 +785,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); - __aer_print_error(dev, &info); + __aer_print_error(dev, &info, level); pci_err(dev, "aer_layer=%s, aer_agent=%s\n", aer_error_layer[layer], aer_agent_string[agent]); @@ -1257,14 +1256,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) return 1; } -static inline void aer_process_err_devices(struct aer_err_info *e_info) +static inline void aer_process_err_devices(struct aer_err_info *e_info, + const char *level) { int i; /* Report all before handle them, not to lost records by reset etc. */ for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { if (aer_get_device_error_info(e_info->dev[i], e_info)) - aer_print_error(e_info->dev[i], e_info); + aer_print_error(e_info->dev[i], e_info, level); } for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { if (aer_get_device_error_info(e_info->dev[i], e_info)) @@ -1282,6 +1282,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, { struct pci_dev *pdev = rpc->rpd; struct aer_err_info e_info; + const char *level; pci_rootport_aer_stats_incr(pdev, e_src); @@ -1292,6 +1293,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, if (e_src->status & PCI_ERR_ROOT_COR_RCV) { e_info.id = ERR_COR_ID(e_src->id); e_info.severity = AER_CORRECTABLE; + level = KERN_WARNING; if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) e_info.multi_error_valid = 1; @@ -1300,11 +1302,12 @@ static void aer_isr_one_error(struct aer_rpc *rpc, aer_print_port_info(pdev, &e_info); if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); + aer_process_err_devices(&e_info, level); } if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { e_info.id = ERR_UNCOR_ID(e_src->id); + level = KERN_ERR; if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) e_info.severity = AER_FATAL; @@ -1319,7 +1322,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, aer_print_port_info(pdev, &e_info); if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); + aer_process_err_devices(&e_info, level); } } diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index df42f15c9829..9e4c9ac737a7 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -289,7 +289,7 @@ void dpc_process_error(struct pci_dev *pdev) else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && dpc_get_aer_uncorrect_severity(pdev, &info) && aer_get_device_error_info(pdev, &info)) { - aer_print_error(pdev, &info); + aer_print_error(pdev, &info, KERN_ERR); pci_aer_clear_nonfatal_status(pdev); pci_aer_clear_fatal_status(pdev); }