diff mbox series

[v5,3/8] PCI/AER: Move AER stat collection out of __aer_print_error()

Message ID 20250321015806.954866-4-pandoh@google.com (mailing list archive)
State New
Delegated to: Bjorn Helgaas
Headers show
Series Rate limit AER logs | expand

Commit Message

Jon Pan-Doh March 21, 2025, 1:58 a.m. UTC
Decouple stat collection from internal AER print functions, so the
ratelimit does not impact the error counters. The stats collection is no
longer buried in nested functions, simplifying the function flow.

AERs from ghes or cxl drivers are a minor exception. Stat collection occurs
in pci_print_aer(), an external interface, as that is where aer_err_info
is populated.

Signed-off-by: Jon Pan-Doh <pandoh@google.com>
Reported-by: Sargun Dhillon <sargun@meta.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
---
 drivers/pci/pci.h      |  1 +
 drivers/pci/pcie/aer.c | 10 ++++++----
 drivers/pci/pcie/dpc.c |  1 +
 3 files changed, 8 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 75985b96ecc1..9d63d32f041c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -551,6 +551,7 @@  struct aer_err_info {
 };
 
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
+void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, const char *level);
 
 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3116b4678081..e5db1fdd8421 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -617,8 +617,7 @@  const struct attribute_group aer_stats_attr_group = {
 	.is_visible = aer_stats_attrs_are_visible,
 };
 
-static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
-				   struct aer_err_info *info)
+void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info)
 {
 	unsigned long status = info->status & ~info->mask;
 	int i, max = -1;
@@ -691,7 +690,6 @@  static void __aer_print_error(struct pci_dev *dev,
 		aer_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
 				info->first_error == i ? " (First)" : "");
 	}
-	pci_dev_aer_stats_incr(dev, info);
 }
 
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info,
@@ -784,6 +782,8 @@  void pci_print_aer(struct pci_dev *dev, int aer_severity,
 	info.mask = mask;
 	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
 
+	pci_dev_aer_stats_incr(dev, &info);
+
 	aer_printk(level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
 	__aer_print_error(dev, &info, level);
 	aer_printk(level, dev, "aer_layer=%s, aer_agent=%s\n",
@@ -1263,8 +1263,10 @@  static inline void aer_process_err_devices(struct aer_err_info *e_info,
 
 	/* Report all before handle them, not to lost records by reset etc. */
 	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
-		if (aer_get_device_error_info(e_info->dev[i], e_info))
+		if (aer_get_device_error_info(e_info->dev[i], e_info)) {
+			pci_dev_aer_stats_incr(e_info->dev[i], e_info);
 			aer_print_error(e_info->dev[i], e_info, level);
+		}
 	}
 	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
 		if (aer_get_device_error_info(e_info->dev[i], e_info))
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 9e4c9ac737a7..81cd6e8ff3a4 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -289,6 +289,7 @@  void dpc_process_error(struct pci_dev *pdev)
 	else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
 		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
 		 aer_get_device_error_info(pdev, &info)) {
+		pci_dev_aer_stats_incr(pdev, &info);
 		aer_print_error(pdev, &info, KERN_ERR);
 		pci_aer_clear_nonfatal_status(pdev);
 		pci_aer_clear_fatal_status(pdev);