diff mbox series

[3/8] PCI/AER: Rename struct aer_stats to aer_info

Message ID 20250115074301.3514927-4-pandoh@google.com (mailing list archive)
State New
Delegated to: Bjorn Helgaas
Headers show
Series Rate limit AER logs/IRQs | expand

Commit Message

Jon Pan-Doh Jan. 15, 2025, 7:42 a.m. UTC
Update name to reflect the broader definition of structs/variables that
are stored (e.g. ratelimits).

Signed-off-by: Jon Pan-Doh <pandoh@google.com>
---
 drivers/pci/pcie/aer.c | 50 +++++++++++++++++++++---------------------
 include/linux/pci.h    |  2 +-
 2 files changed, 26 insertions(+), 26 deletions(-)

Comments

Karolina Stolarek Jan. 16, 2025, 10:11 a.m. UTC | #1
On 15/01/2025 08:42, Jon Pan-Doh wrote:
> Update name to reflect the broader definition of structs/variables that
> are stored (e.g. ratelimits).

I understand the intention behind this change, but I'm not fully 
convinced if we should mix AER error attributes with the tools to 
control error reporting/generation (ratelimits). I'd argue that 
"aer_info" name still doesn't express what the structure does.

aer_stats sits in pci_dev, so I can see why you decided to use it; it's 
one of the few available places where we could keep a stateful ratelimit.

How about creating a struct to keep all the ratelimits in one place and 
embedding that in the pci_dev?

All the best,
Karolina

> 
> Signed-off-by: Jon Pan-Doh <pandoh@google.com>
> ---
>   drivers/pci/pcie/aer.c | 50 +++++++++++++++++++++---------------------
>   include/linux/pci.h    |  2 +-
>   2 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 4bb0b3840402..5ab5cd7368bc 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -50,11 +50,11 @@ struct aer_rpc {
>   	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
>   };
>   
> -/* AER stats for the device */
> -struct aer_stats {
> +/* AER info for the device */
> +struct aer_info {
>   
>   	/*
> -	 * Fields for all AER capable devices. They indicate the errors
> +	 * Stats for all AER capable devices. They indicate the errors
>   	 * "as seen by this device". Note that this may mean that if an
>   	 * end point is causing problems, the AER counters may increment
>   	 * at its link partner (e.g. root port) because the errors will be
> @@ -76,7 +76,7 @@ struct aer_stats {
>   	u64 dev_total_nonfatal_errs;
>   
>   	/*
> -	 * Fields for Root ports & root complex event collectors only, these
> +	 * Stats for Root ports & root complex event collectors only, these
>   	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
>   	 * messages received by the root port / event collector, INCLUDING the
>   	 * ones that are generated internally (by the rootport itself)
> @@ -373,7 +373,7 @@ void pci_aer_init(struct pci_dev *dev)
>   	if (!dev->aer_cap)
>   		return;
>   
> -	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
> +	dev->aer_info = kzalloc(sizeof(struct aer_info), GFP_KERNEL);
>   
>   	/*
>   	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
> @@ -394,8 +394,8 @@ void pci_aer_init(struct pci_dev *dev)
>   
>   void pci_aer_exit(struct pci_dev *dev)
>   {
> -	kfree(dev->aer_stats);
> -	dev->aer_stats = NULL;
> +	kfree(dev->aer_info);
> +	dev->aer_info = NULL;
>   }
>   
>   #define AER_AGENT_RECEIVER		0
> @@ -533,10 +533,10 @@ static const char *aer_agent_string[] = {
>   {									\
>   	unsigned int i;							\
>   	struct pci_dev *pdev = to_pci_dev(dev);				\
> -	u64 *stats = pdev->aer_stats->stats_array;			\
> +	u64 *stats = pdev->aer_info->stats_array;			\
>   	size_t len = 0;							\
>   									\
> -	for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
> +	for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) {\
>   		if (strings_array[i])					\
>   			len += sysfs_emit_at(buf, len, "%s %llu\n",	\
>   					     strings_array[i],		\
> @@ -547,7 +547,7 @@ static const char *aer_agent_string[] = {
>   					     i, stats[i]);		\
>   	}								\
>   	len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string,	\
> -			     pdev->aer_stats->total_field);		\
> +			     pdev->aer_info->total_field);		\
>   	return len;							\
>   }									\
>   static DEVICE_ATTR_RO(name)
> @@ -568,7 +568,7 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
>   		     char *buf)						\
>   {									\
>   	struct pci_dev *pdev = to_pci_dev(dev);				\
> -	return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field);	\
> +	return sysfs_emit(buf, "%llu\n", pdev->aer_info->field);	\
>   }									\
>   static DEVICE_ATTR_RO(name)
>   
> @@ -595,7 +595,7 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
>   	struct device *dev = kobj_to_dev(kobj);
>   	struct pci_dev *pdev = to_pci_dev(dev);
>   
> -	if (!pdev->aer_stats)
> +	if (!pdev->aer_info)
>   		return 0;
>   
>   	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
> @@ -619,25 +619,25 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
>   	unsigned long status = info->status & ~info->mask;
>   	int i, max = -1;
>   	u64 *counter = NULL;
> -	struct aer_stats *aer_stats = pdev->aer_stats;
> +	struct aer_info *aer_info = pdev->aer_info;
>   
> -	if (!aer_stats)
> +	if (!aer_info)
>   		return;
>   
>   	switch (info->severity) {
>   	case AER_CORRECTABLE:
> -		aer_stats->dev_total_cor_errs++;
> -		counter = &aer_stats->dev_cor_errs[0];
> +		aer_info->dev_total_cor_errs++;
> +		counter = &aer_info->dev_cor_errs[0];
>   		max = AER_MAX_TYPEOF_COR_ERRS;
>   		break;
>   	case AER_NONFATAL:
> -		aer_stats->dev_total_nonfatal_errs++;
> -		counter = &aer_stats->dev_nonfatal_errs[0];
> +		aer_info->dev_total_nonfatal_errs++;
> +		counter = &aer_info->dev_nonfatal_errs[0];
>   		max = AER_MAX_TYPEOF_UNCOR_ERRS;
>   		break;
>   	case AER_FATAL:
> -		aer_stats->dev_total_fatal_errs++;
> -		counter = &aer_stats->dev_fatal_errs[0];
> +		aer_info->dev_total_fatal_errs++;
> +		counter = &aer_info->dev_fatal_errs[0];
>   		max = AER_MAX_TYPEOF_UNCOR_ERRS;
>   		break;
>   	}
> @@ -649,19 +649,19 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
>   static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
>   				 struct aer_err_source *e_src)
>   {
> -	struct aer_stats *aer_stats = pdev->aer_stats;
> +	struct aer_info *aer_info = pdev->aer_info;
>   
> -	if (!aer_stats)
> +	if (!aer_info)
>   		return;
>   
>   	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
> -		aer_stats->rootport_total_cor_errs++;
> +		aer_info->rootport_total_cor_errs++;
>   
>   	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
>   		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
> -			aer_stats->rootport_total_fatal_errs++;
> +			aer_info->rootport_total_fatal_errs++;
>   		else
> -			aer_stats->rootport_total_nonfatal_errs++;
> +			aer_info->rootport_total_nonfatal_errs++;
>   	}
>   }
>   
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index db9b47ce3eef..72e6f5560164 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -346,7 +346,7 @@ struct pci_dev {
>   	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
>   #ifdef CONFIG_PCIEAER
>   	u16		aer_cap;	/* AER capability offset */
> -	struct aer_stats *aer_stats;	/* AER stats for this device */
> +	struct aer_info *aer_info;	/* AER info for this device */
>   #endif
>   #ifdef CONFIG_PCIEPORTBUS
>   	struct rcec_ea	*rcec_ea;	/* RCEC cached endpoint association */
Jon Pan-Doh Jan. 18, 2025, 1:59 a.m. UTC | #2
On Thu, Jan 16, 2025 at 2:12 AM Karolina Stolarek
<karolina.stolarek@oracle.com> wrote:
> On 15/01/2025 08:42, Jon Pan-Doh wrote:
> > Update name to reflect the broader definition of structs/variables that
> > are stored (e.g. ratelimits).
>
> I understand the intention behind this change, but I'm not fully
> convinced if we should mix AER error attributes with the tools to
> control error reporting/generation (ratelimits). I'd argue that
> "aer_info" name still doesn't express what the structure does.
>
> aer_stats sits in pci_dev, so I can see why you decided to use it; it's
> one of the few available places where we could keep a stateful ratelimit.
>
> How about creating a struct to keep all the ratelimits in one place and
> embedding that in the pci_dev?

In a previous version, I had a separate struct aer_ratelimits in
pci_dev (similar to your patch[1]):

@@ -346,7 +346,7 @@ struct pci_dev {
        u8              hdr_type;       /* PCI header type (`multi'
flag masked out) */
 #ifdef CONFIG_PCIEAER
        u16             aer_cap;        /* AER capability offset */
        struct aer_stats *aer_stats;    /* AER stats for this device */
+      struct aer_ratelimits *aer_ratelimits;      /* AER ratelimits
for this device */

However, in an internal review, Bjorn said:
> I don't think we need to burn two pointers here since we always populate both at the
> same time. Perhaps combine the stats and ratelimits into a single "struct aer_info" or
> similar.

It seems like the preference is to minimize additional memory in
pci_dev. Any suggestions for a more appropriate name than "aer_info"?

Otherwise, is this a good enough justification to maintain two pointers Bjorn?

Thanks,
Jon

[1] https://lore.kernel.org/linux-pci/68ef082c855b4e1d094dcfc9a861f43488b64922.1736341506.git.karolina.stolarek@oracle.com/#Z31include:linux:pci.h
Karolina Stolarek Jan. 20, 2025, 10:13 a.m. UTC | #3
On 18/01/2025 02:59, Jon Pan-Doh wrote:
> 
> In a previous version, I had a separate struct aer_ratelimits in
> pci_dev (similar to your patch[1]):
> 
> @@ -346,7 +346,7 @@ struct pci_dev {
>          u8              hdr_type;       /* PCI header type (`multi'
> flag masked out) */
>   #ifdef CONFIG_PCIEAER
>          u16             aer_cap;        /* AER capability offset */
>          struct aer_stats *aer_stats;    /* AER stats for this device */
> +      struct aer_ratelimits *aer_ratelimits;      /* AER ratelimits
> for this device */
> 
> However, in an internal review, Bjorn said:
>> I don't think we need to burn two pointers here since we always populate both at the
>> same time. Perhaps combine the stats and ratelimits into a single "struct aer_info" or
>> similar.

I see, thanks for sharing it here.

My thinking is that it's preferable to have small and specialized 
structs, with one used for collecting/showing error info and the other 
to control the rates at which they are reported and generated. I'd 
propose a similar change to the one above.

> It seems like the preference is to minimize additional memory in
> pci_dev. Any suggestions for a more appropriate name than "aer_info"?

Like I said, my preference would be to leave aer_stats as it is, but if 
we have to stick to one struct, I'd go with "aer_control" or "aer_report".

All the best,
Karolina

> 
> Otherwise, is this a good enough justification to maintain two pointers Bjorn?
> 
> Thanks,
> Jon
> 
> [1] https://lore.kernel.org/linux-pci/68ef082c855b4e1d094dcfc9a861f43488b64922.1736341506.git.karolina.stolarek@oracle.com/#Z31include:linux:pci.h
diff mbox series

Patch

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 4bb0b3840402..5ab5cd7368bc 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -50,11 +50,11 @@  struct aer_rpc {
 	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
 };
 
-/* AER stats for the device */
-struct aer_stats {
+/* AER info for the device */
+struct aer_info {
 
 	/*
-	 * Fields for all AER capable devices. They indicate the errors
+	 * Stats for all AER capable devices. They indicate the errors
 	 * "as seen by this device". Note that this may mean that if an
 	 * end point is causing problems, the AER counters may increment
 	 * at its link partner (e.g. root port) because the errors will be
@@ -76,7 +76,7 @@  struct aer_stats {
 	u64 dev_total_nonfatal_errs;
 
 	/*
-	 * Fields for Root ports & root complex event collectors only, these
+	 * Stats for Root ports & root complex event collectors only, these
 	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
 	 * messages received by the root port / event collector, INCLUDING the
 	 * ones that are generated internally (by the rootport itself)
@@ -373,7 +373,7 @@  void pci_aer_init(struct pci_dev *dev)
 	if (!dev->aer_cap)
 		return;
 
-	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+	dev->aer_info = kzalloc(sizeof(struct aer_info), GFP_KERNEL);
 
 	/*
 	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
@@ -394,8 +394,8 @@  void pci_aer_init(struct pci_dev *dev)
 
 void pci_aer_exit(struct pci_dev *dev)
 {
-	kfree(dev->aer_stats);
-	dev->aer_stats = NULL;
+	kfree(dev->aer_info);
+	dev->aer_info = NULL;
 }
 
 #define AER_AGENT_RECEIVER		0
@@ -533,10 +533,10 @@  static const char *aer_agent_string[] = {
 {									\
 	unsigned int i;							\
 	struct pci_dev *pdev = to_pci_dev(dev);				\
-	u64 *stats = pdev->aer_stats->stats_array;			\
+	u64 *stats = pdev->aer_info->stats_array;			\
 	size_t len = 0;							\
 									\
-	for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+	for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) {\
 		if (strings_array[i])					\
 			len += sysfs_emit_at(buf, len, "%s %llu\n",	\
 					     strings_array[i],		\
@@ -547,7 +547,7 @@  static const char *aer_agent_string[] = {
 					     i, stats[i]);		\
 	}								\
 	len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string,	\
-			     pdev->aer_stats->total_field);		\
+			     pdev->aer_info->total_field);		\
 	return len;							\
 }									\
 static DEVICE_ATTR_RO(name)
@@ -568,7 +568,7 @@  aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
 		     char *buf)						\
 {									\
 	struct pci_dev *pdev = to_pci_dev(dev);				\
-	return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field);	\
+	return sysfs_emit(buf, "%llu\n", pdev->aer_info->field);	\
 }									\
 static DEVICE_ATTR_RO(name)
 
@@ -595,7 +595,7 @@  static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
 
-	if (!pdev->aer_stats)
+	if (!pdev->aer_info)
 		return 0;
 
 	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
@@ -619,25 +619,25 @@  static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
 	unsigned long status = info->status & ~info->mask;
 	int i, max = -1;
 	u64 *counter = NULL;
-	struct aer_stats *aer_stats = pdev->aer_stats;
+	struct aer_info *aer_info = pdev->aer_info;
 
-	if (!aer_stats)
+	if (!aer_info)
 		return;
 
 	switch (info->severity) {
 	case AER_CORRECTABLE:
-		aer_stats->dev_total_cor_errs++;
-		counter = &aer_stats->dev_cor_errs[0];
+		aer_info->dev_total_cor_errs++;
+		counter = &aer_info->dev_cor_errs[0];
 		max = AER_MAX_TYPEOF_COR_ERRS;
 		break;
 	case AER_NONFATAL:
-		aer_stats->dev_total_nonfatal_errs++;
-		counter = &aer_stats->dev_nonfatal_errs[0];
+		aer_info->dev_total_nonfatal_errs++;
+		counter = &aer_info->dev_nonfatal_errs[0];
 		max = AER_MAX_TYPEOF_UNCOR_ERRS;
 		break;
 	case AER_FATAL:
-		aer_stats->dev_total_fatal_errs++;
-		counter = &aer_stats->dev_fatal_errs[0];
+		aer_info->dev_total_fatal_errs++;
+		counter = &aer_info->dev_fatal_errs[0];
 		max = AER_MAX_TYPEOF_UNCOR_ERRS;
 		break;
 	}
@@ -649,19 +649,19 @@  static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
 static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
 				 struct aer_err_source *e_src)
 {
-	struct aer_stats *aer_stats = pdev->aer_stats;
+	struct aer_info *aer_info = pdev->aer_info;
 
-	if (!aer_stats)
+	if (!aer_info)
 		return;
 
 	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
-		aer_stats->rootport_total_cor_errs++;
+		aer_info->rootport_total_cor_errs++;
 
 	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
 		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
-			aer_stats->rootport_total_fatal_errs++;
+			aer_info->rootport_total_fatal_errs++;
 		else
-			aer_stats->rootport_total_nonfatal_errs++;
+			aer_info->rootport_total_nonfatal_errs++;
 	}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index db9b47ce3eef..72e6f5560164 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -346,7 +346,7 @@  struct pci_dev {
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
 #ifdef CONFIG_PCIEAER
 	u16		aer_cap;	/* AER capability offset */
-	struct aer_stats *aer_stats;	/* AER stats for this device */
+	struct aer_info *aer_info;	/* AER info for this device */
 #endif
 #ifdef CONFIG_PCIEPORTBUS
 	struct rcec_ea	*rcec_ea;	/* RCEC cached endpoint association */