diff mbox series

[v2,7/8] PCI/AER: Add AER sysfs attributes for log ratelimits

Message ID 20250214023543.992372-8-pandoh@google.com (mailing list archive)
State New
Delegated to: Bjorn Helgaas
Headers show
Series Rate limit AER logs | expand

Commit Message

Jon Pan-Doh Feb. 14, 2025, 2:35 a.m. UTC
Allow userspace to read/write log ratelimits per device. Create aer/ sysfs
directory to store them and any future aer configs.

Tested using aer-inject[1]. Configured correctable log ratelimits to 5.
Sent 6 AER errors. Observed 5 errors logged while AER stats
(cat /sys/bus/pci/devices/<dev>/aer_dev_correctable) shows 6.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/gong.chen/aer-inject.git

Signed-off-by: Jon Pan-Doh <pandoh@google.com>
---
 .../testing/sysfs-bus-pci-devices-aer_stats   | 20 +++++++
 Documentation/PCI/pcieaer-howto.rst           |  3 ++
 drivers/pci/pci-sysfs.c                       |  1 +
 drivers/pci/pci.h                             |  1 +
 drivers/pci/pcie/aer.c                        | 52 +++++++++++++++++++
 5 files changed, 77 insertions(+)

Comments

Karolina Stolarek Feb. 17, 2025, 1:31 p.m. UTC | #1
On 14/02/2025 03:35, Jon Pan-Doh wrote:
> Allow userspace to read/write log ratelimits per device. Create aer/ sysfs
> directory to store them and any future aer configs.

I don't think it's neccessary to keep ratelimits in a separate directory 
when we decided to we keep the rest of AER attributes at the dev level.

> 
> Tested using aer-inject[1]. Configured correctable log ratelimits to 5.
> Sent 6 AER errors. Observed 5 errors logged while AER stats
> (cat /sys/bus/pci/devices/<dev>/aer_dev_correctable) shows 6.
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/gong.chen/aer-inject.git
> 
> Signed-off-by: Jon Pan-Doh <pandoh@google.com>
> ---
>   .../testing/sysfs-bus-pci-devices-aer_stats   | 20 +++++++
>   Documentation/PCI/pcieaer-howto.rst           |  3 ++
>   drivers/pci/pci-sysfs.c                       |  1 +
>   drivers/pci/pci.h                             |  1 +
>   drivers/pci/pcie/aer.c                        | 52 +++++++++++++++++++
>   5 files changed, 77 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
> index d1f67bb81d5d..c1221614c079 100644
> --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
> +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
> @@ -117,3 +117,23 @@ Date:		July 2018
>   KernelVersion:	4.19.0
>   Contact:	linux-pci@vger.kernel.org, rajatja@google.com
>   Description:	Total number of ERR_NONFATAL messages reported to rootport.
> +
> +PCIe AER ratelimits
> +-------------------
> +
> +These attributes show up under all the devices that are AER capable.
> +Provides configurable ratelimits of logs/irq per error type. Writing a

This sentence seems to refer to _one_ attribute and mentions IRQ 
ratelimiting, which is not a part of the series.

How about rephrasing this section to say that the attributes allow 
configuration of the rate at which AER errors are reported, with each of 
them dedicated to one error type (correctable or uncorrectable)? 
Something along these lines.

> +nonzero value changes the number of errors (burst) allowed per 5 second
> +window before ratelimiting. Reading gets the current ratelimits.
> +
> +What:		/sys/bus/pci/devices/<dev>/aer/ratelimit_cor_log
> +Date:		March 2025
> +KernelVersion:	6.15.0
> +Contact:	linux-pci@vger.kernel.org, pandoh@google.com
> +Description:	Log ratelimit for correctable errors.
> +
> +What:		/sys/bus/pci/devices/<dev>/aer/ratelimit_uncor_log
> +Date:		March 2025
> +KernelVersion:	6.15.0
> +Contact:	linux-pci@vger.kernel.org, pandoh@google.com
> +Description:	Log ratelimit for uncorrectable errors.
> diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
> index 167c0b277b62..ab5b0f232204 100644
> --- a/Documentation/PCI/pcieaer-howto.rst
> +++ b/Documentation/PCI/pcieaer-howto.rst
> @@ -93,6 +93,9 @@ spammy devices from flooding the console and stalling execution. Set the
>   default ratelimit to DEFAULT_RATELIMIT_BURST over
>   DEFAULT_RATELIMIT_INTERVAL (10 per 5 seconds).
>   
> +Ratelimits are exposed in the form of sysfs attributes and configurable.
> +See Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats.
> +
>   AER Statistics / Counters
>   -------------------------
>   
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index b46ce1a2c554..16de3093294e 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -1801,6 +1801,7 @@ const struct attribute_group *pci_dev_attr_groups[] = {
>   	&pcie_dev_attr_group,
>   #ifdef CONFIG_PCIEAER
>   	&aer_stats_attr_group,
> +	&aer_attr_group,
>   #endif
>   #ifdef CONFIG_PCIEASPM
>   	&aspm_ctrl_attr_group,
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 26104aee06c0..26d30a99c48b 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -887,6 +887,7 @@ void pci_no_aer(void);
>   void pci_aer_init(struct pci_dev *dev);
>   void pci_aer_exit(struct pci_dev *dev);
>   extern const struct attribute_group aer_stats_attr_group;
> +extern const struct attribute_group aer_attr_group;
>   void pci_aer_clear_fatal_status(struct pci_dev *dev);
>   int pci_aer_clear_status(struct pci_dev *dev);
>   int pci_aer_raw_clear_status(struct pci_dev *dev);
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index c5b5381e2930..1237faee6542 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -626,6 +626,58 @@ const struct attribute_group aer_stats_attr_group = {
>   	.is_visible = aer_stats_attrs_are_visible,
>   };
>   
> +#define aer_ratelimit_attr(name, ratelimit)				\
> +	static ssize_t							\
> +	name##_show(struct device *dev, struct device_attribute *attr,	\
> +		    char *buf)						\
> +{									\
> +	struct pci_dev *pdev = to_pci_dev(dev);				\
> +	return sysfs_emit(buf, "%u errors every %u seconds\n",		\

The convention is that sysfs files should provide one value per file. It 
won't be just people interacting with this interface, but scripts as 
well. Parsing such a string is a hassle. As we can only change the burst 
of the ratelimit, I'd simply emit pdev->aer_report->ratelimit.burst.

All the best,
Karolina

> +			  pdev->aer_report->ratelimit.burst,		\
> +			  pdev->aer_report->ratelimit.interval / HZ);	\
> +}									\
> +									\
> +	static ssize_t							\
> +	name##_store(struct device *dev, struct device_attribute *attr,	\
> +		     const char *buf, size_t count)			\
> +{									\
> +	struct pci_dev *pdev = to_pci_dev(dev);				\
> +	int burst;							\
> +									\
> +	if (kstrtoint(buf, 0, &burst) < 0)				\
> +		return -EINVAL;						\
> +									\
> +	pdev->aer_report->ratelimit.burst = burst;			\
> +	return count;							\
> +}									\
> +static DEVICE_ATTR_RW(name)
> +
> +aer_ratelimit_attr(ratelimit_cor_log, cor_log_ratelimit);
> +aer_ratelimit_attr(ratelimit_uncor_log, uncor_log_ratelimit);
> +
> +static struct attribute *aer_attrs[] __ro_after_init = {
> +	&dev_attr_ratelimit_cor_log.attr,
> +	&dev_attr_ratelimit_uncor_log.attr,
> +	NULL
> +};
> +
> +static umode_t aer_attrs_are_visible(struct kobject *kobj,
> +				     struct attribute *a, int n)
> +{
> +	struct device *dev = kobj_to_dev(kobj);
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	if (!pdev->aer_report)
> +		return 0;
> +	return a->mode;
> +}
> +
> +const struct attribute_group aer_attr_group = {
> +	.name = "aer",
> +	.attrs = aer_attrs,
> +	.is_visible = aer_attrs_are_visible,
> +};
> +
>   void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info)
>   {
>   	unsigned long status = info->status & ~info->mask;
Jon Pan-Doh Feb. 19, 2025, 2:50 a.m. UTC | #2
On Mon, Feb 17, 2025 at 5:31 AM Karolina Stolarek
<karolina.stolarek@oracle.com> wrote:
> I don't think it's neccessary to keep ratelimits in a separate directory
> when we decided to we keep the rest of AER attributes at the dev level.

My motivation for this is that there may be more AER sysfs attributes we want to
expose. An example being OCP Fault Management roadmap which aims to have
userspace manage/enforce AER settings (set/get PCIE AER regs) for datacenter
repairability.

Given the permanence of sysfs entries, I think that it is reasonable
to create a new
directory to make AER sysfs attributes more extensible.

> > +These attributes show up under all the devices that are AER capable.
> > +Provides configurable ratelimits of logs/irq per error type. Writing a
>
> This sentence seems to refer to _one_ attribute and mentions IRQ
> ratelimiting, which is not a part of the series.
>
> How about rephrasing this section to say that the attributes allow
> configuration of the rate at which AER errors are reported, with each of
> them dedicated to one error type (correctable or uncorrectable)?
> Something along these lines.

Good catch. IRQ verbiage slipped in from v1. How's this:

These attributes show up under all the devices that are AER capable.
They allow configuration of the rate at which AER errors are reported,
with each of them dedicated to one error type (correctable or uncorrectable).

I kept the first sentence as that is common under the other AER sysfs
attributes.

> The convention is that sysfs files should provide one value per file. It
> won't be just people interacting with this interface, but scripts as
> well. Parsing such a string is a hassle. As we can only change the burst
> of the ratelimit, I'd simply emit pdev->aer_report->ratelimit.burst.

Ack. Will update in v3 and add explicit mention of ratelimit interval
in sysfs-bus-pci-devices-aer_stats so context is still there.

Thanks,
Jon
Jon Pan-Doh Feb. 19, 2025, 5:42 a.m. UTC | #3
On Mon, Feb 17, 2025 at 5:31 AM Karolina Stolarek
<karolina.stolarek@oracle.com> wrote:
> The convention is that sysfs files should provide one value per file. It
> won't be just people interacting with this interface, but scripts as
> well. Parsing such a string is a hassle. As we can only change the burst
> of the ratelimit, I'd simply emit pdev->aer_report->ratelimit.burst.

Realized that Jonathan said something similar in v1[1] (that I forgot
to fix). In addition to the reason you provided, he stated that
convention is to read/write the same thing to a sysfs file.

[1] https://lore.kernel.org/linux-pci/20250131143246.000037a2@huawei.com/

Thanks,
Jon
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
index d1f67bb81d5d..c1221614c079 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
@@ -117,3 +117,23 @@  Date:		July 2018
 KernelVersion:	4.19.0
 Contact:	linux-pci@vger.kernel.org, rajatja@google.com
 Description:	Total number of ERR_NONFATAL messages reported to rootport.
+
+PCIe AER ratelimits
+-------------------
+
+These attributes show up under all the devices that are AER capable.
+Provides configurable ratelimits of logs/irq per error type. Writing a
+nonzero value changes the number of errors (burst) allowed per 5 second
+window before ratelimiting. Reading gets the current ratelimits.
+
+What:		/sys/bus/pci/devices/<dev>/aer/ratelimit_cor_log
+Date:		March 2025
+KernelVersion:	6.15.0
+Contact:	linux-pci@vger.kernel.org, pandoh@google.com
+Description:	Log ratelimit for correctable errors.
+
+What:		/sys/bus/pci/devices/<dev>/aer/ratelimit_uncor_log
+Date:		March 2025
+KernelVersion:	6.15.0
+Contact:	linux-pci@vger.kernel.org, pandoh@google.com
+Description:	Log ratelimit for uncorrectable errors.
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
index 167c0b277b62..ab5b0f232204 100644
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -93,6 +93,9 @@  spammy devices from flooding the console and stalling execution. Set the
 default ratelimit to DEFAULT_RATELIMIT_BURST over
 DEFAULT_RATELIMIT_INTERVAL (10 per 5 seconds).
 
+Ratelimits are exposed in the form of sysfs attributes and configurable.
+See Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats.
+
 AER Statistics / Counters
 -------------------------
 
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index b46ce1a2c554..16de3093294e 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1801,6 +1801,7 @@  const struct attribute_group *pci_dev_attr_groups[] = {
 	&pcie_dev_attr_group,
 #ifdef CONFIG_PCIEAER
 	&aer_stats_attr_group,
+	&aer_attr_group,
 #endif
 #ifdef CONFIG_PCIEASPM
 	&aspm_ctrl_attr_group,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 26104aee06c0..26d30a99c48b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -887,6 +887,7 @@  void pci_no_aer(void);
 void pci_aer_init(struct pci_dev *dev);
 void pci_aer_exit(struct pci_dev *dev);
 extern const struct attribute_group aer_stats_attr_group;
+extern const struct attribute_group aer_attr_group;
 void pci_aer_clear_fatal_status(struct pci_dev *dev);
 int pci_aer_clear_status(struct pci_dev *dev);
 int pci_aer_raw_clear_status(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index c5b5381e2930..1237faee6542 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -626,6 +626,58 @@  const struct attribute_group aer_stats_attr_group = {
 	.is_visible = aer_stats_attrs_are_visible,
 };
 
+#define aer_ratelimit_attr(name, ratelimit)				\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *attr,	\
+		    char *buf)						\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	return sysfs_emit(buf, "%u errors every %u seconds\n",		\
+			  pdev->aer_report->ratelimit.burst,		\
+			  pdev->aer_report->ratelimit.interval / HZ);	\
+}									\
+									\
+	static ssize_t							\
+	name##_store(struct device *dev, struct device_attribute *attr,	\
+		     const char *buf, size_t count)			\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+	int burst;							\
+									\
+	if (kstrtoint(buf, 0, &burst) < 0)				\
+		return -EINVAL;						\
+									\
+	pdev->aer_report->ratelimit.burst = burst;			\
+	return count;							\
+}									\
+static DEVICE_ATTR_RW(name)
+
+aer_ratelimit_attr(ratelimit_cor_log, cor_log_ratelimit);
+aer_ratelimit_attr(ratelimit_uncor_log, uncor_log_ratelimit);
+
+static struct attribute *aer_attrs[] __ro_after_init = {
+	&dev_attr_ratelimit_cor_log.attr,
+	&dev_attr_ratelimit_uncor_log.attr,
+	NULL
+};
+
+static umode_t aer_attrs_are_visible(struct kobject *kobj,
+				     struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (!pdev->aer_report)
+		return 0;
+	return a->mode;
+}
+
+const struct attribute_group aer_attr_group = {
+	.name = "aer",
+	.attrs = aer_attrs,
+	.is_visible = aer_attrs_are_visible,
+};
+
 void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info)
 {
 	unsigned long status = info->status & ~info->mask;