diff mbox series

pci: provide bus reset attribute

Message ID 20241025145021.1410645-1-kbusch@meta.com (mailing list archive)
State Superseded
Headers show
Series pci: provide bus reset attribute | expand

Commit Message

Keith Busch Oct. 25, 2024, 2:50 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Attempting a bus reset on an end device only works if it's the only
function on or below that bus.

Provide an attribute on the pci_bus device that can perform the
secondary bus reset. This makes it possible for a user to safely reset
multiple devices in a single command using the secondary bus reset
method.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/pci-sysfs.c | 23 +++++++++++++++++++++++
 drivers/pci/pci.c       |  2 +-
 drivers/pci/pci.h       |  1 +
 3 files changed, 25 insertions(+), 1 deletion(-)

Comments

Bjorn Helgaas Oct. 25, 2024, 4:57 p.m. UTC | #1
[+cc Alex, Amey, Raphael]

On Fri, Oct 25, 2024 at 07:50:21AM -0700, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Attempting a bus reset on an end device only works if it's the only
> function on or below that bus.
> 
> Provide an attribute on the pci_bus device that can perform the
> secondary bus reset. This makes it possible for a user to safely reset
> multiple devices in a single command using the secondary bus reset
> method.

I confess to being a little ambivalent or even hesitant about
operations on the pci_bus (as opposed to on a pci_dev), but I can't
really articulate a great reason, other than the fact that the "bus"
is kind of abstract and from a hardware perspective, the *devices* are
the only things we can control.

I assume this is useful in some scenario.  I guess this is root-only,
so there's no real concern about whether all the devices are used by
the same VM or in the same IOMMU group or anything?

> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  drivers/pci/pci-sysfs.c | 23 +++++++++++++++++++++++
>  drivers/pci/pci.c       |  2 +-
>  drivers/pci/pci.h       |  1 +
>  3 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 5d0f4db1cab78..616d64f12da4d 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -521,6 +521,28 @@ static ssize_t bus_rescan_store(struct device *dev,
>  static struct device_attribute dev_attr_bus_rescan = __ATTR(rescan, 0200, NULL,
>  							    bus_rescan_store);
>  
> +static ssize_t bus_reset_store(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	struct pci_bus *bus = to_pci_bus(dev);
> +	unsigned long val;
> +
> +	if (kstrtoul(buf, 0, &val) < 0)
> +		return -EINVAL;
> +
> +	if (val) {
> +		int ret = __pci_reset_bus(bus);
> +
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return count;
> +}
> +static struct device_attribute dev_attr_bus_reset = __ATTR(reset, 0200, NULL,
> +							   bus_reset_store);
> +
>  #if defined(CONFIG_PM) && defined(CONFIG_ACPI)
>  static ssize_t d3cold_allowed_store(struct device *dev,
>  				    struct device_attribute *attr,
> @@ -638,6 +660,7 @@ static struct attribute *pcie_dev_attrs[] = {
>  
>  static struct attribute *pcibus_attrs[] = {
>  	&dev_attr_bus_rescan.attr,
> +	&dev_attr_bus_reset.attr,
>  	&dev_attr_cpuaffinity.attr,
>  	&dev_attr_cpulistaffinity.attr,
>  	NULL,
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 7d85c04fbba2a..338dfcd983f1e 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5880,7 +5880,7 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
>   *
>   * Same as above except return -EAGAIN if the bus cannot be locked
>   */
> -static int __pci_reset_bus(struct pci_bus *bus)
> +int __pci_reset_bus(struct pci_bus *bus)
>  {
>  	int rc;
>  
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 14d00ce45bfa9..1cdc2c9547a7e 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -104,6 +104,7 @@ bool pci_reset_supported(struct pci_dev *dev);
>  void pci_init_reset_methods(struct pci_dev *dev);
>  int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
>  int pci_bus_error_reset(struct pci_dev *dev);
> +int __pci_reset_bus(struct pci_bus *bus);
>  
>  struct pci_cap_saved_data {
>  	u16		cap_nr;
> -- 
> 2.43.5
>
Keith Busch Oct. 25, 2024, 5:15 p.m. UTC | #2
On Fri, Oct 25, 2024 at 11:57:25AM -0500, Bjorn Helgaas wrote:
> [+cc Alex, Amey, Raphael]
> 
> On Fri, Oct 25, 2024 at 07:50:21AM -0700, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > Attempting a bus reset on an end device only works if it's the only
> > function on or below that bus.
> > 
> > Provide an attribute on the pci_bus device that can perform the
> > secondary bus reset. This makes it possible for a user to safely reset
> > multiple devices in a single command using the secondary bus reset
> > method.
> 
> I confess to being a little ambivalent or even hesitant about
> operations on the pci_bus (as opposed to on a pci_dev), but I can't
> really articulate a great reason, other than the fact that the "bus"
> is kind of abstract and from a hardware perspective, the *devices* are
> the only things we can control.

If you prefer, this could probably be a pci_dev attribute specific to
bridges. Maybe we call it "reset_subordinate" to make it different than
the existing "reset" attribute.

> I assume this is useful in some scenario.  I guess this is root-only,
> so there's no real concern about whether all the devices are used by
> the same VM or in the same IOMMU group or anything?

Yes, definitely root only. The concern for misuse is real, so must be
used carefully. If you have binded drivers that are not ready for this,
it may get confused. The same thing could happen with existing
function-level resets though too. Making this operate on the bus just
has potentially larger blast radius if you reset the wrong bus.

It is still useful. Ioctl VFIO_DEVICE_PCI_HOT_RESET also eventually
calls __pci_reset_bus(), but this gets the same thing without having to
bind all the functions to vfio.
Alex Williamson Oct. 25, 2024, 8:04 p.m. UTC | #3
On Fri, 25 Oct 2024 11:57:25 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> [+cc Alex, Amey, Raphael]
> 
> On Fri, Oct 25, 2024 at 07:50:21AM -0700, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > Attempting a bus reset on an end device only works if it's the only
> > function on or below that bus.
> > 
> > Provide an attribute on the pci_bus device that can perform the
> > secondary bus reset. This makes it possible for a user to safely reset
> > multiple devices in a single command using the secondary bus reset
> > method.  
> 
> I confess to being a little ambivalent or even hesitant about
> operations on the pci_bus (as opposed to on a pci_dev), but I can't
> really articulate a great reason, other than the fact that the "bus"
> is kind of abstract and from a hardware perspective, the *devices* are
> the only things we can control.
> 
> I assume this is useful in some scenario.  I guess this is root-only,
> so there's no real concern about whether all the devices are used by
> the same VM or in the same IOMMU group or anything?

I can understand your hesitation, but TBH I've wished for such a thing
in the past.  We can already twiddle the secondary bus reset bit using
setpci, but then we don't restore config space of the subordinate
devices and at best we need to remove and rescan those devices.

As Keith notes in his reply, we can also effectively trigger this same
thing through vfio-pci, so I think we're only making it a little easier
to accomplish through this sysfs attribute.  Yes, bad things can happen
if we were to reset a bus of running devices, but I don't know that
it's any more bad than resetting each of those devices individually.

I would agree that "reset" is not a great name since we're resetting
the subordinate devices and not the bridge device under which this
attribute would appear.  Seems there should also be an update to
Documentation/ABI/testing/sysfs-bus-pci in this patch too.  Thanks,

Alex

 
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
> > ---
> >  drivers/pci/pci-sysfs.c | 23 +++++++++++++++++++++++
> >  drivers/pci/pci.c       |  2 +-
> >  drivers/pci/pci.h       |  1 +
> >  3 files changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> > index 5d0f4db1cab78..616d64f12da4d 100644
> > --- a/drivers/pci/pci-sysfs.c
> > +++ b/drivers/pci/pci-sysfs.c
> > @@ -521,6 +521,28 @@ static ssize_t bus_rescan_store(struct device *dev,
> >  static struct device_attribute dev_attr_bus_rescan = __ATTR(rescan, 0200, NULL,
> >  							    bus_rescan_store);
> >  
> > +static ssize_t bus_reset_store(struct device *dev,
> > +				struct device_attribute *attr,
> > +				const char *buf, size_t count)
> > +{
> > +	struct pci_bus *bus = to_pci_bus(dev);
> > +	unsigned long val;
> > +
> > +	if (kstrtoul(buf, 0, &val) < 0)
> > +		return -EINVAL;
> > +
> > +	if (val) {
> > +		int ret = __pci_reset_bus(bus);
> > +
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	return count;
> > +}
> > +static struct device_attribute dev_attr_bus_reset = __ATTR(reset, 0200, NULL,
> > +							   bus_reset_store);
> > +
> >  #if defined(CONFIG_PM) && defined(CONFIG_ACPI)
> >  static ssize_t d3cold_allowed_store(struct device *dev,
> >  				    struct device_attribute *attr,
> > @@ -638,6 +660,7 @@ static struct attribute *pcie_dev_attrs[] = {
> >  
> >  static struct attribute *pcibus_attrs[] = {
> >  	&dev_attr_bus_rescan.attr,
> > +	&dev_attr_bus_reset.attr,
> >  	&dev_attr_cpuaffinity.attr,
> >  	&dev_attr_cpulistaffinity.attr,
> >  	NULL,
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index 7d85c04fbba2a..338dfcd983f1e 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -5880,7 +5880,7 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
> >   *
> >   * Same as above except return -EAGAIN if the bus cannot be locked
> >   */
> > -static int __pci_reset_bus(struct pci_bus *bus)
> > +int __pci_reset_bus(struct pci_bus *bus)
> >  {
> >  	int rc;
> >  
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 14d00ce45bfa9..1cdc2c9547a7e 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -104,6 +104,7 @@ bool pci_reset_supported(struct pci_dev *dev);
> >  void pci_init_reset_methods(struct pci_dev *dev);
> >  int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
> >  int pci_bus_error_reset(struct pci_dev *dev);
> > +int __pci_reset_bus(struct pci_bus *bus);
> >  
> >  struct pci_cap_saved_data {
> >  	u16		cap_nr;
> > -- 
> > 2.43.5
> >   
>
Keith Busch Oct. 25, 2024, 9:13 p.m. UTC | #4
On Fri, Oct 25, 2024 at 02:04:58PM -0600, Alex Williamson wrote:
> 
> I can understand your hesitation, but TBH I've wished for such a thing
> in the past.  We can already twiddle the secondary bus reset bit using
> setpci, but then we don't restore config space of the subordinate
> devices and at best we need to remove and rescan those devices.

I wasn't going to say it, but I have encountered the setpci method used
in the wild, and the results are not always consistent. :) Having the
kernel involved is safer, though you should still use it carefully.

> As Keith notes in his reply, we can also effectively trigger this same
> thing through vfio-pci, so I think we're only making it a little easier
> to accomplish through this sysfs attribute.  Yes, bad things can happen
> if we were to reset a bus of running devices, but I don't know that
> it's any more bad than resetting each of those devices individually.

If your drivers implement the .reset_prepare and .reset_done callbacks,
it's actually okay (in theory) to do this on a bus of running devices.

Might even be worth it to emit a warning if you reset running devices
that don't implement the callbacks. ?

> I would agree that "reset" is not a great name since we're resetting
> the subordinate devices and not the bridge device under which this
> attribute would appear.  Seems there should also be an update to
> Documentation/ABI/testing/sysfs-bus-pci in this patch too.  Thanks,

I'll spin out a v2 soon to change these:
  
  Move the attribute to pci_dev bridges only (it's actually easier to
  find that in sysfs compared to pci_bus attributes, IMO)

  Rename the attribute to "reset_subordinate"

  Add the attribute to pci sysfs Documentation.
diff mbox series

Patch

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5d0f4db1cab78..616d64f12da4d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -521,6 +521,28 @@  static ssize_t bus_rescan_store(struct device *dev,
 static struct device_attribute dev_attr_bus_rescan = __ATTR(rescan, 0200, NULL,
 							    bus_rescan_store);
 
+static ssize_t bus_reset_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct pci_bus *bus = to_pci_bus(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		int ret = __pci_reset_bus(bus);
+
+		if (ret)
+			return ret;
+	}
+
+	return count;
+}
+static struct device_attribute dev_attr_bus_reset = __ATTR(reset, 0200, NULL,
+							   bus_reset_store);
+
 #if defined(CONFIG_PM) && defined(CONFIG_ACPI)
 static ssize_t d3cold_allowed_store(struct device *dev,
 				    struct device_attribute *attr,
@@ -638,6 +660,7 @@  static struct attribute *pcie_dev_attrs[] = {
 
 static struct attribute *pcibus_attrs[] = {
 	&dev_attr_bus_rescan.attr,
+	&dev_attr_bus_reset.attr,
 	&dev_attr_cpuaffinity.attr,
 	&dev_attr_cpulistaffinity.attr,
 	NULL,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7d85c04fbba2a..338dfcd983f1e 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5880,7 +5880,7 @@  EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-static int __pci_reset_bus(struct pci_bus *bus)
+int __pci_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 14d00ce45bfa9..1cdc2c9547a7e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -104,6 +104,7 @@  bool pci_reset_supported(struct pci_dev *dev);
 void pci_init_reset_methods(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
+int __pci_reset_bus(struct pci_bus *bus);
 
 struct pci_cap_saved_data {
 	u16		cap_nr;