diff mbox series

[v10,4/8] PCI/sysfs: Allow userspace to query and set device reset mechanism

Message ID 20210709123813.8700-5-ameynarkhede03@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Bjorn Helgaas
Headers show
Series Expose and manage PCI device reset | expand

Commit Message

Amey Narkhede July 9, 2021, 12:38 p.m. UTC
Add reset_method sysfs attribute to enable user to query and set user
preferred device reset methods and their ordering.

Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
---
 Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
 drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
 2 files changed, 122 insertions(+)

Comments

Bjorn Helgaas July 27, 2021, 11:28 p.m. UTC | #1
On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> Add reset_method sysfs attribute to enable user to query and set user
> preferred device reset methods and their ordering.
> 
> Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> ---
>  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
>  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
>  2 files changed, 122 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> index ef00fada2..43f4e33c7 100644
> --- a/Documentation/ABI/testing/sysfs-bus-pci
> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> @@ -121,6 +121,25 @@ Description:
>  		child buses, and re-discover devices removed earlier
>  		from this part of the device tree.
>  
> +What:		/sys/bus/pci/devices/.../reset_method
> +Date:		March 2021
> +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> +Description:
> +		Some devices allow an individual function to be reset
> +		without affecting other functions in the same slot.
> +
> +		For devices that have this support, a file named
> +		reset_method will be present in sysfs. Initially reading
> +		this file will give names of the device supported reset
> +		methods and their ordering. After write, this file will
> +		give names and ordering of currently enabled reset methods.
> +		Writing the name or comma separated list of names of any of
> +		the device supported reset methods to this file will set
> +		the reset methods and their ordering to be used when
> +		resetting the device. Writing empty string to this file
> +		will disable ability to reset the device and writing
> +		"default" will return to the original value.
> +
>  What:		/sys/bus/pci/devices/.../reset
>  Date:		July 2009
>  Contact:	Michael S. Tsirkin <mst@redhat.com>
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 316f70c3e..8a740e211 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -1334,6 +1334,108 @@ static const struct attribute_group pci_dev_rom_attr_group = {
>  	.is_bin_visible = pci_dev_rom_attr_is_visible,
>  };
>  
> +static ssize_t reset_method_show(struct device *dev,
> +				 struct device_attribute *attr,
> +				 char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	ssize_t len = 0;
> +	int i, idx;
> +
> +	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
> +		idx = pdev->reset_methods[i];
> +		if (!idx)

I know it's totally OCD, but can you use the same "m" for indexing
pci_reset_fn_methods[] that you used in __pci_reset_function_locked()
and ... oops, I meant to say pci_init_reset_methods(), but there you
used "i".  Maybe pci_init_reset_methods() could use "m" as well, and
maybe it could use "i" instead of "n" to index dev->reset_methods[]?

> +			break;
> +
> +		len += sysfs_emit_at(buf, len, "%s%s", len ? "," : "",
> +				     pci_reset_fn_methods[idx].name);

Maybe separate with spaces instead of commas?  E.g.,

  device_specific flr pm bus

instead of

  device_specific,flr,pm,bus

I think the former is less error-prone when parsing.  Easier to split
in a shell script, for example.

> +	}
> +
> +	if (len)
> +		len += sysfs_emit_at(buf, len, "\n");
> +
> +	return len;
> +}
> +
> +static ssize_t reset_method_store(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	int n = 0;
> +	char *name, *options = NULL;
> +	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };
> +
> +	if (count >= (PAGE_SIZE - 1))
> +		return -EINVAL;

I'm not the sysfs expert, but surely the sysfs infrastructure already
guarantees this?

> +
> +	if (sysfs_streq(buf, "")) {
> +		pci_warn(pdev, "All device reset methods disabled by user");

Can we just do:

  pdev->reset_methods[0] = 0;

here and dispense with the memcpy()?

> +		goto set_reset_methods;
> +	}
> +
> +	if (sysfs_streq(buf, "default")) {
> +		pci_init_reset_methods(pdev);
> +		return count;
> +	}
> +
> +	options = kstrndup(buf, count, GFP_KERNEL);

I assume the kstrndup() is because strsep() writes into the buffer?
Aren't we allowed to write into the buffer we get from sysfs?  Does
the user ever see the buffer contents again?  I would think sysfs
would have already done a copy_from_user() or whatever.

> +	if (!options)
> +		return -ENOMEM;
> +
> +	while ((name = strsep(&options, ",")) != NULL) {

*If* you change the show to use spaces, obviously this would have to
change as well (as well as the doc).

> +		int i;
> +
> +		if (sysfs_streq(name, ""))
> +			continue;
> +
> +		name = strim(name);
> +
> +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> +				reset_methods[n++] = i;

Can we build this directly in pdev->reset_methods[] so we don't need
the memcpy() below?

> +				break;
> +			}
> +		}

Same "m" for indexing pci_reset_fn_methods[], "i" for
pdev->reset_methods[] comment here.

> +		if (i == PCI_NUM_RESET_METHODS) {
> +			kfree(options);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (!pci_reset_fn_methods[1].reset_fn(pdev, 1) && reset_methods[0] != 1)
> +		pci_warn(pdev, "Device specific reset disabled/de-prioritized by user");

Hmmm.  I sort of see the point here, but I wish we didn't have the
implicit dependency on pci_reset_fn_methods[1] being
pci_dev_specific_reset().

I know we've talked about this before.  I'm still not 100% sure either
of these warnings is worthwhile, especially since we're not *using*
the reset here.  It might be useful at the point where we try to *do*
a reset.  I dunno.  Maybe this is the best place since this is where
the user potentially screwed up.

> +set_reset_methods:
> +	memcpy(pdev->reset_methods, reset_methods, sizeof(reset_methods));
> +	kfree(options);
> +	return count;
> +}
> +static DEVICE_ATTR_RW(reset_method);
> +
> +static struct attribute *pci_dev_reset_method_attrs[] = {
> +	&dev_attr_reset_method.attr,
> +	NULL,
> +};
> +
> +static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
> +						    struct attribute *a, int n)
> +{
> +	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> +
> +	if (!pci_reset_supported(pdev))
> +		return 0;
> +
> +	return a->mode;
> +}
> +
> +static const struct attribute_group pci_dev_reset_method_attr_group = {
> +	.attrs = pci_dev_reset_method_attrs,
> +	.is_visible = pci_dev_reset_method_attr_is_visible,
> +};
> +
>  static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
>  			   const char *buf, size_t count)
>  {
> @@ -1491,6 +1593,7 @@ const struct attribute_group *pci_dev_groups[] = {
>  	&pci_dev_config_attr_group,
>  	&pci_dev_rom_attr_group,
>  	&pci_dev_reset_attr_group,
> +	&pci_dev_reset_method_attr_group,
>  	&pci_dev_vpd_attr_group,
>  #ifdef CONFIG_DMI
>  	&pci_dev_smbios_attr_group,
> -- 
> 2.32.0
>
Krzysztof Wilczyński July 28, 2021, 1:27 a.m. UTC | #2
Hi Bjorn,

[...]
> > +	if (count >= (PAGE_SIZE - 1))
> > +		return -EINVAL;
> 
> I'm not the sysfs expert, but surely the sysfs infrastructure already
> guarantees this?

We don't need to store any value, since we are processing the input from
the userspace, thus ensuring that we have room for the newline is not
needed, especially since the show() function dynamically builds the
content to show, so indeed this check can be dropped.

To add, there aren't any guarantees other from sysfs than we get a up to
a PAGE_SIZE worth of data in the buffer.

[...]
> > +	options = kstrndup(buf, count, GFP_KERNEL);
> 
> I assume the kstrndup() is because strsep() writes into the buffer?

Yes, Amey added kstrndup() in v6 following my recommendation as per:

  https://lore.kernel.org/linux-pci/20210606125800.GA76573@rocinante.localdomain/

This was to avoid removing the const quantifier through a type cast
given that the signature of the function denotes that the buffer is
a pointer to immutable string, as per:

  https://elixir.bootlin.com/linux/v5.14-rc3/source/include/linux/device/driver.h#L137

Some other sysfs users do employ the cast when using strtok() and I am
not so such it's the right way to do it, as per:

  drivers/s390/net/qeth_l3_sys.c
  232:	tmp = strsep((char **)&buf, "\n");
  
  drivers/media/rc/rc-main.c
  1167:	while ((tmp = strsep((char **)&buf, " \n")) != NULL) {

> Aren't we allowed to write into the buffer we get from sysfs?  Does
> the user ever see the buffer contents again?  I would think sysfs
> would have already done a copy_from_user() or whatever.

I might be wrong about this, but I suppose this might be to stop people
from accidentally freeing the buffer as kernfs_fop_write_iter() would do
it after all the internal housekeeping is done, provided that someone
pays attention to compile time warnings.

	Krzysztof
Bjorn Helgaas July 28, 2021, 3:36 p.m. UTC | #3
On Wed, Jul 28, 2021 at 03:27:40AM +0200, Krzysztof Wilczyński wrote:

> > > +	options = kstrndup(buf, count, GFP_KERNEL);
> > 
> > I assume the kstrndup() is because strsep() writes into the buffer?
> 
> Yes, Amey added kstrndup() in v6 following my recommendation as per:
> 
>   https://lore.kernel.org/linux-pci/20210606125800.GA76573@rocinante.localdomain/
> 
> This was to avoid removing the const quantifier through a type cast
> given that the signature of the function denotes that the buffer is
> a pointer to immutable string, as per:
> 
>   https://elixir.bootlin.com/linux/v5.14-rc3/source/include/linux/device/driver.h#L137

Ah, right, thanks!  Definitely prefer not to cast away the constness.

I guess the strings here are short (<100 chars max), so no big deal to
duplicate them.  Sorry for the noise!
Bjorn Helgaas July 28, 2021, 5:09 p.m. UTC | #4
[+cc Serge, linux-security-module: should we check CAP_SYS_ADMIN or
similar for changing PCI reset mechanisms for a device?]

On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> Add reset_method sysfs attribute to enable user to query and set user
> preferred device reset methods and their ordering.
> 
> Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> ---
>  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
>  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
>  2 files changed, 122 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> index ef00fada2..43f4e33c7 100644
> --- a/Documentation/ABI/testing/sysfs-bus-pci
> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> @@ -121,6 +121,25 @@ Description:
>  		child buses, and re-discover devices removed earlier
>  		from this part of the device tree.
>  
> +What:		/sys/bus/pci/devices/.../reset_method
> +Date:		March 2021
> +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> +Description:
> +		Some devices allow an individual function to be reset
> +		without affecting other functions in the same slot.
> +
> +		For devices that have this support, a file named
> +		reset_method will be present in sysfs. Initially reading
> +		this file will give names of the device supported reset
> +		methods and their ordering. After write, this file will
> +		give names and ordering of currently enabled reset methods.
> +		Writing the name or comma separated list of names of any of
> +		the device supported reset methods to this file will set
> +		the reset methods and their ordering to be used when
> +		resetting the device. Writing empty string to this file
> +		will disable ability to reset the device and writing
> +		"default" will return to the original value.
> +
>  What:		/sys/bus/pci/devices/.../reset
>  Date:		July 2009
>  Contact:	Michael S. Tsirkin <mst@redhat.com>
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 316f70c3e..8a740e211 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -1334,6 +1334,108 @@ static const struct attribute_group pci_dev_rom_attr_group = {
>  	.is_bin_visible = pci_dev_rom_attr_is_visible,
>  };
>  
> +static ssize_t reset_method_show(struct device *dev,
> +				 struct device_attribute *attr,
> +				 char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	ssize_t len = 0;
> +	int i, idx;
> +
> +	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
> +		idx = pdev->reset_methods[i];
> +		if (!idx)
> +			break;
> +
> +		len += sysfs_emit_at(buf, len, "%s%s", len ? "," : "",
> +				     pci_reset_fn_methods[idx].name);
> +	}
> +
> +	if (len)
> +		len += sysfs_emit_at(buf, len, "\n");
> +
> +	return len;
> +}
> +
> +static ssize_t reset_method_store(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	int n = 0;
> +	char *name, *options = NULL;
> +	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };

Should this check "capable(CAP_SYS_ADMIN)" or similar?  The sysfs file
is mode 0644, so writable only by root.

I do note that Documentation/process/adding-syscalls.rst suggests
"avoid adding new uses of the already overly-general CAP_SYS_ADMIN
capability."  But CAP_SYS_ADMIN is used for all the other checks in
pci-sysfs.c.

> +	if (count >= (PAGE_SIZE - 1))
> +		return -EINVAL;
> +
> +	if (sysfs_streq(buf, "")) {
> +		pci_warn(pdev, "All device reset methods disabled by user");
> +		goto set_reset_methods;
> +	}
> +
> +	if (sysfs_streq(buf, "default")) {
> +		pci_init_reset_methods(pdev);
> +		return count;
> +	}
> +
> +	options = kstrndup(buf, count, GFP_KERNEL);
> +	if (!options)
> +		return -ENOMEM;
> +
> +	while ((name = strsep(&options, ",")) != NULL) {
> +		int i;
> +
> +		if (sysfs_streq(name, ""))
> +			continue;
> +
> +		name = strim(name);
> +
> +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> +				reset_methods[n++] = i;
> +				break;
> +			}
> +		}
> +
> +		if (i == PCI_NUM_RESET_METHODS) {
> +			kfree(options);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (!pci_reset_fn_methods[1].reset_fn(pdev, 1) && reset_methods[0] != 1)
> +		pci_warn(pdev, "Device specific reset disabled/de-prioritized by user");
> +
> +set_reset_methods:
> +	memcpy(pdev->reset_methods, reset_methods, sizeof(reset_methods));
> +	kfree(options);
> +	return count;
> +}
> +static DEVICE_ATTR_RW(reset_method);
> +
> +static struct attribute *pci_dev_reset_method_attrs[] = {
> +	&dev_attr_reset_method.attr,
> +	NULL,
> +};
> +
> +static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
> +						    struct attribute *a, int n)
> +{
> +	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> +
> +	if (!pci_reset_supported(pdev))
> +		return 0;
> +
> +	return a->mode;
> +}
> +
> +static const struct attribute_group pci_dev_reset_method_attr_group = {
> +	.attrs = pci_dev_reset_method_attrs,
> +	.is_visible = pci_dev_reset_method_attr_is_visible,
> +};
> +
>  static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
>  			   const char *buf, size_t count)
>  {
> @@ -1491,6 +1593,7 @@ const struct attribute_group *pci_dev_groups[] = {
>  	&pci_dev_config_attr_group,
>  	&pci_dev_rom_attr_group,
>  	&pci_dev_reset_attr_group,
> +	&pci_dev_reset_method_attr_group,
>  	&pci_dev_vpd_attr_group,
>  #ifdef CONFIG_DMI
>  	&pci_dev_smbios_attr_group,
> -- 
> 2.32.0
>
Amey Narkhede July 28, 2021, 5:59 p.m. UTC | #5
On 21/07/27 06:28PM, Bjorn Helgaas wrote:
> On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> > Add reset_method sysfs attribute to enable user to query and set user
> > preferred device reset methods and their ordering.
> >
> > Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> > ---
> >  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
> >  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
> >  2 files changed, 122 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> > index ef00fada2..43f4e33c7 100644
> > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > @@ -121,6 +121,25 @@ Description:
> >  		child buses, and re-discover devices removed earlier
> >  		from this part of the device tree.
> >
> > +What:		/sys/bus/pci/devices/.../reset_method
> > +Date:		March 2021
> > +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> > +Description:
> > +		Some devices allow an individual function to be reset
> > +		without affecting other functions in the same slot.
> > +
> > +		For devices that have this support, a file named
> > +		reset_method will be present in sysfs. Initially reading
> > +		this file will give names of the device supported reset
> > +		methods and their ordering. After write, this file will
> > +		give names and ordering of currently enabled reset methods.
> > +		Writing the name or comma separated list of names of any of
> > +		the device supported reset methods to this file will set
> > +		the reset methods and their ordering to be used when
> > +		resetting the device. Writing empty string to this file
> > +		will disable ability to reset the device and writing
> > +		"default" will return to the original value.
> > +
> >  What:		/sys/bus/pci/devices/.../reset
> >  Date:		July 2009
> >  Contact:	Michael S. Tsirkin <mst@redhat.com>
>
[...]

> > +		int i;
> > +
> > +		if (sysfs_streq(name, ""))
> > +			continue;
> > +
> > +		name = strim(name);
> > +
> > +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> > +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> > +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> > +				reset_methods[n++] = i;
>
> Can we build this directly in pdev->reset_methods[] so we don't need
> the memcpy() below?
>
This is to avoid writing partial values directly to dev->reset_methods.
So for example if user writes flr,unsupported_value then
dev->reset_methods should not be modified even though flr is valid reset
method in this example to avoid partial writes. Either operation(in this
case writing supported reset methods to reset_method attr) succeeds
completely or it fails othewise.

Thanks,
Amey

[...]
Bjorn Helgaas July 28, 2021, 6:13 p.m. UTC | #6
On Wed, Jul 28, 2021 at 11:29:50PM +0530, Amey Narkhede wrote:
> On 21/07/27 06:28PM, Bjorn Helgaas wrote:
> > On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> > > Add reset_method sysfs attribute to enable user to query and set user
> > > preferred device reset methods and their ordering.
> > >
> > > Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> > > ---
> > >  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
> > >  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
> > >  2 files changed, 122 insertions(+)
> > >
> > > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> > > index ef00fada2..43f4e33c7 100644
> > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > @@ -121,6 +121,25 @@ Description:
> > >  		child buses, and re-discover devices removed earlier
> > >  		from this part of the device tree.
> > >
> > > +What:		/sys/bus/pci/devices/.../reset_method
> > > +Date:		March 2021
> > > +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> > > +Description:
> > > +		Some devices allow an individual function to be reset
> > > +		without affecting other functions in the same slot.
> > > +
> > > +		For devices that have this support, a file named
> > > +		reset_method will be present in sysfs. Initially reading
> > > +		this file will give names of the device supported reset
> > > +		methods and their ordering. After write, this file will
> > > +		give names and ordering of currently enabled reset methods.
> > > +		Writing the name or comma separated list of names of any of
> > > +		the device supported reset methods to this file will set
> > > +		the reset methods and their ordering to be used when
> > > +		resetting the device. Writing empty string to this file
> > > +		will disable ability to reset the device and writing
> > > +		"default" will return to the original value.
> > > +
> > >  What:		/sys/bus/pci/devices/.../reset
> > >  Date:		July 2009
> > >  Contact:	Michael S. Tsirkin <mst@redhat.com>
> >
> [...]
> 
> > > +		int i;
> > > +
> > > +		if (sysfs_streq(name, ""))
> > > +			continue;
> > > +
> > > +		name = strim(name);
> > > +
> > > +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> > > +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> > > +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> > > +				reset_methods[n++] = i;
> >
> > Can we build this directly in pdev->reset_methods[] so we don't need
> > the memcpy() below?
> >
> This is to avoid writing partial values directly to dev->reset_methods.
> So for example if user writes flr,unsupported_value then
> dev->reset_methods should not be modified even though flr is valid reset
> method in this example to avoid partial writes. Either operation(in this
> case writing supported reset methods to reset_method attr) succeeds
> completely or it fails othewise.

I guess the question is, if somebody writes

  flr junk bus

and we set the supported methods to "flr bus", is that OK?  It seems
OK to me; obviously we have to protect ourselves from attack, but
over-zealous checking can make things unnecessarily complicated.
Amey Narkhede July 28, 2021, 6:58 p.m. UTC | #7
On 21/07/28 01:13PM, Bjorn Helgaas wrote:
> On Wed, Jul 28, 2021 at 11:29:50PM +0530, Amey Narkhede wrote:
> > On 21/07/27 06:28PM, Bjorn Helgaas wrote:
> > > On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> > > > Add reset_method sysfs attribute to enable user to query and set user
> > > > preferred device reset methods and their ordering.
> > > >
> > > > Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> > > > ---
> > > >  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
> > > >  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
> > > >  2 files changed, 122 insertions(+)
> > > >
> > > > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> > > > index ef00fada2..43f4e33c7 100644
> > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > @@ -121,6 +121,25 @@ Description:
> > > >  		child buses, and re-discover devices removed earlier
> > > >  		from this part of the device tree.
> > > >
> > > > +What:		/sys/bus/pci/devices/.../reset_method
> > > > +Date:		March 2021
> > > > +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> > > > +Description:
> > > > +		Some devices allow an individual function to be reset
> > > > +		without affecting other functions in the same slot.
> > > > +
> > > > +		For devices that have this support, a file named
> > > > +		reset_method will be present in sysfs. Initially reading
> > > > +		this file will give names of the device supported reset
> > > > +		methods and their ordering. After write, this file will
> > > > +		give names and ordering of currently enabled reset methods.
> > > > +		Writing the name or comma separated list of names of any of
> > > > +		the device supported reset methods to this file will set
> > > > +		the reset methods and their ordering to be used when
> > > > +		resetting the device. Writing empty string to this file
> > > > +		will disable ability to reset the device and writing
> > > > +		"default" will return to the original value.
> > > > +
> > > >  What:		/sys/bus/pci/devices/.../reset
> > > >  Date:		July 2009
> > > >  Contact:	Michael S. Tsirkin <mst@redhat.com>
> > >
> > [...]
> >
> > > > +		int i;
> > > > +
> > > > +		if (sysfs_streq(name, ""))
> > > > +			continue;
> > > > +
> > > > +		name = strim(name);
> > > > +
> > > > +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> > > > +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> > > > +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> > > > +				reset_methods[n++] = i;
> > >
> > > Can we build this directly in pdev->reset_methods[] so we don't need
> > > the memcpy() below?
> > >
> > This is to avoid writing partial values directly to dev->reset_methods.
> > So for example if user writes flr,unsupported_value then
> > dev->reset_methods should not be modified even though flr is valid reset
> > method in this example to avoid partial writes. Either operation(in this
> > case writing supported reset methods to reset_method attr) succeeds
> > completely or it fails othewise.
>
> I guess the question is, if somebody writes
>
>   flr junk bus
>
> and we set the supported methods to "flr bus", is that OK?  It seems
> OK to me; obviously we have to protect ourselves from attack, but
> over-zealous checking can make things unnecessarily complicated.
The problem is once we encounter "junk" we return -EINVAL so in your
example we only set flr.

Thanks,
Amey
Bjorn Helgaas July 28, 2021, 8:18 p.m. UTC | #8
On Thu, Jul 29, 2021 at 12:28:31AM +0530, Amey Narkhede wrote:
> On 21/07/28 01:13PM, Bjorn Helgaas wrote:
> > On Wed, Jul 28, 2021 at 11:29:50PM +0530, Amey Narkhede wrote:
> > > On 21/07/27 06:28PM, Bjorn Helgaas wrote:
> > > > On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> > > > > Add reset_method sysfs attribute to enable user to query and set user
> > > > > preferred device reset methods and their ordering.
> > > > >
> > > > > Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> > > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > > Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> > > > > ---
> > > > >  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
> > > > >  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
> > > > >  2 files changed, 122 insertions(+)
> > > > >
> > > > > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> > > > > index ef00fada2..43f4e33c7 100644
> > > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > > @@ -121,6 +121,25 @@ Description:
> > > > >  		child buses, and re-discover devices removed earlier
> > > > >  		from this part of the device tree.
> > > > >
> > > > > +What:		/sys/bus/pci/devices/.../reset_method
> > > > > +Date:		March 2021
> > > > > +Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
> > > > > +Description:
> > > > > +		Some devices allow an individual function to be reset
> > > > > +		without affecting other functions in the same slot.
> > > > > +
> > > > > +		For devices that have this support, a file named
> > > > > +		reset_method will be present in sysfs. Initially reading
> > > > > +		this file will give names of the device supported reset
> > > > > +		methods and their ordering. After write, this file will
> > > > > +		give names and ordering of currently enabled reset methods.
> > > > > +		Writing the name or comma separated list of names of any of
> > > > > +		the device supported reset methods to this file will set
> > > > > +		the reset methods and their ordering to be used when
> > > > > +		resetting the device. Writing empty string to this file
> > > > > +		will disable ability to reset the device and writing
> > > > > +		"default" will return to the original value.
> > > > > +
> > > > >  What:		/sys/bus/pci/devices/.../reset
> > > > >  Date:		July 2009
> > > > >  Contact:	Michael S. Tsirkin <mst@redhat.com>
> > > >
> > > [...]
> > >
> > > > > +		int i;
> > > > > +
> > > > > +		if (sysfs_streq(name, ""))
> > > > > +			continue;
> > > > > +
> > > > > +		name = strim(name);
> > > > > +
> > > > > +		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
> > > > > +			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
> > > > > +			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
> > > > > +				reset_methods[n++] = i;
> > > >
> > > > Can we build this directly in pdev->reset_methods[] so we don't need
> > > > the memcpy() below?
> > > >
> > > This is to avoid writing partial values directly to dev->reset_methods.
> > > So for example if user writes flr,unsupported_value then
> > > dev->reset_methods should not be modified even though flr is valid reset
> > > method in this example to avoid partial writes. Either operation(in this
> > > case writing supported reset methods to reset_method attr) succeeds
> > > completely or it fails othewise.
> >
> > I guess the question is, if somebody writes
> >
> >   flr junk bus
> >
> > and we set the supported methods to "flr bus", is that OK?  It seems
> > OK to me; obviously we have to protect ourselves from attack, but
> > over-zealous checking can make things unnecessarily complicated.
>
> The problem is once we encounter "junk" we return -EINVAL so in your
> example we only set flr.

We're still talking about whether we need the memcpy().  We can decide
whether it's the right thing to return -EINVAL if we encounter "junk".

Maybe it's OK if we set it to "flr" and ignore everything after
"junk"?  Or maybe it's OK to ignore unsupported values and set it to
"flr bus"?
Amey Narkhede July 31, 2021, 7:15 p.m. UTC | #9
On 21/07/27 06:28PM, Bjorn Helgaas wrote:
> On Fri, Jul 09, 2021 at 06:08:09PM +0530, Amey Narkhede wrote:
> > Add reset_method sysfs attribute to enable user to query and set user
> > preferred device reset methods and their ordering.
> >
> > Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Amey Narkhede <ameynarkhede03@gmail.com>
> > ---
> >  Documentation/ABI/testing/sysfs-bus-pci |  19 +++++
> >  drivers/pci/pci-sysfs.c                 | 103 ++++++++++++++++++++++++
> >  2 files changed, 122 insertions(+)
> >
[...]

> > +		if (i == PCI_NUM_RESET_METHODS) {
> > +			kfree(options);
> > +			return -EINVAL;
> > +		}
> > +	}
> > +
> > +	if (!pci_reset_fn_methods[1].reset_fn(pdev, 1) && reset_methods[0] != 1)
> > +		pci_warn(pdev, "Device specific reset disabled/de-prioritized by user");
>
> Hmmm.  I sort of see the point here, but I wish we didn't have the
> implicit dependency on pci_reset_fn_methods[1] being
> pci_dev_specific_reset().
>
> I know we've talked about this before.  I'm still not 100% sure either
> of these warnings is worthwhile, especially since we're not *using*
> the reset here.  It might be useful at the point where we try to *do*
> a reset.  I dunno.  Maybe this is the best place since this is where
> the user potentially screwed up.
>
I agree this is the best place for the warning as this where potentially
broken reset methods may get called/prioritized. We can move this check
to __pci_reset_function_locked() if you want.

> > +set_reset_methods:
> > +	memcpy(pdev->reset_methods, reset_methods, sizeof(reset_methods));
> > +	kfree(options);
> > +	return count;
> > +}
> > +static DEVICE_ATTR_RW(reset_method);
> > +
> > +static struct attribute *pci_dev_reset_method_attrs[] = {
> > +	&dev_attr_reset_method.attr,
> > +	NULL,
> > +};
> > +
> > +static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
> > +						    struct attribute *a, int n)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
> > +
> > +	if (!pci_reset_supported(pdev))
> > +		return 0;
> > +
> > +	return a->mode;
> > +}
> > +
> > +static const struct attribute_group pci_dev_reset_method_attr_group = {
> > +	.attrs = pci_dev_reset_method_attrs,
> > +	.is_visible = pci_dev_reset_method_attr_is_visible,
> > +};
> > +
> >  static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
> >  			   const char *buf, size_t count)
> >  {
> > @@ -1491,6 +1593,7 @@ const struct attribute_group *pci_dev_groups[] = {
> >  	&pci_dev_config_attr_group,
> >  	&pci_dev_rom_attr_group,
> >  	&pci_dev_reset_attr_group,
> > +	&pci_dev_reset_method_attr_group,
> >  	&pci_dev_vpd_attr_group,
> >  #ifdef CONFIG_DMI
> >  	&pci_dev_smbios_attr_group,
> > --
> > 2.32.0
> >
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index ef00fada2..43f4e33c7 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -121,6 +121,25 @@  Description:
 		child buses, and re-discover devices removed earlier
 		from this part of the device tree.
 
+What:		/sys/bus/pci/devices/.../reset_method
+Date:		March 2021
+Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
+Description:
+		Some devices allow an individual function to be reset
+		without affecting other functions in the same slot.
+
+		For devices that have this support, a file named
+		reset_method will be present in sysfs. Initially reading
+		this file will give names of the device supported reset
+		methods and their ordering. After write, this file will
+		give names and ordering of currently enabled reset methods.
+		Writing the name or comma separated list of names of any of
+		the device supported reset methods to this file will set
+		the reset methods and their ordering to be used when
+		resetting the device. Writing empty string to this file
+		will disable ability to reset the device and writing
+		"default" will return to the original value.
+
 What:		/sys/bus/pci/devices/.../reset
 Date:		July 2009
 Contact:	Michael S. Tsirkin <mst@redhat.com>
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 316f70c3e..8a740e211 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1334,6 +1334,108 @@  static const struct attribute_group pci_dev_rom_attr_group = {
 	.is_bin_visible = pci_dev_rom_attr_is_visible,
 };
 
+static ssize_t reset_method_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	ssize_t len = 0;
+	int i, idx;
+
+	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
+		idx = pdev->reset_methods[i];
+		if (!idx)
+			break;
+
+		len += sysfs_emit_at(buf, len, "%s%s", len ? "," : "",
+				     pci_reset_fn_methods[idx].name);
+	}
+
+	if (len)
+		len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+static ssize_t reset_method_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int n = 0;
+	char *name, *options = NULL;
+	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };
+
+	if (count >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	if (sysfs_streq(buf, "")) {
+		pci_warn(pdev, "All device reset methods disabled by user");
+		goto set_reset_methods;
+	}
+
+	if (sysfs_streq(buf, "default")) {
+		pci_init_reset_methods(pdev);
+		return count;
+	}
+
+	options = kstrndup(buf, count, GFP_KERNEL);
+	if (!options)
+		return -ENOMEM;
+
+	while ((name = strsep(&options, ",")) != NULL) {
+		int i;
+
+		if (sysfs_streq(name, ""))
+			continue;
+
+		name = strim(name);
+
+		for (i = 1; i < PCI_NUM_RESET_METHODS; i++) {
+			if (sysfs_streq(name, pci_reset_fn_methods[i].name) &&
+			    !pci_reset_fn_methods[i].reset_fn(pdev, 1)) {
+				reset_methods[n++] = i;
+				break;
+			}
+		}
+
+		if (i == PCI_NUM_RESET_METHODS) {
+			kfree(options);
+			return -EINVAL;
+		}
+	}
+
+	if (!pci_reset_fn_methods[1].reset_fn(pdev, 1) && reset_methods[0] != 1)
+		pci_warn(pdev, "Device specific reset disabled/de-prioritized by user");
+
+set_reset_methods:
+	memcpy(pdev->reset_methods, reset_methods, sizeof(reset_methods));
+	kfree(options);
+	return count;
+}
+static DEVICE_ATTR_RW(reset_method);
+
+static struct attribute *pci_dev_reset_method_attrs[] = {
+	&dev_attr_reset_method.attr,
+	NULL,
+};
+
+static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
+						    struct attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if (!pci_reset_supported(pdev))
+		return 0;
+
+	return a->mode;
+}
+
+static const struct attribute_group pci_dev_reset_method_attr_group = {
+	.attrs = pci_dev_reset_method_attrs,
+	.is_visible = pci_dev_reset_method_attr_is_visible,
+};
+
 static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
 			   const char *buf, size_t count)
 {
@@ -1491,6 +1593,7 @@  const struct attribute_group *pci_dev_groups[] = {
 	&pci_dev_config_attr_group,
 	&pci_dev_rom_attr_group,
 	&pci_dev_reset_attr_group,
+	&pci_dev_reset_method_attr_group,
 	&pci_dev_vpd_attr_group,
 #ifdef CONFIG_DMI
 	&pci_dev_smbios_attr_group,