diff mbox series

[RFC,3/3] PCI: Expose PCIe Resizable BAR support via sysfs

Message ID 166067883598.1885802.7663904087127986133.stgit@omen (mailing list archive)
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series PCI: Expose resource resizing through sysfs | expand

Commit Message

Alex Williamson Aug. 16, 2022, 7:41 p.m. UTC
This proposes a simple sysfs interface to Resizable BAR support,
largely for the purposes of assigning such devices to a VM through
VFIO.  Resizable BARs present a difficult feature to expose to a VM
through emulation, as resizing a BAR is done on the host.  It can
fail, and often does, but we have no means via emulation of a PCIe
REBAR capability to handle the error cases.

A vfio-pci specific ioctl interface is also cumbersome as there are
often multiple devices within the same bridge aperture and handling
them is a challenge.  In the interface proposed here, expanding a
BAR potentially requires such devices to be soft-removed during the
resize operation and rescanned after, in order for all the necessary
resources to be released.  A pci-sysfs interface is also more
universal than a vfio specific interface.

Please see the ABI documentation update for usage.

Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

NB, I realize the read value of the sysfs attribute provides two values,
the bitmap of possible sizes and the current size.  There are a number
of ways to determine the current size, including stat(1) on the
resourceN file, but I found this output to be useful while developing
the interface, and it provides consistency with the store value to the
attribute.  Suggestions welcome for better semantics.

 Documentation/ABI/testing/sysfs-bus-pci |   27 +++++++
 drivers/pci/pci-sysfs.c                 |  118 +++++++++++++++++++++++++++++++
 include/linux/pci.h                     |    1 
 3 files changed, 146 insertions(+)
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 6fc2c2efe8ab..5eea5d89c9f2 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -457,3 +457,30 @@  Description:
 
 		The file is writable if the PF is bound to a driver that
 		implements ->sriov_set_msix_vec_count().
+
+What:		/sys/bus/pci/devices/.../resourceN_resize
+Date:		August 2022
+Contact:	Alex Williamson <alex.williamson@redhat.com>
+Description:
+		These files provide an interface to PCIe Resizable BAR support.
+		A file is created for each BAR resource (N) supported by the
+		PCIe Resizable BAR extended capability of the device.  Reading
+		each file exposes the capability and current setting for the
+		device, ex.
+
+		# cat resource1_resize
+		00000000000001c0:6
+
+		The first field provides the supported sizes bitmap, where
+		bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc.  In the above example
+		the device supports 64MB, 128MB, and 256MB BAR sizes.  The
+		second field provides the current setting, the value 6
+		indicates bit6 is set, which corresponds to 64MB.
+
+		When writing the file, only the size value is written, e.g.
+
+		# echo 7 > resource1_resize
+
+		This indicates to set the size value corresponding to bit 7,
+		which is 128MB.  The resulting size is 2 ^ (bit# + 20).  This
+		definition matches the PCIe specification of this capability.
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9ac92e6a2397..aa59a2de508f 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1143,6 +1143,7 @@  static void pci_remove_resource_files(struct pci_dev *pdev)
 
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		struct bin_attribute *res_attr;
+		struct dev_ext_attribute *resize_attr;
 
 		res_attr = pdev->res_attr[i];
 		if (res_attr) {
@@ -1155,6 +1156,13 @@  static void pci_remove_resource_files(struct pci_dev *pdev)
 			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
 			kfree(res_attr);
 		}
+
+		resize_attr = pdev->res_attr_resize[i];
+		if (resize_attr) {
+			sysfs_remove_file(&pdev->dev.kobj,
+					  &resize_attr->attr.attr);
+			kfree(resize_attr);
+		}
 	}
 }
 
@@ -1208,6 +1216,108 @@  static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
 	return retval;
 }
 
+static ssize_t pci_bar_resize_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{	/* sysfs show: emits "<possible sizes, 16 hex digits>:<current size>\n" for one BAR */
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dev_ext_attribute *resize_attr =
+			container_of(attr, struct dev_ext_attribute, attr);
+	int bar = (int)(long)resize_attr->var;	/* ->var holds the BAR index as an integer value, not a real pointer */
+	ssize_t ret;
+
+	pci_config_pm_runtime_get(pdev);	/* paired with the _put() below, bracketing the rebar queries */
+
+	/*
+	 * pci_rebar_get_possible_sizes() currently only reads supported sizes
+	 * from the capability register and therefore returns a u32.  The spec
+	 * allows additional supported bits in the control register, which
+	 * then exceed 32 bits.  Expose a u64 to userspace for future compat.
+	 */
+	ret = sysfs_emit(buf, "%016llx:%d\n",
+			(u64)pci_rebar_get_possible_sizes(pdev, bar),
+			pci_rebar_get_current_size(pdev, bar));
+
+	pci_config_pm_runtime_put(pdev);
+
+	return ret;	/* whatever sysfs_emit() returned */
+}
+
+static ssize_t pci_bar_resize_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{	/* sysfs store: resize the BAR tied to this attribute to the size value parsed from buf */
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dev_ext_attribute *resize_attr =
+			container_of(attr, struct dev_ext_attribute, attr);
+	int ret, i, bar = (int)(long)resize_attr->var;	/* ->var holds the BAR index as an integer value */
+	unsigned long size, flags;
+	u16 cmd;
+
+	if (kstrtoul(buf, 0, &size) < 0)	/* base 0: radix is taken from the prefix in buf */
+		return -EINVAL;
+
+	device_lock(dev);
+	if (dev->driver) {	/* refuse to resize while a driver is bound to the device */
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	pci_config_pm_runtime_get(pdev);
+
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);	/* saved so it can be written back after the resize */
+	pci_write_config_word(pdev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);	/* clear the memory enable bit meanwhile */
+
+	flags = pci_resource_flags(pdev, bar);
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {	/* release every non-empty BAR whose flags equal the target BAR's */
+		if (pci_resource_len(pdev, i) &&
+		    pci_resource_flags(pdev, i) == flags)
+			pci_release_resource(pdev, i);
+	}
+
+	ret = pci_resize_resource(pdev, bar, size);
+
+	pci_assign_unassigned_bus_resources(pdev->bus);	/* called whether or not the resize succeeded */
+
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);	/* restore the command word saved above */
+
+	pci_config_pm_runtime_put(pdev);
+unlock:
+	device_unlock(dev);
+
+	return ret ? ret : count;	/* non-zero ret is returned as-is; otherwise report all of buf consumed */
+}
+
+static int pci_create_resize_attr(struct pci_dev *pdev, int num)
+{	/* create the "resource<num>_resize" sysfs file; returns 0, -ENOMEM, or the sysfs_create_file() error */
+	struct dev_ext_attribute *resize_attr;
+	char *resize_attr_name;
+	int retval;
+
+	resize_attr = kzalloc(sizeof(*resize_attr) + 17, GFP_ATOMIC);	/* +17: "resourceN_resize" with a single-digit N, plus NUL */
+	if (!resize_attr)
+		return -ENOMEM;
+
+	resize_attr_name = (char *)(resize_attr + 1);	/* name is stored in the tail of the same allocation */
+
+	sysfs_attr_init(&resize_attr->attr.attr);
+	sprintf(resize_attr_name, "resource%d_resize", num);
+	resize_attr->attr.attr.name = resize_attr_name;
+	resize_attr->attr.attr.mode = 0600;	/* owner read/write only */
+	resize_attr->attr.show = pci_bar_resize_show;
+	resize_attr->attr.store = pci_bar_resize_store;
+	resize_attr->var = (void *)(long)num;	/* BAR index carried in the pointer value; the handlers cast it back */
+
+	retval = sysfs_create_file(&pdev->dev.kobj, &resize_attr->attr.attr);
+	if (retval) {
+		kfree(resize_attr);	/* one free covers the attribute and its embedded name */
+		return retval;
+	}
+
+	pdev->res_attr_resize[num] = resize_attr;	/* recorded so pci_remove_resource_files() can remove and free it */
+	return 0;
+}
+
 /**
  * pci_create_resource_files - create resource files in sysfs for @dev
  * @pdev: dev in question
@@ -1235,6 +1345,14 @@  static int pci_create_resource_files(struct pci_dev *pdev)
 			pci_remove_resource_files(pdev);
 			return retval;
 		}
+
+		if (pci_rebar_get_current_size(pdev, i) >= 0) {
+			retval = pci_create_resize_attr(pdev, i);
+			if (retval) {
+				pci_remove_resource_files(pdev);
+				return retval;
+			}
+		}
 	}
 	return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 060af91bafcd..9c4db0c5f215 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -470,6 +470,7 @@  struct pci_dev {
 	int		rom_attr_enabled;	/* Display of ROM attribute enabled? */
 	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
+	struct dev_ext_attribute *res_attr_resize[DEVICE_COUNT_RESOURCE]; /* sysfs file for resizing BAR resources */
 
 #ifdef CONFIG_HOTPLUG_PCI_PCIE
 	unsigned int	broken_cmd_compl:1;	/* No compl for some cmds */