diff mbox series

[v3,3/8] PCI: Restore resource alignment

Message ID 20240807151723.613742-4-stewart.hildebrand@amd.com (mailing list archive)
State Changes Requested
Headers show
Series PCI: Align small BARs | expand

Commit Message

Stewart Hildebrand Aug. 7, 2024, 3:17 p.m. UTC
Devices with alignment specified will lose their alignment in cases when
the bridge resources have been released, e.g. due to insufficient bridge
window size. Restore the alignment.

Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
v2->v3:
* no change

v1->v2:
* capitalize subject text
---
 drivers/pci/setup-bus.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

Comments

Bjorn Helgaas Aug. 8, 2024, 7:28 p.m. UTC | #1
On Wed, Aug 07, 2024 at 11:17:12AM -0400, Stewart Hildebrand wrote:
> Devices with alignment specified will lose their alignment in cases when
> the bridge resources have been released, e.g. due to insufficient bridge
> window size. Restore the alignment.

I guess this fixes a problem when the user has specified
"pci=resource_alignment=..." and we've decided to release and
reallocate a bridge window?  Just looking for a bit more concrete
description of what this problem would look like to a user.

> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> v2->v3:
> * no change
> 
> v1->v2:
> * capitalize subject text
> ---
>  drivers/pci/setup-bus.c | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index 23082bc0ca37..ab7510ce6917 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -1594,6 +1594,23 @@ static void __pci_bridge_assign_resources(const struct pci_dev *bridge,
>  	}
>  }
>  
> +static void restore_child_resource_alignment(struct pci_bus *bus)
> +{
> +	struct pci_dev *dev;
> +
> +	list_for_each_entry(dev, &bus->devices, bus_list) {
> +		struct pci_bus *b;
> +
> +		pci_reassigndev_resource_alignment(dev);
> +
> +		b = dev->subordinate;
> +		if (!b)
> +			continue;
> +
> +		restore_child_resource_alignment(b);
> +	}
> +}
> +
>  #define PCI_RES_TYPE_MASK \
>  	(IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
>  	 IORESOURCE_MEM_64)
> @@ -1648,6 +1665,8 @@ static void pci_bridge_release_resources(struct pci_bus *bus,
>  		r->start = 0;
>  		r->flags = 0;
>  
> +		restore_child_resource_alignment(bus);
> +
>  		/* Avoiding touch the one without PREF */
>  		if (type & IORESOURCE_PREFETCH)
>  			type = IORESOURCE_PREFETCH;
> -- 
> 2.46.0
>
Stewart Hildebrand Aug. 8, 2024, 8:28 p.m. UTC | #2
On 8/8/24 15:28, Bjorn Helgaas wrote:
> On Wed, Aug 07, 2024 at 11:17:12AM -0400, Stewart Hildebrand wrote:
>> Devices with alignment specified will lose their alignment in cases when
>> the bridge resources have been released, e.g. due to insufficient bridge
>> window size. Restore the alignment.
> 
> I guess this fixes a problem when the user has specified
> "pci=resource_alignment=..." and we've decided to release and
> reallocate a bridge window?  Just looking for a bit more concrete
> description of what this problem would look like to a user.

Yes. When alignment has been specified via pcibios_default_alignment()
or by the user with "pci=resource_alignment=...", and the bridge window
is being reallocated, the specified alignment is lost and the resource
may not be sufficiently aligned after reallocation.

I can expand the commit description.

> 
>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
>> ---
>> v2->v3:
>> * no change
>>
>> v1->v2:
>> * capitalize subject text
>> ---
>>  drivers/pci/setup-bus.c | 19 +++++++++++++++++++
>>  1 file changed, 19 insertions(+)
>>
>> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
>> index 23082bc0ca37..ab7510ce6917 100644
>> --- a/drivers/pci/setup-bus.c
>> +++ b/drivers/pci/setup-bus.c
>> @@ -1594,6 +1594,23 @@ static void __pci_bridge_assign_resources(const struct pci_dev *bridge,
>>  	}
>>  }
>>  
>> +static void restore_child_resource_alignment(struct pci_bus *bus)
>> +{
>> +	struct pci_dev *dev;
>> +
>> +	list_for_each_entry(dev, &bus->devices, bus_list) {
>> +		struct pci_bus *b;
>> +
>> +		pci_reassigndev_resource_alignment(dev);
>> +
>> +		b = dev->subordinate;
>> +		if (!b)
>> +			continue;
>> +
>> +		restore_child_resource_alignment(b);
>> +	}
>> +}
>> +
>>  #define PCI_RES_TYPE_MASK \
>>  	(IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
>>  	 IORESOURCE_MEM_64)
>> @@ -1648,6 +1665,8 @@ static void pci_bridge_release_resources(struct pci_bus *bus,
>>  		r->start = 0;
>>  		r->flags = 0;
>>  
>> +		restore_child_resource_alignment(bus);
>> +
>>  		/* Avoiding touch the one without PREF */
>>  		if (type & IORESOURCE_PREFETCH)
>>  			type = IORESOURCE_PREFETCH;
>> -- 
>> 2.46.0
>>
Bjorn Helgaas Aug. 8, 2024, 9:54 p.m. UTC | #3
On Thu, Aug 08, 2024 at 04:28:50PM -0400, Stewart Hildebrand wrote:
> On 8/8/24 15:28, Bjorn Helgaas wrote:
> > On Wed, Aug 07, 2024 at 11:17:12AM -0400, Stewart Hildebrand wrote:
> >> Devices with alignment specified will lose their alignment in cases when
> >> the bridge resources have been released, e.g. due to insufficient bridge
> >> window size. Restore the alignment.
> > 
> > I guess this fixes a problem when the user has specified
> > "pci=resource_alignment=..." and we've decided to release and
> > reallocate a bridge window?  Just looking for a bit more concrete
> > description of what this problem would look like to a user.
> 
> Yes. When alignment has been specified via pcibios_default_alignment()
> or by the user with "pci=resource_alignment=...", and the bridge window
> is being reallocated, the specified alignment is lost and the resource
> may not be sufficiently aligned after reallocation.
> 
> I can expand the commit description.

I think a hint about where the alignment gets lost would be helpful,
too.

This seems like a problem users could be seeing today, even
independent of the device passthrough issue that I think is the main
thrust of this series.  If there's a problem report or an easy way to
reproduce this problem, that would be nice, too.

Bjorn
Stewart Hildebrand Aug. 14, 2024, 6:12 p.m. UTC | #4
On 8/8/24 17:54, Bjorn Helgaas wrote:
> On Thu, Aug 08, 2024 at 04:28:50PM -0400, Stewart Hildebrand wrote:
>> On 8/8/24 15:28, Bjorn Helgaas wrote:
>>> On Wed, Aug 07, 2024 at 11:17:12AM -0400, Stewart Hildebrand wrote:
>>>> Devices with alignment specified will lose their alignment in cases when
>>>> the bridge resources have been released, e.g. due to insufficient bridge
>>>> window size. Restore the alignment.
>>>
>>> I guess this fixes a problem when the user has specified
>>> "pci=resource_alignment=..." and we've decided to release and
>>> reallocate a bridge window?  Just looking for a bit more concrete
>>> description of what this problem would look like to a user.
>>
>> Yes. When alignment has been specified via pcibios_default_alignment()
>> or by the user with "pci=resource_alignment=...", and the bridge window
>> is being reallocated, the specified alignment is lost and the resource
>> may not be sufficiently aligned after reallocation.
>>
>> I can expand the commit description.
> 
> I think a hint about where the alignment gets lost would be helpful,
> too.
> 
> This seems like a problem users could be seeing today, even
> independent of the device passthrough issue that I think is the main
> thrust of this series.  If there's a problem report or an easy way to
> reproduce this problem, that would be nice, too.

Oh, sorry, I just realized that it's only alignments with
IORESOURCE_STARTALIGN that get lost during bridge window realloc.
Specifically, r->start gets overwritten in __release_child_resources().
There are a few code paths, such as the one in
__release_child_resources(), that assume IORESOURCE_SIZEALIGN. In case
of IORESOURCE_SIZEALIGN, the alignment is restored by a simple
calculation. However, with IORESOURCE_STARTALIGN, we can't use a simple
calculation, instead we need to consult
pci_reassigndev_resource_alignment() to restore the alignment. I'll
update the commit message.

An alternative approach might be to store the alignment in a new member
in struct resource, thus saving us from having to call
pci_reassigndev_resource_alignment() for each PCI device on the bridge
undergoing reallocation.

BTW, this patch and the two "[powerpc,x86]/pci: Preserve
IORESOURCE_STARTALIGN alignment" patches could potentially be folded
into a single patch as they're all dealing with fixing
IORESOURCE_STARTALIGN.

To repro the issue on x86, we will need to apply this series except the
current patch [3/8].

I ran into the issue with the following device. Let's call it example 1.
Scrubbed/partial output from lspci -vv:

	Region 0: Memory at d1924600 (32-bit, non-prefetchable) [size=256]
	Region 1: Memory at d1924400 (32-bit, non-prefetchable) [size=512]
	Region 2: Memory at d1924000 (32-bit, non-prefetchable) [size=1K]
	Region 3: Memory at d1920000 (32-bit, non-prefetchable) [size=16K]
	Region 4: Memory at d1900000 (32-bit, non-prefetchable) [size=128K]
	Region 5: Memory at d1800000 (32-bit, non-prefetchable) [size=1M]
	Capabilities: [b0] MSI-X: Enable- Count=2 Masked-
		Vector table: BAR=0 offset=00000080
		PBA: BAR=0 offset=00000090
	Capabilities: [200 v1] Single Root I/O Virtualization (SR-IOV)
		IOVCap:	Migration-, Interrupt Message Number: 000
		IOVCtl:	Enable- Migration- Interrupt- MSE- ARIHierarchy+
		IOVSta:	Migration-
		Initial VFs: 4, Total VFs: 4, Number of VFs: 0, Function Dependency Link: 00
		VF offset: 6, stride: 1, Device ID: 0100
		Supported Page Size: 00000553, System Page Size: 00000001
		Region 0: Memory at 00000000d0800000 (64-bit, non-prefetchable)
		VF Migration: offset: 00000000, BIR: 0
	Kernel driver in use: pci-endpoint-test
	Kernel modules: pci_endpoint_test

BARs 0, 1, and 2 are small, and the host firmware placed them on the
same page. The host firmware did not page align the BARs. There is also
an SR-IOV BAR that the firmware couldn't fit in the bridge window.

In example 1, I did not specify pci=realloc=on. I used a kernel with
CONFIG_PCI_REALLOC_ENABLE_AUTO=y, and the SR-IOV BAR triggered the
bridge window realloc. Alignment was requested for BARs 0, 1, and 2 of
this device, using IORESOURCE_STARTALIGN, because of patch [8/8]. The
alignment was lost during realloc.

Such a device from example 1 may be hard to come by, so here's a way to
reproduce with qemu. Example 2:

01:00.0 Unclassified device [00ff]: Red Hat, Inc. QEMU PCI Test Device
        Subsystem: Red Hat, Inc. QEMU Virtual Machine
        Flags: fast devsel
        Memory at fe800000 (32-bit, non-prefetchable) [size=4K]
        I/O ports at c000 [size=256]
        Memory at 7050000000 (64-bit, prefetchable) [size=32]

01:01.0 Unclassified device [00ff]: Red Hat, Inc. QEMU PCI Test Device
        Subsystem: Red Hat, Inc. QEMU Virtual Machine
        Flags: fast devsel
        Memory at fe801000 (32-bit, non-prefetchable) [size=4K]
        I/O ports at c100 [size=256]
        Memory at 7050000020 (64-bit, prefetchable) [size=32]

01:02.0 Unclassified device [00ff]: Red Hat, Inc. QEMU PCI Test Device
        Subsystem: Red Hat, Inc. QEMU Virtual Machine
        Flags: fast devsel
        Memory at fe802000 (32-bit, non-prefetchable) [size=4K]
        I/O ports at c200 [size=256]
        Memory at 7050000040 (64-bit, prefetchable) [size=32]

01:03.0 Unclassified device [00ff]: Red Hat, Inc. QEMU PCI Test Device
        Subsystem: Red Hat, Inc. QEMU Virtual Machine
        Flags: fast devsel
        Memory at fe803000 (32-bit, non-prefetchable) [size=4K]
        I/O ports at c300 [size=256]
        Memory at 7040000000 (64-bit, prefetchable) [size=256M]

This example can reproduced with Qemu's pci-testdev and a SeaBIOS hack.
In this case we will need to specify pci=realloc=on to trigger the
realloc because there's no SR-IOV BAR to trigger it automatically. Add
this to your usual qemu-system-x86_64 args:

    -append "console=ttyS0 ignore_loglevel pci=realloc=on" \
    -device pcie-pci-bridge,id=pcie.1 \
    -device pci-testdev,bus=pcie.1,membar=32 \
    -device pci-testdev,bus=pcie.1,membar=32 \
    -device pci-testdev,bus=pcie.1,membar=32 \
    -device pci-testdev,bus=pcie.1,membar=268435456

Apply this SeaBIOS hack:

diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index b3e359d7..769007a4 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -25,7 +25,7 @@
 #include "util.h" // pci_setup
 #include "x86.h" // outb

-#define PCI_DEVICE_MEM_MIN    (1<<12)  // 4k == page size
+#define PCI_DEVICE_MEM_MIN    (0)
 #define PCI_BRIDGE_MEM_MIN    (1<<21)  // 2M == hugepage size
 #define PCI_BRIDGE_IO_MIN      0x1000  // mandated by pci bridge spec
@@ -1089,6 +1089,7 @@ pci_region_map_one_entry(struct pci_region_entry *entry, u64 addr)
         pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT);
     }
     if (entry->type == PCI_REGION_TYPE_PREFMEM) {
+        limit = addr + PCI_BRIDGE_MEM_MIN - 1;
         pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, addr >> PCI_PREF_MEMORY_SHIFT);
         pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> PCI_PREF_MEMORY_SHIFT);
         pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, addr >> 32);
diff mbox series

Patch

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 23082bc0ca37..ab7510ce6917 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1594,6 +1594,23 @@  static void __pci_bridge_assign_resources(const struct pci_dev *bridge,
 	}
 }
 
+static void restore_child_resource_alignment(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct pci_bus *b;
+
+		pci_reassigndev_resource_alignment(dev);
+
+		b = dev->subordinate;
+		if (!b)
+			continue;
+
+		restore_child_resource_alignment(b);
+	}
+}
+
 #define PCI_RES_TYPE_MASK \
 	(IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH |\
 	 IORESOURCE_MEM_64)
@@ -1648,6 +1665,8 @@  static void pci_bridge_release_resources(struct pci_bus *bus,
 		r->start = 0;
 		r->flags = 0;
 
+		restore_child_resource_alignment(bus);
+
 		/* Avoiding touch the one without PREF */
 		if (type & IORESOURCE_PREFETCH)
 			type = IORESOURCE_PREFETCH;