diff mbox series

[RFC,1/1] PCI: Add Extended Tag + MRRS quirk for Xeon 6

Message ID 20250304135108.2599-1-ilpo.jarvinen@linux.intel.com (mailing list archive)
State RFC
Delegated to: Bjorn Helgaas
Headers show
Series [RFC,1/1] PCI: Add Extended Tag + MRRS quirk for Xeon 6 | expand

Commit Message

Ilpo Järvinen March 4, 2025, 1:51 p.m. UTC
Disallow Extended Tags and Max Read Request Size (MRRS) larger than
128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
to x2. Also, 10-Bit Tag Requester should be disallowed for devices
underneath these Root Ports, but there is currently no 10-Bit Tag
support in the kernel.

The normal path that writes MRRS is through
pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
pcie_write_mrrs() and contains a few early returns that are based on
the value of pcie_bus_config. Overriding such checks with the host
bridge flag check on each level seems messy. Thus, simply ensure MRRS
is always written in pci_configure_device() if a device requiring the
quirk is detected.

Link: https://cdrdv2.intel.com/v1/dl/getContent/837176
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---

The normal path that writes MRRS is somewhat convoluted, so I ensure MRRS
gets written in a more direct way. I'm not sure if that's the best
approach, thus I'm sending this as an RFC.

 drivers/pci/pci.c    | 15 ++++++++-------
 drivers/pci/probe.c  |  8 +++++++-
 drivers/pci/quirks.c | 27 +++++++++++++++++++++++++++
 include/linux/pci.h  |  1 +
 4 files changed, 43 insertions(+), 8 deletions(-)


base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b

Comments

Bjorn Helgaas March 4, 2025, 9:14 p.m. UTC | #1
On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> Disallow Extended Tags and Max Read Request Size (MRRS) larger than
> 128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
> to x2. Also, 10-Bit Tag Requester should be disallowed for device
> underneath these Root Ports but there is currently no 10-Bit Tag
> support in the kernel.
> 
> The normal path that writes MRRS is through
> pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
> pcie_write_mrrs() and contains a few early returns that are based on
> the value of pcie_bus_config. Overriding such checks with the host
> bridge flag check on each level seems messy. Thus, simply ensure MRRS
> is always written in pci_configure_device() if a device requiring the
> quirk is detected.

This is kind of weird.  It's apparently not an erratum in the sense
that something doesn't *work*, just something for "optimized PCIe
performance"?

What are we supposed to do with this?  Add similar quirks for every
random PCI controller?  Scratching my head about what this means for
the future.

What bad things happen if we *don't* do this?  Is this something we
could/should rely on BIOS to configure for us?

> Link: https://cdrdv2.intel.com/v1/dl/getContent/837176
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> ---
> 
> The normal path that writes MRRS is somewhat convoluted so I ensure MRRS
> gets written in a more direct way, I'm not sure if that's the best
> approach. Thus sending this as RFC.
> 
>  drivers/pci/pci.c    | 15 ++++++++-------
>  drivers/pci/probe.c  |  8 +++++++-
>  drivers/pci/quirks.c | 27 +++++++++++++++++++++++++++
>  include/linux/pci.h  |  1 +
>  4 files changed, 43 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 869d204a70a3..81ddad81ccb8 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5913,7 +5913,7 @@ EXPORT_SYMBOL(pcie_get_readrq);
>  int pcie_set_readrq(struct pci_dev *dev, int rq)
>  {
>  	u16 v;
> -	int ret;
> +	int ret, max_mrrs = 4096;
>  	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
>  
>  	if (rq < 128 || rq > 4096 || !is_power_of_2(rq))
> @@ -5933,13 +5933,14 @@ int pcie_set_readrq(struct pci_dev *dev, int rq)
>  
>  	v = FIELD_PREP(PCI_EXP_DEVCTL_READRQ, ffs(rq) - 8);
>  
> -	if (bridge->no_inc_mrrs) {
> -		int max_mrrs = pcie_get_readrq(dev);
> +	if (bridge->no_inc_mrrs)
> +		max_mrrs = pcie_get_readrq(dev);
> +	if (bridge->only_128b_mrrs)
> +		max_mrrs = 128;
>  
> -		if (rq > max_mrrs) {
> -			pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
> -			return -EINVAL;
> -		}
> +	if (rq > max_mrrs) {
> +		pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
> +		return -EINVAL;
>  	}
>  
>  	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index b6536ed599c3..ceaa34b0525b 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2342,7 +2342,11 @@ static void pci_configure_serr(struct pci_dev *dev)
>  
>  static void pci_configure_device(struct pci_dev *dev)
>  {
> +	struct pci_host_bridge *host_bridge = pci_find_host_bridge(dev->bus);
> +
>  	pci_configure_mps(dev);
> +	if (host_bridge && host_bridge->only_128b_mrrs)
> +		pcie_set_readrq(dev, 128);
>  	pci_configure_extended_tags(dev, NULL);
>  	pci_configure_relaxed_ordering(dev);
>  	pci_configure_ltr(dev);
> @@ -2851,13 +2855,15 @@ static void pcie_write_mps(struct pci_dev *dev, int mps)
>  
>  static void pcie_write_mrrs(struct pci_dev *dev)
>  {
> +	struct pci_host_bridge *host_bridge = pci_find_host_bridge(dev->bus);
>  	int rc, mrrs;
>  
>  	/*
>  	 * In the "safe" case, do not configure the MRRS.  There appear to be
>  	 * issues with setting MRRS to 0 on a number of devices.
>  	 */
> -	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
> +	if (pcie_bus_config != PCIE_BUS_PERFORMANCE &&
> +	    (!host_bridge || !host_bridge->only_128b_mrrs))
>  		return;
>  
>  	/*
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index b84ff7bade82..987cd94028e1 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -5564,6 +5564,33 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
>  
> +static void quirk_pcie2x_no_tags_no_mrrs(struct pci_dev *pdev)
> +{
> +	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
> +	u32 linkcap;
> +
> +	if (!bridge)
> +		return;
> +
> +	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &linkcap);
> +	if (FIELD_GET(PCI_EXP_LNKCAP_MLW, linkcap) != 0x2)
> +		return;
> +
> +	bridge->no_ext_tags = 1;
> +	bridge->only_128b_mrrs = 1;
> +	pci_info(pdev, "Disabling Extended Tags and forcing MRRS to 128B (performance reasons due to 2x PCIe link)\n");
> +}
> +
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db0, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db1, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db2, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db3, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db6, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db7, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db8, quirk_pcie2x_no_tags_no_mrrs);
> +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db9, quirk_pcie2x_no_tags_no_mrrs);
> +
> +
>  #ifdef CONFIG_PCI_ATS
>  static void quirk_no_ats(struct pci_dev *pdev)
>  {
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 47b31ad724fa..def29c8c0f84 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -601,6 +601,7 @@ struct pci_host_bridge {
>  	unsigned int	ignore_reset_delay:1;	/* For entire hierarchy */
>  	unsigned int	no_ext_tags:1;		/* No Extended Tags */
>  	unsigned int	no_inc_mrrs:1;		/* No Increase MRRS */
> +	unsigned int	only_128b_mrrs:1;	/* Only 128B MRRS */
>  	unsigned int	native_aer:1;		/* OS may use PCIe AER */
>  	unsigned int	native_pcie_hotplug:1;	/* OS may use PCIe hotplug */
>  	unsigned int	native_shpc_hotplug:1;	/* OS may use SHPC hotplug */
> 
> base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
> -- 
> 2.39.5
>
Dan Williams March 5, 2025, 8:38 p.m. UTC | #2
Bjorn Helgaas wrote:
> On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> > Disallow Extended Tags and Max Read Request Size (MRRS) larger than
> > 128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
> > to x2. Also, 10-Bit Tag Requester should be disallowed for device
> > underneath these Root Ports but there is currently no 10-Bit Tag
> > support in the kernel.
> > 
> > The normal path that writes MRRS is through
> > pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
> > pcie_write_mrrs() and contains a few early returns that are based on
> > the value of pcie_bus_config. Overriding such checks with the host
> > bridge flag check on each level seems messy. Thus, simply ensure MRRS
> > is always written in pci_configure_device() if a device requiring the
> > quirk is detected.
> 
> This is kind of weird.  It's apparently not an erratum in the sense
> that something doesn't *work*, just something for "optimized PCIe
> performance"?

Another way of saying that large requests surprisingly perform
worse than small requests.

> What are we supposed to do with this?  Add similar quirks for every
> random PCI controller?  Scratching my head about what this means for
> the future.

Ideally when the platform knows about these corner cases the BIOS
deploys the setting and the OS knows to leave it alone.

> What bad things happen if we *don't* do this?  Is this something we
> could/should rely on BIOS to configure for us?

Reduced performance, and yes, only the BIOS has a chance to know about
these niche corner cases ahead of time. The problem, as always, is
knowing when to step in and change what look to be default values, and
when the default values are deliberate choices by platform firmware that
knows a one-off detail.

So I agree with you that while this quirk meets the letter of this
specific recommendation, it portends a future of a steady stream of odd
host PCI controller quirks. Is there a path to empower platform firmware
to convey, "don't touch this value for 'reasons'"?

This reminds me of your observation about _HPX.
http://lore.kernel.org/20240715214529.GA447149@bhelgaas

I.e. potentially a path for Linux to double check that what it thinks is
a good value is countermanded by an _HPX record. Maybe that is overkill
and a more tightly scoped, "don't touch root port PCIe performance
settings" flag variable in ACPI would suffice? So I see this quirk as a
conversation starter that can be applied or held out until the
conversation resolves.
Lukas Wunner March 7, 2025, 8:34 a.m. UTC | #3
On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -5564,6 +5564,33 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
>  
> +static void quirk_pcie2x_no_tags_no_mrrs(struct pci_dev *pdev)
> +{
> +	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
> +	u32 linkcap;
> +
> +	if (!bridge)
> +		return;

I note that in a lot of places where pci_find_host_bridge() is called,
no NULL pointer check is performed.  So omitting it would appear
to be safe.

The quirk is x86-specific, so compiling it into the kernel on other
arches creates unnecessary bloat.  Avoid that by moving it to
arch/x86/pci/fixup.c.

There should definitely be a multi-line code comment above the function
explaining what defect this works around (slower performance apparently),
and also link to the PDF document.

BTW the PDF document says "Intel Confidential", I'm wondering why this
has been made public without stripping the confidentiality marker...

Thanks,

Lukas
Ilpo Järvinen March 7, 2025, 1:06 p.m. UTC | #4
On Tue, 4 Mar 2025, Bjorn Helgaas wrote:

> On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> > Disallow Extended Tags and Max Read Request Size (MRRS) larger than
> > 128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
> > to x2. Also, 10-Bit Tag Requester should be disallowed for device
> > underneath these Root Ports but there is currently no 10-Bit Tag
> > support in the kernel.
> > 
> > The normal path that writes MRRS is through
> > pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
> > pcie_write_mrrs() and contains a few early returns that are based on
> > the value of pcie_bus_config. Overriding such checks with the host
> > bridge flag check on each level seems messy. Thus, simply ensure MRRS
> > is always written in pci_configure_device() if a device requiring the
> > quirk is detected.
> 
> This is kind of weird.  It's apparently not an erratum in the sense
> that something doesn't *work*, just something for "optimized PCIe
> performance"?
> 
> What are we supposed to do with this?  Add similar quirks for every
> random PCI controller?  Scratching my head about what this means for
> the future.
> 
> What bad things happen if we *don't* do this?  Is this something we
> could/should rely on BIOS to configure for us?

Even if BIOS configures this (I'm under the impression they already do; I
had trouble finding a configuration in our lab on which this patch
had some effect), my kernel was built with CONFIG_PCIE_BUS_DEFAULT. If
I set that to CONFIG_PCIE_BUS_PERFORMANCE, what BIOS did will be
overwritten.

One option would be to drop the changes to drivers/pci/probe.c, which are
there to force MRRS to always be written (in this v1). That case should be
coverable with BIOS configuration, but the changes to pcie_set_readrq() seem
necessary to prevent Linux from overwriting the configuration made by the BIOS,
unless there's going to be some other mechanism to tell the kernel it should
keep its hands off these values, as suggested by Dan.
Ilpo Järvinen March 7, 2025, 1:13 p.m. UTC | #5
On Fri, 7 Mar 2025, Lukas Wunner wrote:

> On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -5564,6 +5564,33 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
> >  
> > +static void quirk_pcie2x_no_tags_no_mrrs(struct pci_dev *pdev)
> > +{
> > +	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
> > +	u32 linkcap;
> > +
> > +	if (!bridge)
> > +		return;
> 
> I note that in a lot of places where pci_find_host_bridge() is called,
> no NULL pointer check is performed.  So omitting it would appear
> to be safe.
> 
> The quirk is x86-specific, so compiling it into the kernel on other
> arches creates unnecessary bloat.  Avoid by moving to arch/x86/pci/fixup.c.
> 
> There should definitely be a multi-line code comment above the function
> explaining what defect this works around (slower performance apparently),
> and also link to the PDF document.

I'll do those in v2. Thanks for the comments.

> BTW the PDF document says "Intel Confidential", I'm wondering why this
> has been made public without stripping the confidentiality marker...

We're apparently also not supposed to "finalize a design with this 
information". :-)
Bjorn Helgaas March 7, 2025, 4:39 p.m. UTC | #6
On Fri, Mar 07, 2025 at 03:06:31PM +0200, Ilpo Järvinen wrote:
> On Tue, 4 Mar 2025, Bjorn Helgaas wrote:
> > On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> > > Disallow Extended Tags and Max Read Request Size (MRRS) larger than
> > > 128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
> > > to x2. Also, 10-Bit Tag Requester should be disallowed for device
> > > underneath these Root Ports but there is currently no 10-Bit Tag
> > > support in the kernel.
> > > 
> > > The normal path that writes MRRS is through
> > > pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
> > > pcie_write_mrrs() and contains a few early returns that are based on
> > > the value of pcie_bus_config. Overriding such checks with the host
> > > bridge flag check on each level seems messy. Thus, simply ensure MRRS
> > > is always written in pci_configure_device() if a device requiring the
> > > quirk is detected.
> > 
> > This is kind of weird.  It's apparently not an erratum in the sense
> > that something doesn't *work*, just something for "optimized PCIe
> > performance"?
> > 
> > What are we supposed to do with this?  Add similar quirks for every
> > random PCI controller?  Scratching my head about what this means for
> > the future.
> > 
> > What bad things happen if we *don't* do this?  Is this something we
> > could/should rely on BIOS to configure for us?
> 
> Even if BIOS configures this (I'm under impression they already do, I 
> had problem in finding a configuration in our lab on which this patch
> had some effect). But my kernel was built with CONFIG_PCIE_BUS_DEFAULT, if 
> I set that to CONFIG_PCIE_BUS_PERFORMANCE, what BIOS did will be 
> overwritten.

I despise those CONFIG_PCIE_BUS_* options, but have never managed to
get rid of them.  Unfortunate that something named "*_PERFORMANCE"
will apparently result in *worse* performance in this respect.
Dan Williams March 7, 2025, 8:50 p.m. UTC | #7
Ilpo Järvinen wrote:
> On Tue, 4 Mar 2025, Bjorn Helgaas wrote:
> 
> > On Tue, Mar 04, 2025 at 03:51:08PM +0200, Ilpo Järvinen wrote:
> > > Disallow Extended Tags and Max Read Request Size (MRRS) larger than
> > > 128B for devices under Xeon 6 Root Ports if the Root Port is bifurcated
> > > to x2. Also, 10-Bit Tag Requester should be disallowed for device
> > > underneath these Root Ports but there is currently no 10-Bit Tag
> > > support in the kernel.
> > > 
> > > The normal path that writes MRRS is through
> > > pcie_bus_configure_settings() -> pcie_bus_configure_set() ->
> > > pcie_write_mrrs() and contains a few early returns that are based on
> > > the value of pcie_bus_config. Overriding such checks with the host
> > > bridge flag check on each level seems messy. Thus, simply ensure MRRS
> > > is always written in pci_configure_device() if a device requiring the
> > > quirk is detected.
> > 
> > This is kind of weird.  It's apparently not an erratum in the sense
> > that something doesn't *work*, just something for "optimized PCIe
> > performance"?
> > 
> > What are we supposed to do with this?  Add similar quirks for every
> > random PCI controller?  Scratching my head about what this means for
> > the future.
> > 
> > What bad things happen if we *don't* do this?  Is this something we
> > could/should rely on BIOS to configure for us?
> 
> Even if BIOS configures this (I'm under impression they already do, I 
> had problem in finding a configuration in our lab on which this patch
> had some effect). But my kernel was built with CONFIG_PCIE_BUS_DEFAULT, if 
> I set that to CONFIG_PCIE_BUS_PERFORMANCE, what BIOS did will be 
> overwritten.

The observation is that while Linux only overrides Maximum Read Request
Size with PCIE_BUS_PERFORMANCE, it always overrides
PCI_EXP_DEVCTL_EXT_TAG.

> One option would be to drop the changes to drivers/pci/probe.c which is 
> there to force MRRS is always written (in this v1). That case should be 
> coverable with BIOS configuration but changes into pcie_set_readrq() seems 
> necessary to prevent Linux overwriting the configuration made by the BIOS. 
> Unless there's going to some other mechanism to tell kernel it should keep 
> hands from from these values as suggested by Dan.

The problem is determining when the BIOS has made an affirmative step to
limit the settings to defaults, vs expecting the OS to optimize the
settings because performance matters less in pre-OS runtime.
diff mbox series

Patch

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 869d204a70a3..81ddad81ccb8 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5913,7 +5913,7 @@  EXPORT_SYMBOL(pcie_get_readrq);
 int pcie_set_readrq(struct pci_dev *dev, int rq)
 {
 	u16 v;
-	int ret;
+	int ret, max_mrrs = 4096;
 	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
 
 	if (rq < 128 || rq > 4096 || !is_power_of_2(rq))
@@ -5933,13 +5933,14 @@  int pcie_set_readrq(struct pci_dev *dev, int rq)
 
 	v = FIELD_PREP(PCI_EXP_DEVCTL_READRQ, ffs(rq) - 8);
 
-	if (bridge->no_inc_mrrs) {
-		int max_mrrs = pcie_get_readrq(dev);
+	if (bridge->no_inc_mrrs)
+		max_mrrs = pcie_get_readrq(dev);
+	if (bridge->only_128b_mrrs)
+		max_mrrs = 128;
 
-		if (rq > max_mrrs) {
-			pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
-			return -EINVAL;
-		}
+	if (rq > max_mrrs) {
+		pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
+		return -EINVAL;
 	}
 
 	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b6536ed599c3..ceaa34b0525b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2342,7 +2342,11 @@  static void pci_configure_serr(struct pci_dev *dev)
 
 static void pci_configure_device(struct pci_dev *dev)
 {
+	struct pci_host_bridge *host_bridge = pci_find_host_bridge(dev->bus);
+
 	pci_configure_mps(dev);
+	if (host_bridge && host_bridge->only_128b_mrrs)
+		pcie_set_readrq(dev, 128);
 	pci_configure_extended_tags(dev, NULL);
 	pci_configure_relaxed_ordering(dev);
 	pci_configure_ltr(dev);
@@ -2851,13 +2855,15 @@  static void pcie_write_mps(struct pci_dev *dev, int mps)
 
 static void pcie_write_mrrs(struct pci_dev *dev)
 {
+	struct pci_host_bridge *host_bridge = pci_find_host_bridge(dev->bus);
 	int rc, mrrs;
 
 	/*
 	 * In the "safe" case, do not configure the MRRS.  There appear to be
 	 * issues with setting MRRS to 0 on a number of devices.
 	 */
-	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
+	if (pcie_bus_config != PCIE_BUS_PERFORMANCE &&
+	    (!host_bridge || !host_bridge->only_128b_mrrs))
 		return;
 
 	/*
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b84ff7bade82..987cd94028e1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5564,6 +5564,33 @@  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
 
+static void quirk_pcie2x_no_tags_no_mrrs(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+	u32 linkcap;
+
+	if (!bridge)
+		return;
+
+	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &linkcap);
+	if (FIELD_GET(PCI_EXP_LNKCAP_MLW, linkcap) != 0x2)
+		return;
+
+	bridge->no_ext_tags = 1;
+	bridge->only_128b_mrrs = 1;
+	pci_info(pdev, "Disabling Extended Tags and forcing MRRS to 128B (performance reasons due to 2x PCIe link)\n");
+}
+
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db0, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db1, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db2, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db3, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db6, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db7, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db8, quirk_pcie2x_no_tags_no_mrrs);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db9, quirk_pcie2x_no_tags_no_mrrs);
+
+
 #ifdef CONFIG_PCI_ATS
 static void quirk_no_ats(struct pci_dev *pdev)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 47b31ad724fa..def29c8c0f84 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -601,6 +601,7 @@  struct pci_host_bridge {
 	unsigned int	ignore_reset_delay:1;	/* For entire hierarchy */
 	unsigned int	no_ext_tags:1;		/* No Extended Tags */
 	unsigned int	no_inc_mrrs:1;		/* No Increase MRRS */
+	unsigned int	only_128b_mrrs:1;	/* Only 128B MRRS */
 	unsigned int	native_aer:1;		/* OS may use PCIe AER */
 	unsigned int	native_pcie_hotplug:1;	/* OS may use PCIe hotplug */
 	unsigned int	native_shpc_hotplug:1;	/* OS may use SHPC hotplug */