diff mbox series

q35: fix mmconfig and PCI0._CRS

Message ID 20190528204331.5280-1-kraxel@redhat.com (mailing list archive)
State New, archived
Headers show
Series q35: fix mmconfig and PCI0._CRS | expand

Commit Message

Gerd Hoffmann May 28, 2019, 8:43 p.m. UTC
This patch changes the handling of the mmconfig area.  Thanks to the
pci(e) expander devices we already have the logic to exclude address
ranges from PCI0._CRS.  We can simply add the mmconfig address range
to the list get it excluded as well.

With that in place we can go with a fixed pci hole which covers the
whole area from the end of (low) ram to the ioapic.

This will make the whole logic alot less fragile.  No matter where the
firmware places the mmconfig xbar, things should work correctly.  The
guest also gets a bit more PCI address space (seabios boot):

    # cat /proc/iomem
    [ ... ]
    7ffdd000-7fffffff : reserved
    80000000-afffffff : PCI Bus 0000:00            <<-- this is new
    b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
      b0000000-bfffffff : reserved
    c0000000-febfffff : PCI Bus 0000:00
      f8000000-fbffffff : 0000:00:01.0
    [ ... ]

So this is a guest visible change.

Cc: László Érsek <lersek@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 hw/i386/acpi-build.c | 14 ++++++++++++++
 hw/pci-host/q35.c    | 31 ++++++++-----------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

Comments

Marcel Apfelbaum May 29, 2019, 4:48 a.m. UTC | #1
Hi Gerd,

On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
>
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
>
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
>
>      # cat /proc/iomem
>      [ ... ]
>      7ffdd000-7fffffff : reserved
>      80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>      b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>        b0000000-bfffffff : reserved
>      c0000000-febfffff : PCI Bus 0000:00
>        f8000000-fbffffff : 0000:00:01.0
>      [ ... ]
>
> So this is a guest visible change.

Looks good to me, but shouldn't we use some compat
property to not affect previous machine versions?


Thanks,
Marcel

> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   hw/i386/acpi-build.c | 14 ++++++++++++++
>   hw/pci-host/q35.c    | 31 ++++++++-----------------------
>   2 files changed, 22 insertions(+), 23 deletions(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>       uint8_t tpmppi_version;
>   } QEMU_PACKED FwCfgTPMConfig;
>   
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>   static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>   {
>       uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>       CrsRangeSet crs_range_set;
>       PCMachineState *pcms = PC_MACHINE(machine);
>       PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>       uint32_t nr_mem = machine->ram_slots;
>       int root_bus_limit = 0xFF;
>       PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>           }
>       }
>   
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>       scope = aml_scope("\\_SB.PCI0");
>       /* build PCI0._CRS */
>       crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>       object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                                (Object **) &s->mch.address_space_io,
>                                qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>   }
>   
>   static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>       }
>       addr = pciexbar & addr_mask;
>       pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>   }
>   
>   /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>       mch_update_pam(mch);
>       mch_update_smram(mch);
>       mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>   }
>   
>   static int mch_post_load(void *opaque, int version_id)
Gerd Hoffmann May 29, 2019, 5:01 a.m. UTC | #2
On Wed, May 29, 2019 at 07:48:03AM +0300, Marcel Apfelbaum wrote:
> Hi Gerd,
> 
> On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >      # cat /proc/iomem
> >      [ ... ]
> >      7ffdd000-7fffffff : reserved
> >      80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >      b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >        b0000000-bfffffff : reserved
> >      c0000000-febfffff : PCI Bus 0000:00
> >        f8000000-fbffffff : 0000:00:01.0
> >      [ ... ]
> > 
> > So this is a guest visible change.
> 
> Looks good to me, but shouldn't we use some compat
> property to not affect previous machine versions?

acpi table changes are typically not versioned, and IIRC the acpi tables
are part of the live migration data stream so the tables will not change
under the feet of the running guest even when it is migrated to another
qemu version.

cheers,
  Gerd
Marcel Apfelbaum May 29, 2019, 5:11 a.m. UTC | #3
Hi Gerd,

On 5/29/19 8:01 AM, Gerd Hoffmann wrote:
> On Wed, May 29, 2019 at 07:48:03AM +0300, Marcel Apfelbaum wrote:
>> Hi Gerd,
>>
>> On 5/28/19 11:43 PM, Gerd Hoffmann wrote:
>>> This patch changes the handling of the mmconfig area.  Thanks to the
>>> pci(e) expander devices we already have the logic to exclude address
>>> ranges from PCI0._CRS.  We can simply add the mmconfig address range
>>> to the list get it excluded as well.
>>>
>>> With that in place we can go with a fixed pci hole which covers the
>>> whole area from the end of (low) ram to the ioapic.
>>>
>>> This will make the whole logic alot less fragile.  No matter where the
>>> firmware places the mmconfig xbar, things should work correctly.  The
>>> guest also gets a bit more PCI address space (seabios boot):
>>>
>>>       # cat /proc/iomem
>>>       [ ... ]
>>>       7ffdd000-7fffffff : reserved
>>>       80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>>>       b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>>>         b0000000-bfffffff : reserved
>>>       c0000000-febfffff : PCI Bus 0000:00
>>>         f8000000-fbffffff : 0000:00:01.0
>>>       [ ... ]
>>>
>>> So this is a guest visible change.
>> Looks good to me, but shouldn't we use some compat
>> property to not affect previous machine versions?
> acpi table changes are typically not versioned, and IIRC the acpi tables
> are part of the live migration data stream so the tables will not change
> under the feet of the running guest even when it is migrated to another
> qemu version.

I wasn't "worried" only about migration, but also about the visible 
change in
the guests keeping the same machine type and upgrading QEMU.

I am not saying is a "big" issue since it will probably not affect the 
guests at all.
Upgrading QEMU will look like a firmware update or something.

Thanks,
Marcel

> cheers,
>    Gerd
>
Paolo Bonzini May 29, 2019, 9:38 a.m. UTC | #4
On 29/05/19 07:11, Marcel Apfelbaum wrote:
> I am not saying is a "big" issue since it will probably not affect the
> guests at all.
> Upgrading QEMU will look like a firmware update or something.

Yeah, this happens all the time.  This kind of bugfix is generally not
versioned.

Paolo
Igor Mammedov May 29, 2019, 11:16 a.m. UTC | #5
On Tue, 28 May 2019 22:43:31 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.

My impression was that QEMU would/should add into CRS whatever bars
firmware programmed (and it looks like QEMU doesn't do it right).
So I'm not really fond of adding bigger hole just to paper over
existing bug (still might be the way to go). Let me ponder a bit
on it and look into what's isn't working on QEMU side properly.

> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
Gerd Hoffmann May 29, 2019, 1:20 p.m. UTC | #6
On Wed, May 29, 2019 at 01:16:52PM +0200, Igor Mammedov wrote:
> On Tue, 28 May 2019 22:43:31 +0200
> Gerd Hoffmann <kraxel@redhat.com> wrote:
> 
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >     # cat /proc/iomem
> >     [ ... ]
> >     7ffdd000-7fffffff : reserved
> >     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >       b0000000-bfffffff : reserved
> >     c0000000-febfffff : PCI Bus 0000:00
> >       f8000000-fbffffff : 0000:00:01.0
> >     [ ... ]
> > 
> > So this is a guest visible change.
> 
> My impression was that QEMU would/should add into CRS whatever bars
> firmware programmed (and it looks like QEMU doesn't do it right).

Well, that works reasonable well.  It looks at all pci bars.  The ones
which do not belong to PCI0 are added to their pci(e) expander.  All
remaining address space of the pci hole is added to PCI0.  At times
things look a bit odd as all unused ranges go to PCI0 even in cases like
this one:

[ ... ]
84a00000-85203fff : PCI Bus 0000:40
  84a00000-84bfffff : PCI Bus 0000:44
  84c00000-84dfffff : PCI Bus 0000:43
  84e00000-84ffffff : PCI Bus 0000:42
  85000000-851fffff : PCI Bus 0000:41
  85200000-85200fff : 0000:40:02.3
  85201000-85201fff : 0000:40:02.2
  85202000-85202fff : 0000:40:02.1
  85203000-85203fff : 0000:40:02.0
85204000-853fffff : PCI Bus 0000:00   <<-- this could be given to PCI Bus 0000:40
85400000-85c03fff : PCI Bus 0000:80
  85400000-855fffff : PCI Bus 0000:84
[ ... ]

but that is more or less cosmetical.

> So I'm not really fond of adding bigger hole just to paper over
> existing bug (still might be the way to go). Let me ponder a bit
> on it and look into what's isn't working on QEMU side properly.

Basically qemu assumes the (32bit) pci hole starts above the mmconfig
bar.  The pci hole should start above low memory though, like it does
on 'pc'.  And the mmconfig bar should be excluded.

cheers,
  Gerd
Michael S. Tsirkin May 29, 2019, 10:05 p.m. UTC | #7
On Tue, May 28, 2019 at 10:43:31PM +0200, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Hi Gerd!
Please rebase on top of latest pci tree.

After the rebase this will start failing since we
are now asserting on any changes to ACPI tables - and the way to
fix it is to add a comma-separated list of
changed ACPI tables to tests/bios-tables-test-allowed-diff.h

As a maintainer I will notice this and update the expected
files before pushing.

> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
> -- 
> 2.18.1
Igor Mammedov May 30, 2019, 8:58 a.m. UTC | #8
On Tue, 28 May 2019 22:43:31 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
Laszlo Ersek June 3, 2019, 10:43 a.m. UTC | #9
On 05/28/19 22:43, Gerd Hoffmann wrote:
> This patch changes the handling of the mmconfig area.  Thanks to the
> pci(e) expander devices we already have the logic to exclude address
> ranges from PCI0._CRS.  We can simply add the mmconfig address range
> to the list get it excluded as well.
> 
> With that in place we can go with a fixed pci hole which covers the
> whole area from the end of (low) ram to the ioapic.
> 
> This will make the whole logic alot less fragile.  No matter where the
> firmware places the mmconfig xbar, things should work correctly.  The
> guest also gets a bit more PCI address space (seabios boot):
> 
>     # cat /proc/iomem
>     [ ... ]
>     7ffdd000-7fffffff : reserved
>     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
>     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
>       b0000000-bfffffff : reserved
>     c0000000-febfffff : PCI Bus 0000:00
>       f8000000-fbffffff : 0000:00:01.0
>     [ ... ]
> 
> So this is a guest visible change.
> 
> Cc: László Érsek <lersek@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/acpi-build.c | 14 ++++++++++++++
>  hw/pci-host/q35.c    | 31 ++++++++-----------------------
>  2 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d78d738948c..abb0e0ce9f27 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
>      uint8_t tpmppi_version;
>  } QEMU_PACKED FwCfgTPMConfig;
>  
> +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> +
>  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
>  {
>      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      CrsRangeSet crs_range_set;
>      PCMachineState *pcms = PC_MACHINE(machine);
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> +    AcpiMcfgInfo mcfg;
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          }
>      }
>  
> +    /*
> +     * At this point crs_range_set has all the ranges used by pci
> +     * busses *other* than PCI0.  These ranges will be excluded from
> +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> +     * too.
> +     */
> +    if (acpi_get_mcfg(&mcfg)) {
> +        crs_range_insert(crs_range_set.mem_ranges,
> +                         mcfg.base, mcfg.base + mcfg.size - 1);
> +    }
> +
>      scope = aml_scope("\\_SB.PCI0");
>      /* build PCI0._CRS */
>      crs = aml_resource_template();
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index 960939f5ed3e..72093320befe 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
>      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
>                               (Object **) &s->mch.address_space_io,
>                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> -
> -    /* Leave enough space for the biggest MCFG BAR */
> -    /* TODO: this matches current bios behaviour, but
> -     * it's not a power of two, which means an MTRR
> -     * can't cover it exactly.
> -     */
> -    range_set_bounds(&s->mch.pci_hole,
> -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> -            IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static const TypeInfo q35_host_info = {
> @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>      }
>      addr = pciexbar & addr_mask;
>      pcie_host_mmcfg_update(pehb, enable, addr, length);
> -    /* Leave enough space for the MCFG BAR */
> -    /*
> -     * TODO: this matches current bios behaviour, but it's not a power of two,
> -     * which means an MTRR can't cover it exactly.
> -     */
> -    if (enable) {
> -        range_set_bounds(&mch->pci_hole,
> -                         addr + length,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    } else {
> -        range_set_bounds(&mch->pci_hole,
> -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> -                         IO_APIC_DEFAULT_ADDRESS - 1);
> -    }
>  }
>  
>  /* PAM */
> @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
>      mch_update_pam(mch);
>      mch_update_smram(mch);
>      mch_update_ext_tseg_mbytes(mch);
> +
> +    /*
> +     * pci hole goes from end-of-low-ram to io-apic.
> +     * mmconfig will be excluded by the dsdt builder.
> +     */
> +    range_set_bounds(&mch->pci_hole,
> +                     mch->below_4g_mem_size,
> +                     IO_APIC_DEFAULT_ADDRESS - 1);
>  }
>  
>  static int mch_post_load(void *opaque, int version_id)
> 

Looks plausible to me, but I've burned myself with this so frequently
that I'm not comfortable giving a formal feedback tag. I'll defer to
other reviewers (thankfully, there seem to be many, in this instance!)

One question: are we sure all guest OSes we care about can deal with
discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
Is a "naïve" OS imaginable that looks only at the first suitable range
in the _CRS?

Thanks,
Laszlo
Gerd Hoffmann June 3, 2019, 2:24 p.m. UTC | #10
Hi, 

> One question: are we sure all guest OSes we care about can deal with
> discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
> Is a "naïve" OS imaginable that looks only at the first suitable range
> in the _CRS?

Well, I know there is physical hardware doing the same thing,
my work station for example:

[ ... ]
c8000000-f7ffffff : PCI Bus 0000:00
  c8000000-c81fffff : PCI Bus 0000:01
  e0000000-efffffff : 0000:00:02.0
    e0000000-e02fffff : BOOTFB
[ ... ]
f8000000-fbffffff : PCI MMCONFIG 0000 [bus 00-3f]
  f8000000-fbffffff : Reserved
    f8000000-fbffffff : pnp 00:06
fd000000-fe7fffff : PCI Bus 0000:00
  fd000000-fdabffff : pnp 00:07
[ ... ]

Which of course is no guarantee that no naïve OS exists, but I think the
chances that we'll run into trouble with this are rather small.

cheers,
  Gerd
Laszlo Ersek June 3, 2019, 4:23 p.m. UTC | #11
On 06/03/19 16:24, Gerd Hoffmann wrote:
>   Hi, 
> 
>> One question: are we sure all guest OSes we care about can deal with
>> discontiguous 32-bit PCI MMIO aperture(s)? Personally, I've got no clue.
>> Is a "naïve" OS imaginable that looks only at the first suitable range
>> in the _CRS?
> 
> Well, I know there is physical hardware doing the same thing,
> my work station for example:
> 
> [ ... ]
> c8000000-f7ffffff : PCI Bus 0000:00
>   c8000000-c81fffff : PCI Bus 0000:01
>   e0000000-efffffff : 0000:00:02.0
>     e0000000-e02fffff : BOOTFB
> [ ... ]
> f8000000-fbffffff : PCI MMCONFIG 0000 [bus 00-3f]
>   f8000000-fbffffff : Reserved
>     f8000000-fbffffff : pnp 00:06
> fd000000-fe7fffff : PCI Bus 0000:00
>   fd000000-fdabffff : pnp 00:07
> [ ... ]
> 
> Which of course is no guarantee that no naïve OS exists, but I think the
> chances that we'll run into trouble with this are rather small.

OK. Thank you.
Laszlo
Michael S. Tsirkin June 6, 2019, 1:59 p.m. UTC | #12
On Wed, May 29, 2019 at 06:05:41PM -0400, Michael S. Tsirkin wrote:
> On Tue, May 28, 2019 at 10:43:31PM +0200, Gerd Hoffmann wrote:
> > This patch changes the handling of the mmconfig area.  Thanks to the
> > pci(e) expander devices we already have the logic to exclude address
> > ranges from PCI0._CRS.  We can simply add the mmconfig address range
> > to the list get it excluded as well.
> > 
> > With that in place we can go with a fixed pci hole which covers the
> > whole area from the end of (low) ram to the ioapic.
> > 
> > This will make the whole logic alot less fragile.  No matter where the
> > firmware places the mmconfig xbar, things should work correctly.  The
> > guest also gets a bit more PCI address space (seabios boot):
> > 
> >     # cat /proc/iomem
> >     [ ... ]
> >     7ffdd000-7fffffff : reserved
> >     80000000-afffffff : PCI Bus 0000:00            <<-- this is new
> >     b0000000-bfffffff : PCI MMCONFIG 0000 [bus 00-ff]
> >       b0000000-bfffffff : reserved
> >     c0000000-febfffff : PCI Bus 0000:00
> >       f8000000-fbffffff : 0000:00:01.0
> >     [ ... ]
> > 
> > So this is a guest visible change.
> > 
> > Cc: László Érsek <lersek@redhat.com>
> > Cc: Igor Mammedov <imammedo@redhat.com>
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> 
> Hi Gerd!
> Please rebase on top of latest pci tree.
> 
> After the rebase this will start failing since we
> are now asserting on any changes to ACPI tables - and the way to
> fix it is to add a comma-separated list of
> changed ACPI tables to tests/bios-tables-test-allowed-diff.h
> 
> As a maintainer I will notice this and update the expected
> files before pushing.

ping.

> > ---
> >  hw/i386/acpi-build.c | 14 ++++++++++++++
> >  hw/pci-host/q35.c    | 31 ++++++++-----------------------
> >  2 files changed, 22 insertions(+), 23 deletions(-)
> > 
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index 0d78d738948c..abb0e0ce9f27 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -122,6 +122,8 @@ typedef struct FwCfgTPMConfig {
> >      uint8_t tpmppi_version;
> >  } QEMU_PACKED FwCfgTPMConfig;
> >  
> > +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
> > +
> >  static void init_common_fadt_data(Object *o, AcpiFadtData *data)
> >  {
> >      uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
> > @@ -1807,6 +1809,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >      CrsRangeSet crs_range_set;
> >      PCMachineState *pcms = PC_MACHINE(machine);
> >      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
> > +    AcpiMcfgInfo mcfg;
> >      uint32_t nr_mem = machine->ram_slots;
> >      int root_bus_limit = 0xFF;
> >      PCIBus *bus = NULL;
> > @@ -1921,6 +1924,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >          }
> >      }
> >  
> > +    /*
> > +     * At this point crs_range_set has all the ranges used by pci
> > +     * busses *other* than PCI0.  These ranges will be excluded from
> > +     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
> > +     * too.
> > +     */
> > +    if (acpi_get_mcfg(&mcfg)) {
> > +        crs_range_insert(crs_range_set.mem_ranges,
> > +                         mcfg.base, mcfg.base + mcfg.size - 1);
> > +    }
> > +
> >      scope = aml_scope("\\_SB.PCI0");
> >      /* build PCI0._CRS */
> >      crs = aml_resource_template();
> > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> > index 960939f5ed3e..72093320befe 100644
> > --- a/hw/pci-host/q35.c
> > +++ b/hw/pci-host/q35.c
> > @@ -258,15 +258,6 @@ static void q35_host_initfn(Object *obj)
> >      object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
> >                               (Object **) &s->mch.address_space_io,
> >                               qdev_prop_allow_set_link_before_realize, 0, NULL);
> > -
> > -    /* Leave enough space for the biggest MCFG BAR */
> > -    /* TODO: this matches current bios behaviour, but
> > -     * it's not a power of two, which means an MTRR
> > -     * can't cover it exactly.
> > -     */
> > -    range_set_bounds(&s->mch.pci_hole,
> > -            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
> > -            IO_APIC_DEFAULT_ADDRESS - 1);
> >  }
> >  
> >  static const TypeInfo q35_host_info = {
> > @@ -338,20 +329,6 @@ static void mch_update_pciexbar(MCHPCIState *mch)
> >      }
> >      addr = pciexbar & addr_mask;
> >      pcie_host_mmcfg_update(pehb, enable, addr, length);
> > -    /* Leave enough space for the MCFG BAR */
> > -    /*
> > -     * TODO: this matches current bios behaviour, but it's not a power of two,
> > -     * which means an MTRR can't cover it exactly.
> > -     */
> > -    if (enable) {
> > -        range_set_bounds(&mch->pci_hole,
> > -                         addr + length,
> > -                         IO_APIC_DEFAULT_ADDRESS - 1);
> > -    } else {
> > -        range_set_bounds(&mch->pci_hole,
> > -                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
> > -                         IO_APIC_DEFAULT_ADDRESS - 1);
> > -    }
> >  }
> >  
> >  /* PAM */
> > @@ -484,6 +461,14 @@ static void mch_update(MCHPCIState *mch)
> >      mch_update_pam(mch);
> >      mch_update_smram(mch);
> >      mch_update_ext_tseg_mbytes(mch);
> > +
> > +    /*
> > +     * pci hole goes from end-of-low-ram to io-apic.
> > +     * mmconfig will be excluded by the dsdt builder.
> > +     */
> > +    range_set_bounds(&mch->pci_hole,
> > +                     mch->below_4g_mem_size,
> > +                     IO_APIC_DEFAULT_ADDRESS - 1);
> >  }
> >  
> >  static int mch_post_load(void *opaque, int version_id)
> > -- 
> > 2.18.1
diff mbox series

Patch

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 0d78d738948c..abb0e0ce9f27 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -122,6 +122,8 @@  typedef struct FwCfgTPMConfig {
     uint8_t tpmppi_version;
 } QEMU_PACKED FwCfgTPMConfig;
 
+static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg);
+
 static void init_common_fadt_data(Object *o, AcpiFadtData *data)
 {
     uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
@@ -1807,6 +1809,7 @@  build_dsdt(GArray *table_data, BIOSLinker *linker,
     CrsRangeSet crs_range_set;
     PCMachineState *pcms = PC_MACHINE(machine);
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
+    AcpiMcfgInfo mcfg;
     uint32_t nr_mem = machine->ram_slots;
     int root_bus_limit = 0xFF;
     PCIBus *bus = NULL;
@@ -1921,6 +1924,17 @@  build_dsdt(GArray *table_data, BIOSLinker *linker,
         }
     }
 
+    /*
+     * At this point crs_range_set has all the ranges used by pci
+     * busses *other* than PCI0.  These ranges will be excluded from
+     * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
+     * too.
+     */
+    if (acpi_get_mcfg(&mcfg)) {
+        crs_range_insert(crs_range_set.mem_ranges,
+                         mcfg.base, mcfg.base + mcfg.size - 1);
+    }
+
     scope = aml_scope("\\_SB.PCI0");
     /* build PCI0._CRS */
     crs = aml_resource_template();
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 960939f5ed3e..72093320befe 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -258,15 +258,6 @@  static void q35_host_initfn(Object *obj)
     object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION,
                              (Object **) &s->mch.address_space_io,
                              qdev_prop_allow_set_link_before_realize, 0, NULL);
-
-    /* Leave enough space for the biggest MCFG BAR */
-    /* TODO: this matches current bios behaviour, but
-     * it's not a power of two, which means an MTRR
-     * can't cover it exactly.
-     */
-    range_set_bounds(&s->mch.pci_hole,
-            MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + MCH_HOST_BRIDGE_PCIEXBAR_MAX,
-            IO_APIC_DEFAULT_ADDRESS - 1);
 }
 
 static const TypeInfo q35_host_info = {
@@ -338,20 +329,6 @@  static void mch_update_pciexbar(MCHPCIState *mch)
     }
     addr = pciexbar & addr_mask;
     pcie_host_mmcfg_update(pehb, enable, addr, length);
-    /* Leave enough space for the MCFG BAR */
-    /*
-     * TODO: this matches current bios behaviour, but it's not a power of two,
-     * which means an MTRR can't cover it exactly.
-     */
-    if (enable) {
-        range_set_bounds(&mch->pci_hole,
-                         addr + length,
-                         IO_APIC_DEFAULT_ADDRESS - 1);
-    } else {
-        range_set_bounds(&mch->pci_hole,
-                         MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT,
-                         IO_APIC_DEFAULT_ADDRESS - 1);
-    }
 }
 
 /* PAM */
@@ -484,6 +461,14 @@  static void mch_update(MCHPCIState *mch)
     mch_update_pam(mch);
     mch_update_smram(mch);
     mch_update_ext_tseg_mbytes(mch);
+
+    /*
+     * pci hole goes from end-of-low-ram to io-apic.
+     * mmconfig will be excluded by the dsdt builder.
+     */
+    range_set_bounds(&mch->pci_hole,
+                     mch->below_4g_mem_size,
+                     IO_APIC_DEFAULT_ADDRESS - 1);
 }
 
 static int mch_post_load(void *opaque, int version_id)