diff mbox

[V8,7/9] acpi: Add generic MCFG table handling

Message ID 1464621262-26770-8-git-send-email-tn@semihalf.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tomasz Nowicki May 30, 2016, 3:14 p.m. UTC
In order to handle PCI config space regions properly in ACPI, new MCFG
interface is defined which does sanity checks on MCFG table and keeps its
root pointer. The user is able to lookup MCFG regions based on
host bridge root structure and domain:bus_start:bus_end touple.
Use pci_mmcfg_late_init old prototype to avoid another function name.

Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
Signed-off-by: Jayachandran C <jchandra@broadcom.com>
---
 drivers/acpi/Kconfig     |  3 ++
 drivers/acpi/Makefile    |  1 +
 drivers/acpi/pci_mcfg.c  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci-acpi.h |  2 ++
 include/linux/pci.h      |  2 +-
 5 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 drivers/acpi/pci_mcfg.c

Comments

Lorenzo Pieralisi June 3, 2016, 11:38 a.m. UTC | #1
On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
> In order to handle PCI config space regions properly in ACPI, new MCFG
> interface is defined which does sanity checks on MCFG table and keeps its
> root pointer. The user is able to lookup MCFG regions based on
> host bridge root structure and domain:bus_start:bus_end touple.
> Use pci_mmcfg_late_init old prototype to avoid another function name.

"According to PCI firmware specifications, on systems booting with ACPI,
PCI configuration for a host bridge must be set-up through the MCFG table
regions for non-hotpluggable bridges and _CBA method for hotpluggable ones.

Current MCFG table handling code, as implemented for x86, cannot be
easily generalized owing to x86 specific quirks handling and related
code, which makes it hard to reuse on other architectures.

In order to implement MCFG PCI configuration handling for new platforms
booting with ACPI (eg ARM64) this patch re-implements MCFG handling from
scratch in a streamlined fashion and provides (through a generic
interface available to all arches):

- Simplified MCFG table parsing (executed through the pci_mmcfg_late_init()
  hook as in current x86)
- MCFG regions look-up interface through domain:bus_start:bus_end tuple

The new MCFG regions handling interface is added to generic ACPI code
so that existing architectures (eg x86) can be moved over to it and
architectures relying on MCFG for ACPI PCI config space can rely on it
without having to resort to arch specific implementations."

> Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> Signed-off-by: Jayachandran C <jchandra@broadcom.com>
> ---
>  drivers/acpi/Kconfig     |  3 ++
>  drivers/acpi/Makefile    |  1 +
>  drivers/acpi/pci_mcfg.c  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci-acpi.h |  2 ++
>  include/linux/pci.h      |  2 +-
>  5 files changed, 101 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/acpi/pci_mcfg.c
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index b7e2e77..f98c328 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -217,6 +217,9 @@ config ACPI_PROCESSOR_IDLE
>  	bool
>  	select CPU_IDLE
>  
> +config ACPI_MCFG
> +	bool
> +
>  config ACPI_CPPC_LIB
>  	bool
>  	depends on ACPI_PROCESSOR
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 251ce85..632e81f 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -40,6 +40,7 @@ acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o
>  acpi-y				+= ec.o
>  acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
>  acpi-y				+= pci_root.o pci_link.o pci_irq.o
> +obj-$(CONFIG_ACPI_MCFG)		+= pci_mcfg.o
>  acpi-y				+= acpi_lpss.o acpi_apd.o
>  acpi-y				+= acpi_platform.o
>  acpi-y				+= acpi_pnp.o
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> new file mode 100644
> index 0000000..1847f74
> --- /dev/null
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -0,0 +1,94 @@
> +/*
> + * Copyright (C) 2016 Broadcom
> + *	Author: Jayachandran C <jchandra@broadcom.com>
> + * Copyright (C) 2016 Semihalf
> + * 	Author: Tomasz Nowicki <tn@semihalf.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation (the "GPL").
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License version 2 (GPLv2) for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * version 2 (GPLv2) along with this source code.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/pci-acpi.h>
> +
> +/* Root pointer to the mapped MCFG table */
> +static struct acpi_table_mcfg *mcfg_table;
> +static int mcfg_entries;
> +
> +int pci_mcfg_lookup(struct acpi_pci_root *root)
> +{
> +	struct acpi_mcfg_allocation *mptr, *entry = NULL;
> +	struct resource *bus_res = &root->secondary;
> +	int i;
> +
> +	if (mcfg_table) {
> +		mptr = (struct acpi_mcfg_allocation *) &mcfg_table[1];
> +		for (i = 0; i < mcfg_entries && !entry; i++, mptr++)
> +			if (mptr->pci_segment == root->segment &&
> +			    mptr->start_bus_number == bus_res->start)
> +				entry = mptr;
> +	}
> +
> +	/* not found, use _CBA if available, else error */
> +	if (!entry) {
> +		if (root->mcfg_addr)
> +			return root->mcfg_addr;
> +		pr_err("%04x:%pR MCFG lookup failed\n", root->segment, bus_res);
> +		return -ENOENT;
> +	} else if (root->mcfg_addr && entry->address != root->mcfg_addr) {
> +		pr_warn("%04x:%pR CBA %pa != MCFG %lx, using CBA\n",
> +			root->segment, bus_res, &root->mcfg_addr,
> +			(unsigned long)entry->address);
> +		return 0;
> +	}
> +
> +	/* found matching entry, bus range check */
> +	if (entry->end_bus_number != bus_res->end) {
> +		resource_size_t bus_end = min_t(resource_size_t,
> +					entry->end_bus_number, bus_res->end);
> +		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
> +			root->segment, bus_res, (unsigned long)bus_end);
> +		bus_res->end = bus_end;
> +	}
> +
> +	if (!root->mcfg_addr)
> +		root->mcfg_addr = entry->address;

Let's hope that no one will ever implement a hotplug bridge with config
space starting at physical address 0x0.

Nit: You should define what the return value means. For success, once you
return the _CBA address, once 0; this should be consistent.

Anyway, this function is not easy to read but it may well be fine, I will let
Bjorn decide what to do with corner cases:

a) _CBA is != 0 and you get a MCFG entry that matches its address (I am
    not sure that capping the _CRS bus numbers is PCI compliant in that case)
b) bus_end capping, either you leave code as-is (that caps also _CRS) or
   just warn and fail if the bus->end numbers mismatch

Pending Bjorn's opinion on the above (and commit log update):

Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

> +	return 0;
> +}
> +
> +static __init int pci_mcfg_parse(struct acpi_table_header *header)
> +{
> +	if (header->length < sizeof(struct acpi_table_mcfg))
> +		return -EINVAL;
> +
> +	mcfg_entries = (header->length - sizeof(struct acpi_table_mcfg)) /
> +					sizeof(struct acpi_mcfg_allocation);
> +	if (mcfg_entries == 0) {
> +		pr_err("MCFG has no entries\n");
> +		return -EINVAL;
> +	}
> +
> +	mcfg_table = (struct acpi_table_mcfg *)header;
> +	pr_info("MCFG table detected, %d entries\n", mcfg_entries);
> +	return 0;
> +}
> +
> +/* Interface called by ACPI - parse and save MCFG table */
> +void __init pci_mmcfg_late_init(void)
> +{
> +	int err = acpi_table_parse(ACPI_SIG_MCFG, pci_mcfg_parse);
> +	if (err)
> +		pr_err("Failed to parse MCFG (%d)\n", err);
> +}
> diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
> index 89ab057..e0e6dfc 100644
> --- a/include/linux/pci-acpi.h
> +++ b/include/linux/pci-acpi.h
> @@ -24,6 +24,8 @@ static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
>  }
>  extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle);
>  
> +extern int pci_mcfg_lookup(struct acpi_pci_root *root);
> +
>  static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
>  {
>  	struct pci_bus *pbus = pdev->bus;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index bba4053..9661c85 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1724,7 +1724,7 @@ void pcibios_free_irq(struct pci_dev *dev);
>  extern struct dev_pm_ops pcibios_pm_ops;
>  #endif
>  
> -#ifdef CONFIG_PCI_MMCONFIG
> +#if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG)
>  void __init pci_mmcfg_early_init(void);
>  void __init pci_mmcfg_late_init(void);
>  #else
> -- 
> 1.9.1
>
Tomasz Nowicki June 6, 2016, 12:55 p.m. UTC | #2
On 03.06.2016 13:38, Lorenzo Pieralisi wrote:
> On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
>> In order to handle PCI config space regions properly in ACPI, new MCFG
>> interface is defined which does sanity checks on MCFG table and keeps its
>> root pointer. The user is able to lookup MCFG regions based on
>> host bridge root structure and domain:bus_start:bus_end touple.
>> Use pci_mmcfg_late_init old prototype to avoid another function name.
>
> "According to PCI firmware specifications, on systems booting with ACPI,
> PCI configuration for a host bridge must be set-up through the MCFG table
> regions for non-hotpluggable bridges and _CBA method for hotpluggable ones.
>
> Current MCFG table handling code, as implemented for x86, cannot be
> easily generalized owing to x86 specific quirks handling and related
> code, which makes it hard to reuse on other architectures.
>
> In order to implement MCFG PCI configuration handling for new platforms
> booting with ACPI (eg ARM64) this patch re-implements MCFG handling from
> scratch in a streamlined fashion and provides (through a generic
> interface available to all arches):
>
> - Simplified MCFG table parsing (executed through the pci_mmcfg_late_init()
>    hook as in current x86)
> - MCFG regions look-up interface through domain:bus_start:bus_end tuple
>
> The new MCFG regions handling interface is added to generic ACPI code
> so that existing architectures (eg x86) can be moved over to it and
> architectures relying on MCFG for ACPI PCI config space can rely on it
> without having to resort to arch specific implementations."
>

[...]

>> +
>> +#include <linux/kernel.h>
>> +#include <linux/pci.h>
>> +#include <linux/pci-acpi.h>
>> +
>> +/* Root pointer to the mapped MCFG table */
>> +static struct acpi_table_mcfg *mcfg_table;
>> +static int mcfg_entries;
>> +
>> +int pci_mcfg_lookup(struct acpi_pci_root *root)
>> +{
>> +	struct acpi_mcfg_allocation *mptr, *entry = NULL;
>> +	struct resource *bus_res = &root->secondary;
>> +	int i;
>> +
>> +	if (mcfg_table) {
>> +		mptr = (struct acpi_mcfg_allocation *) &mcfg_table[1];
>> +		for (i = 0; i < mcfg_entries && !entry; i++, mptr++)
>> +			if (mptr->pci_segment == root->segment &&
>> +			    mptr->start_bus_number == bus_res->start)
>> +				entry = mptr;
>> +	}
>> +
>> +	/* not found, use _CBA if available, else error */
>> +	if (!entry) {
>> +		if (root->mcfg_addr)
>> +			return root->mcfg_addr;
>> +		pr_err("%04x:%pR MCFG lookup failed\n", root->segment, bus_res);
>> +		return -ENOENT;
>> +	} else if (root->mcfg_addr && entry->address != root->mcfg_addr) {
>> +		pr_warn("%04x:%pR CBA %pa != MCFG %lx, using CBA\n",
>> +			root->segment, bus_res, &root->mcfg_addr,
>> +			(unsigned long)entry->address);
>> +		return 0;
>> +	}
>> +
>> +	/* found matching entry, bus range check */
>> +	if (entry->end_bus_number != bus_res->end) {
>> +		resource_size_t bus_end = min_t(resource_size_t,
>> +					entry->end_bus_number, bus_res->end);
>> +		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
>> +			root->segment, bus_res, (unsigned long)bus_end);
>> +		bus_res->end = bus_end;
>> +	}
>> +
>> +	if (!root->mcfg_addr)
>> +		root->mcfg_addr = entry->address;
>
> Let's hope that no one will ever implement a hotplug bridge with config
> space starting at physical address 0x0.
>
> Nit: You should define what the return value means. For success, once you
> return the _CBA address, once 0; this should be consistent.

As we decided to return CFG start address in root->mcfg_addr we should 
return 0 for the case (!entry) && (root->mcfg_addr). I'll fix it.

>
> Anyway, this function is not easy to read but it may well be fine, I will let
> Bjorn decide what to do with corner cases:
>
> a) _CBA is != 0 and you get a MCFG entry that matches its address (I am
>      not sure that capping the _CRS bus numbers is PCI compliant in that case)
> b) bus_end capping, either you leave code as-is (that caps also _CRS) or
>     just warn and fail if the bus->end numbers mismatch
>
> Pending Bjorn's opinion on the above (and commit log update):
>
> Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
>

Thanks,
Tomasz
Bjorn Helgaas June 8, 2016, 1:56 a.m. UTC | #3
On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
> In order to handle PCI config space regions properly in ACPI, new MCFG
> interface is defined which does sanity checks on MCFG table and keeps its
> root pointer. The user is able to lookup MCFG regions based on
> host bridge root structure and domain:bus_start:bus_end touple.
> Use pci_mmcfg_late_init old prototype to avoid another function name.
> 
> Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> Signed-off-by: Jayachandran C <jchandra@broadcom.com>
> ---
>  drivers/acpi/Kconfig     |  3 ++
>  drivers/acpi/Makefile    |  1 +
>  drivers/acpi/pci_mcfg.c  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci-acpi.h |  2 ++
>  include/linux/pci.h      |  2 +-
>  5 files changed, 101 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/acpi/pci_mcfg.c
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index b7e2e77..f98c328 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -217,6 +217,9 @@ config ACPI_PROCESSOR_IDLE
>  	bool
>  	select CPU_IDLE
>  
> +config ACPI_MCFG
> +	bool
> +
>  config ACPI_CPPC_LIB
>  	bool
>  	depends on ACPI_PROCESSOR
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 251ce85..632e81f 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -40,6 +40,7 @@ acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o
>  acpi-y				+= ec.o
>  acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
>  acpi-y				+= pci_root.o pci_link.o pci_irq.o
> +obj-$(CONFIG_ACPI_MCFG)		+= pci_mcfg.o
>  acpi-y				+= acpi_lpss.o acpi_apd.o
>  acpi-y				+= acpi_platform.o
>  acpi-y				+= acpi_pnp.o
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> new file mode 100644
> index 0000000..1847f74
> --- /dev/null
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -0,0 +1,94 @@
> +/*
> + * Copyright (C) 2016 Broadcom
> + *	Author: Jayachandran C <jchandra@broadcom.com>
> + * Copyright (C) 2016 Semihalf
> + * 	Author: Tomasz Nowicki <tn@semihalf.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation (the "GPL").
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License version 2 (GPLv2) for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * version 2 (GPLv2) along with this source code.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/pci-acpi.h>
> +
> +/* Root pointer to the mapped MCFG table */
> +static struct acpi_table_mcfg *mcfg_table;
> +static int mcfg_entries;
> +
> +int pci_mcfg_lookup(struct acpi_pci_root *root)

I think this would be better if we passed in the domain and a pointer
to the bus range resource and returned the ECAM base address.  I don't
think we need to be connected to struct acpi_pci_root.

> +{
> +	struct acpi_mcfg_allocation *mptr, *entry = NULL;
> +	struct resource *bus_res = &root->secondary;
> +	int i;
> +
> +	if (mcfg_table) {
> +		mptr = (struct acpi_mcfg_allocation *) &mcfg_table[1];
> +		for (i = 0; i < mcfg_entries && !entry; i++, mptr++)
> +			if (mptr->pci_segment == root->segment &&
> +			    mptr->start_bus_number == bus_res->start)
> +				entry = mptr;
> +	}
> +
> +	/* not found, use _CBA if available, else error */
> +	if (!entry) {
> +		if (root->mcfg_addr)
> +			return root->mcfg_addr;
> +		pr_err("%04x:%pR MCFG lookup failed\n", root->segment, bus_res);
> +		return -ENOENT;
> +	} else if (root->mcfg_addr && entry->address != root->mcfg_addr) {
> +		pr_warn("%04x:%pR CBA %pa != MCFG %lx, using CBA\n",
> +			root->segment, bus_res, &root->mcfg_addr,
> +			(unsigned long)entry->address);
> +		return 0;
> +	}

I keep looking at this code, trying to find where we "use _CBA", but I
can't find it.  Oh, I see, acpi_pci_root_add() calls
acpi_pci_root_get_mcfg_addr() (which evaluates _CBA), and sets
root->mcfg_addr to the result.

"root->mcfg_addr" is sort of an unfortunate name because "MCFG" is the
ACPI table name, we've used "ECAM" for the memory-mapped config space,
and this address can come from either the MCFG table or the _CBA
method.

In the case where we have both _CBA and an MCFG entry, I think we
should prefer _CBA, and I'm not sure it's even worth warning about it.
So I think you could simplify this function if you made
pci_acpi_setup_ecam_mapping() look like this:

  ... pci_acpi_setup_ecam_mapping(...)
  {
    ...
    if (!root->mcfg_addr)
      root->mcfg_addr = pci_mcfg_lookup(root);

    if (!root->mcfg_addr) {
      dev_err(..., "no ECAM region found\n");
      return -EINVAL;
    }

> +
> +	/* found matching entry, bus range check */
> +	if (entry->end_bus_number != bus_res->end) {
> +		resource_size_t bus_end = min_t(resource_size_t,
> +					entry->end_bus_number, bus_res->end);
> +		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
> +			root->segment, bus_res, (unsigned long)bus_end);
> +		bus_res->end = bus_end;
> +	}
> +
> +	if (!root->mcfg_addr)
> +		root->mcfg_addr = entry->address;

Please move the assignment to the caller (I think Lorenzo pointed this
out already).

> +	return 0;
> +}
> +
> +static __init int pci_mcfg_parse(struct acpi_table_header *header)
> +{
> +	if (header->length < sizeof(struct acpi_table_mcfg))
> +		return -EINVAL;
> +
> +	mcfg_entries = (header->length - sizeof(struct acpi_table_mcfg)) /
> +					sizeof(struct acpi_mcfg_allocation);
> +	if (mcfg_entries == 0) {
> +		pr_err("MCFG has no entries\n");

Include an address here?  I'm not really sure either of the messages
here is necessary.  Users (callers of pci_mcfg_lookup()) will notice
if we can't find any ECAM space and will probably complain there,
where the message can include more information, e.g., the affected
device.

> +		return -EINVAL;
> +	}
> +
> +	mcfg_table = (struct acpi_table_mcfg *)header;
> +	pr_info("MCFG table detected, %d entries\n", mcfg_entries);
> +	return 0;
> +}
> +
> +/* Interface called by ACPI - parse and save MCFG table */

I think we save a *pointer* to the MCFG table, not the table itself.

And acpi_table_parse() calls early_acpi_os_unmap_memory() immediately
after it calls pci_mcfg_parse(), so I'm doubtful that the pointer
remains valid.

> +void __init pci_mmcfg_late_init(void)
> +{
> +	int err = acpi_table_parse(ACPI_SIG_MCFG, pci_mcfg_parse);
> +	if (err)
> +		pr_err("Failed to parse MCFG (%d)\n", err);
> +}
> diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
> index 89ab057..e0e6dfc 100644
> --- a/include/linux/pci-acpi.h
> +++ b/include/linux/pci-acpi.h
> @@ -24,6 +24,8 @@ static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
>  }
>  extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle);
>  
> +extern int pci_mcfg_lookup(struct acpi_pci_root *root);
> +
>  static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
>  {
>  	struct pci_bus *pbus = pdev->bus;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index bba4053..9661c85 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1724,7 +1724,7 @@ void pcibios_free_irq(struct pci_dev *dev);
>  extern struct dev_pm_ops pcibios_pm_ops;
>  #endif
>  
> -#ifdef CONFIG_PCI_MMCONFIG
> +#if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG)
>  void __init pci_mmcfg_early_init(void);
>  void __init pci_mmcfg_late_init(void);
>  #else
> -- 
> 1.9.1
>
Tomasz Nowicki June 8, 2016, 12:21 p.m. UTC | #4
On 08.06.2016 03:56, Bjorn Helgaas wrote:
> On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
>> In order to handle PCI config space regions properly in ACPI, new MCFG
>> interface is defined which does sanity checks on MCFG table and keeps its
>> root pointer. The user is able to lookup MCFG regions based on
>> host bridge root structure and domain:bus_start:bus_end touple.
>> Use pci_mmcfg_late_init old prototype to avoid another function name.
>>
>> Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
>> Signed-off-by: Jayachandran C <jchandra@broadcom.com>
>> ---
>>   drivers/acpi/Kconfig     |  3 ++
>>   drivers/acpi/Makefile    |  1 +
>>   drivers/acpi/pci_mcfg.c  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/linux/pci-acpi.h |  2 ++
>>   include/linux/pci.h      |  2 +-
>>   5 files changed, 101 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/acpi/pci_mcfg.c
>>
>> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
>> index b7e2e77..f98c328 100644
>> --- a/drivers/acpi/Kconfig
>> +++ b/drivers/acpi/Kconfig
>> @@ -217,6 +217,9 @@ config ACPI_PROCESSOR_IDLE
>>   	bool
>>   	select CPU_IDLE
>>
>> +config ACPI_MCFG
>> +	bool
>> +
>>   config ACPI_CPPC_LIB
>>   	bool
>>   	depends on ACPI_PROCESSOR
>> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
>> index 251ce85..632e81f 100644
>> --- a/drivers/acpi/Makefile
>> +++ b/drivers/acpi/Makefile
>> @@ -40,6 +40,7 @@ acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o
>>   acpi-y				+= ec.o
>>   acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
>>   acpi-y				+= pci_root.o pci_link.o pci_irq.o
>> +obj-$(CONFIG_ACPI_MCFG)		+= pci_mcfg.o
>>   acpi-y				+= acpi_lpss.o acpi_apd.o
>>   acpi-y				+= acpi_platform.o
>>   acpi-y				+= acpi_pnp.o
>> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
>> new file mode 100644
>> index 0000000..1847f74
>> --- /dev/null
>> +++ b/drivers/acpi/pci_mcfg.c
>> @@ -0,0 +1,94 @@
>> +/*
>> + * Copyright (C) 2016 Broadcom
>> + *	Author: Jayachandran C <jchandra@broadcom.com>
>> + * Copyright (C) 2016 Semihalf
>> + * 	Author: Tomasz Nowicki <tn@semihalf.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License, version 2, as
>> + * published by the Free Software Foundation (the "GPL").
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License version 2 (GPLv2) for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * version 2 (GPLv2) along with this source code.
>> + */
>> +
>> +#define pr_fmt(fmt) "ACPI: " fmt
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/pci.h>
>> +#include <linux/pci-acpi.h>
>> +
>> +/* Root pointer to the mapped MCFG table */
>> +static struct acpi_table_mcfg *mcfg_table;
>> +static int mcfg_entries;
>> +
>> +int pci_mcfg_lookup(struct acpi_pci_root *root)
>
> I think this would be better if we passed in the domain and a pointer
> to the bus range resource and returned the ECAM base address.  I don't
> think we need to be connected to struct acpi_pci_root.

I will use domain and bus range resource as you suggested.

>
>> +{
>> +	struct acpi_mcfg_allocation *mptr, *entry = NULL;
>> +	struct resource *bus_res = &root->secondary;
>> +	int i;
>> +
>> +	if (mcfg_table) {
>> +		mptr = (struct acpi_mcfg_allocation *) &mcfg_table[1];
>> +		for (i = 0; i < mcfg_entries && !entry; i++, mptr++)
>> +			if (mptr->pci_segment == root->segment &&
>> +			    mptr->start_bus_number == bus_res->start)
>> +				entry = mptr;
>> +	}
>> +
>> +	/* not found, use _CBA if available, else error */
>> +	if (!entry) {
>> +		if (root->mcfg_addr)
>> +			return root->mcfg_addr;
>> +		pr_err("%04x:%pR MCFG lookup failed\n", root->segment, bus_res);
>> +		return -ENOENT;
>> +	} else if (root->mcfg_addr && entry->address != root->mcfg_addr) {
>> +		pr_warn("%04x:%pR CBA %pa != MCFG %lx, using CBA\n",
>> +			root->segment, bus_res, &root->mcfg_addr,
>> +			(unsigned long)entry->address);
>> +		return 0;
>> +	}
>
> I keep looking at this code, trying to find where we "use _CBA", but I
> can't find it.  Oh, I see, acpi_pci_root_add() calls
> acpi_pci_root_get_mcfg_addr() (which evaluates _CBA), and sets
> root->mcfg_addr to the result.
>
> "root->mcfg_addr" is sort of an unfortunate name because "MCFG" is the
> ACPI table name, we've used "ECAM" for the memory-mapped config space,
> and this address can come from either the MCFG table or the _CBA
> method.
>
> In the case where we have both _CBA and an MCFG entry, I think we
> should prefer _CBA, and I'm not sure it's even worth warning about it.
> So I think you could simplify this function if you made
> pci_acpi_setup_ecam_mapping() look like this:
>
>    ... pci_acpi_setup_ecam_mapping(...)
>    {
>      ...
>      if (!root->mcfg_addr)
>        root->mcfg_addr = pci_mcfg_lookup(root);
>
>      if (!root->mcfg_addr) {
>        dev_err(..., "no ECAM region found\n");
>        return -EINVAL;
>      }

OK

>
>> +
>> +	/* found matching entry, bus range check */
>> +	if (entry->end_bus_number != bus_res->end) {
>> +		resource_size_t bus_end = min_t(resource_size_t,
>> +					entry->end_bus_number, bus_res->end);
>> +		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
>> +			root->segment, bus_res, (unsigned long)bus_end);
>> +		bus_res->end = bus_end;
>> +	}

What about bus end mismatch case? Should we trim the host bridge bus 
range or expect MCFG entry covers that range? Sometimes we get _BBN-0xFF 
bus range, not from _CRS.

>> +
>> +	if (!root->mcfg_addr)
>> +		root->mcfg_addr = entry->address;
>
> Please move the assignment to the caller (I think Lorenzo pointed this
> out already).
>
>> +	return 0;
>> +}
>> +
>> +static __init int pci_mcfg_parse(struct acpi_table_header *header)
>> +{
>> +	if (header->length < sizeof(struct acpi_table_mcfg))
>> +		return -EINVAL;
>> +
>> +	mcfg_entries = (header->length - sizeof(struct acpi_table_mcfg)) /
>> +					sizeof(struct acpi_mcfg_allocation);
>> +	if (mcfg_entries == 0) {
>> +		pr_err("MCFG has no entries\n");
>
> Include an address here?  I'm not really sure either of the messages
> here is necessary.  Users (callers of pci_mcfg_lookup()) will notice
> if we can't find any ECAM space and will probably complain there,
> where the message can include more information, e.g., the affected
> device.

I would keep message about how many entries we found here. It would be 
valuable information IMO.

>
>> +		return -EINVAL;
>> +	}
>> +
>> +	mcfg_table = (struct acpi_table_mcfg *)header;
>> +	pr_info("MCFG table detected, %d entries\n", mcfg_entries);
>> +	return 0;
>> +}
>> +
>> +/* Interface called by ACPI - parse and save MCFG table */
>
> I think we save a *pointer* to the MCFG table, not the table itself.

Right, the comment is broken.

>
> And acpi_table_parse() calls early_acpi_os_unmap_memory() immediately
> after it calls pci_mcfg_parse(), so I'm doubtful that the pointer
> remains valid.

At this stage early_acpi_os_unmap_memory() is doing nothing since 
acpi_early_init() set acpi_gbl_permanent_mmap to 1 way before. The 
pointer is fine then.

Thanks,
Tomasz
Bjorn Helgaas June 8, 2016, 1:17 p.m. UTC | #5
On Wed, Jun 08, 2016 at 02:21:30PM +0200, Tomasz Nowicki wrote:
> On 08.06.2016 03:56, Bjorn Helgaas wrote:
> >On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
> >>In order to handle PCI config space regions properly in ACPI, new MCFG
> >>interface is defined which does sanity checks on MCFG table and keeps its
> >>root pointer. The user is able to lookup MCFG regions based on
> >>host bridge root structure and domain:bus_start:bus_end touple.
> >>Use pci_mmcfg_late_init old prototype to avoid another function name.

> >>+	/* found matching entry, bus range check */
> >>+	if (entry->end_bus_number != bus_res->end) {
> >>+		resource_size_t bus_end = min_t(resource_size_t,
> >>+					entry->end_bus_number, bus_res->end);
> >>+		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
> >>+			root->segment, bus_res, (unsigned long)bus_end);
> >>+		bus_res->end = bus_end;
> >>+	}
> 
> What about bus end mismatch case? Should we trim the host bridge bus
> range or expect MCFG entry covers that range? Sometimes we get
> _BBN-0xFF bus range, not from _CRS.

Lack of a bus range in _CRS is a firmware defect.  There's a comment
about this in acpi_pci_root_add().  On x86, we probably had to live
with firmware in the field that had this defect.  I think we should
expect all ARM64 systems to provide a bus number range in _CRS, and
fail the attach if it's not there.

I don't think we should warn about an MCFG entry that covers more than
the _CRS bus range.  On x86, it's common to have something like:

  ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-7f])
  ACPI: PCI Root Bridge [PCI1] (domain 0000 [bus 80-ff])

with a single MCFG entry that covers [bus 00-ff].  That seems
reasonable and I don't think it's worth warning about it.

If the MCFG entry doesn't cover all of a _CRS bus range, we should
just fail so we can find and fix broken firmware.

> >>+/* Interface called by ACPI - parse and save MCFG table */
> >
> >I think we save a *pointer* to the MCFG table, not the table itself.
> 
> Right, the comment is broken.
> 
> >And acpi_table_parse() calls early_acpi_os_unmap_memory() immediately
> >after it calls pci_mcfg_parse(), so I'm doubtful that the pointer
> >remains valid.
> 
> At this stage early_acpi_os_unmap_memory() is doing nothing since
> acpi_early_init() set acpi_gbl_permanent_mmap to 1 way before. The
> pointer is fine then.

Hmmm...  I see your argument, but this is a problem waiting to happen.
We should not depend on the internal implementation of
early_acpi_os_unmap_memory().  The pattern of:

  y = x;
  unmap(x);
  z = *y;

is just broken and we shouldn't expect readers to recognize that "oh,
unmap() isn't really unmapping anything in this special case, so this
looks wrong but is really fine."

Bjorn
Tomasz Nowicki June 8, 2016, 1:44 p.m. UTC | #6
On 08.06.2016 15:17, Bjorn Helgaas wrote:
> On Wed, Jun 08, 2016 at 02:21:30PM +0200, Tomasz Nowicki wrote:
>> On 08.06.2016 03:56, Bjorn Helgaas wrote:
>>> On Mon, May 30, 2016 at 05:14:20PM +0200, Tomasz Nowicki wrote:
>>>> In order to handle PCI config space regions properly in ACPI, new MCFG
>>>> interface is defined which does sanity checks on MCFG table and keeps its
>>>> root pointer. The user is able to lookup MCFG regions based on
>>>> host bridge root structure and domain:bus_start:bus_end touple.
>>>> Use pci_mmcfg_late_init old prototype to avoid another function name.
>
>>>> +	/* found matching entry, bus range check */
>>>> +	if (entry->end_bus_number != bus_res->end) {
>>>> +		resource_size_t bus_end = min_t(resource_size_t,
>>>> +					entry->end_bus_number, bus_res->end);
>>>> +		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
>>>> +			root->segment, bus_res, (unsigned long)bus_end);
>>>> +		bus_res->end = bus_end;
>>>> +	}
>>
>> What about bus end mismatch case? Should we trim the host bridge bus
>> range or expect MCFG entry covers that range? Sometimes we get
>> _BBN-0xFF bus range, not from _CRS.
>
> Lack of a bus range in _CRS is a firmware defect.  There's a comment
> about this in acpi_pci_root_add().  On x86, we probably had to live
> with firmware in the field that had this defect.  I think we should
> expect all ARM64 systems to provide a bus number range in _CRS, and
> fail the attach if it's not there.
>
> I don't think we should warn about an MCFG entry that covers more than
> the _CRS bus range.  On x86, it's common to have something like:
>
>    ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-7f])
>    ACPI: PCI Root Bridge [PCI1] (domain 0000 [bus 80-ff])
>
> with a single MCFG entry that covers [bus 00-ff].  That seems
> reasonable and I don't think it's worth warning about it.
>
> If the MCFG entry doesn't cover all of a _CRS bus range, we should
> just fail so we can find and fix broken firmware.

Make sense to me.

>
>>>> +/* Interface called by ACPI - parse and save MCFG table */
>>>
>>> I think we save a *pointer* to the MCFG table, not the table itself.
>>
>> Right, the comment is broken.
>>
>>> And acpi_table_parse() calls early_acpi_os_unmap_memory() immediately
>>> after it calls pci_mcfg_parse(), so I'm doubtful that the pointer
>>> remains valid.
>>
>> At this stage early_acpi_os_unmap_memory() is doing nothing since
>> acpi_early_init() set acpi_gbl_permanent_mmap to 1 way before. The
>> pointer is fine then.
>
> Hmmm...  I see your argument, but this is a problem waiting to happen.
> We should not depend on the internal implementation of
> early_acpi_os_unmap_memory().  The pattern of:
>
>    y = x;
>    unmap(x);
>    z = *y;
>
> is just broken and we shouldn't expect readers to recognize that "oh,
> unmap() isn't really unmapping anything in this special case, so this
> looks wrong but is really fine."
>

Right, so we are back to MCFG cache.

Thanks,
Tomasz
diff mbox

Patch

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b7e2e77..f98c328 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -217,6 +217,9 @@  config ACPI_PROCESSOR_IDLE
 	bool
 	select CPU_IDLE
 
+config ACPI_MCFG
+	bool
+
 config ACPI_CPPC_LIB
 	bool
 	depends on ACPI_PROCESSOR
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 251ce85..632e81f 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -40,6 +40,7 @@  acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o
 acpi-y				+= ec.o
 acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
 acpi-y				+= pci_root.o pci_link.o pci_irq.o
+obj-$(CONFIG_ACPI_MCFG)		+= pci_mcfg.o
 acpi-y				+= acpi_lpss.o acpi_apd.o
 acpi-y				+= acpi_platform.o
 acpi-y				+= acpi_pnp.o
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
new file mode 100644
index 0000000..1847f74
--- /dev/null
+++ b/drivers/acpi/pci_mcfg.c
@@ -0,0 +1,94 @@ 
+/*
+ * Copyright (C) 2016 Broadcom
+ *	Author: Jayachandran C <jchandra@broadcom.com>
+ * Copyright (C) 2016 Semihalf
+ * 	Author: Tomasz Nowicki <tn@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+
+/* Root pointer to the mapped MCFG table */
+static struct acpi_table_mcfg *mcfg_table;
+static int mcfg_entries;
+
+int pci_mcfg_lookup(struct acpi_pci_root *root)
+{
+	struct acpi_mcfg_allocation *mptr, *entry = NULL;
+	struct resource *bus_res = &root->secondary;
+	int i;
+
+	if (mcfg_table) {
+		mptr = (struct acpi_mcfg_allocation *) &mcfg_table[1];
+		for (i = 0; i < mcfg_entries && !entry; i++, mptr++)
+			if (mptr->pci_segment == root->segment &&
+			    mptr->start_bus_number == bus_res->start)
+				entry = mptr;
+	}
+
+	/* not found, use _CBA if available, else error */
+	if (!entry) {
+		if (root->mcfg_addr)
+			return root->mcfg_addr;
+		pr_err("%04x:%pR MCFG lookup failed\n", root->segment, bus_res);
+		return -ENOENT;
+	} else if (root->mcfg_addr && entry->address != root->mcfg_addr) {
+		pr_warn("%04x:%pR CBA %pa != MCFG %lx, using CBA\n",
+			root->segment, bus_res, &root->mcfg_addr,
+			(unsigned long)entry->address);
+		return 0;
+	}
+
+	/* found matching entry, bus range check */
+	if (entry->end_bus_number != bus_res->end) {
+		resource_size_t bus_end = min_t(resource_size_t,
+					entry->end_bus_number, bus_res->end);
+		pr_warn("%04x:%pR bus end mismatch, using %02lx\n",
+			root->segment, bus_res, (unsigned long)bus_end);
+		bus_res->end = bus_end;
+	}
+
+	if (!root->mcfg_addr)
+		root->mcfg_addr = entry->address;
+	return 0;
+}
+
+static __init int pci_mcfg_parse(struct acpi_table_header *header)
+{
+	if (header->length < sizeof(struct acpi_table_mcfg))
+		return -EINVAL;
+
+	mcfg_entries = (header->length - sizeof(struct acpi_table_mcfg)) /
+					sizeof(struct acpi_mcfg_allocation);
+	if (mcfg_entries == 0) {
+		pr_err("MCFG has no entries\n");
+		return -EINVAL;
+	}
+
+	mcfg_table = (struct acpi_table_mcfg *)header;
+	pr_info("MCFG table detected, %d entries\n", mcfg_entries);
+	return 0;
+}
+
+/* Interface called by ACPI - parse and save MCFG table */
+void __init pci_mmcfg_late_init(void)
+{
+	int err = acpi_table_parse(ACPI_SIG_MCFG, pci_mcfg_parse);
+	if (err)
+		pr_err("Failed to parse MCFG (%d)\n", err);
+}
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 89ab057..e0e6dfc 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -24,6 +24,8 @@  static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
 }
 extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle);
 
+extern int pci_mcfg_lookup(struct acpi_pci_root *root);
+
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
 	struct pci_bus *pbus = pdev->bus;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bba4053..9661c85 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1724,7 +1724,7 @@  void pcibios_free_irq(struct pci_dev *dev);
 extern struct dev_pm_ops pcibios_pm_ops;
 #endif
 
-#ifdef CONFIG_PCI_MMCONFIG
+#if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG)
 void __init pci_mmcfg_early_init(void);
 void __init pci_mmcfg_late_init(void);
 #else