diff mbox

[v6,6/6] pci: Add support for creating a generic host_bridge from device tree

Message ID 1394020137-1830-7-git-send-email-Liviu.Dudau@arm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

liviu.dudau@arm.com March 5, 2014, 11:48 a.m. UTC
Several platforms use a rather generic version of parsing
the device tree to find the host bridge ranges. Move the common code
into the generic PCI code and use it to create a pci_host_bridge
structure that can be used by arch code.

Based on early attempts by Andrew Murray to unify the code.
Used powerpc and microblaze PCI code as starting point.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Tested-by: Tanmay Inamdar <tinamdar@apm.com>
---
 drivers/pci/host-bridge.c | 156 ++++++++++++++++++++++++++++++++++++
 include/linux/pci.h       |  13 +++
 2 files changed, 169 insertions(+)

Comments

Grant Likely March 7, 2014, 9:14 p.m. UTC | #1
On Wed,  5 Mar 2014 11:48:57 +0000, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> Several platforms use a rather generic version of parsing
> the device tree to find the host bridge ranges. Move the common code
> into the generic PCI code and use it to create a pci_host_bridge
> structure that can be used by arch code.
> 
> Based on early attempts by Andrew Murray to unify the code.
> Used powerpc and microblaze PCI code as starting point.
> 
> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> Tested-by: Tanmay Inamdar <tinamdar@apm.com>

Tentative ack for the whole series conditional on Arnd or Ben

g.

> ---
>  drivers/pci/host-bridge.c | 156 ++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h       |  13 +++
>  2 files changed, 169 insertions(+)
> 
> diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> index 8708b652..db9f51a 100644
> --- a/drivers/pci/host-bridge.c
> +++ b/drivers/pci/host-bridge.c
> @@ -6,9 +6,14 @@
>  #include <linux/init.h>
>  #include <linux/pci.h>
>  #include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_pci.h>
> +#include <linux/slab.h>
>  
>  #include "pci.h"
>  
> +static atomic_t domain_nr = ATOMIC_INIT(-1);
> +
>  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
>  {
>  	while (bus->parent)
> @@ -92,3 +97,154 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
>  	res->end = region->end + offset;
>  }
>  EXPORT_SYMBOL(pcibios_bus_to_resource);
> +
> +#ifdef CONFIG_OF
> +/**
> + * Simple version of the platform specific code for filtering the list
> + * of resources obtained from the ranges declaration in DT.
> + *
> + * Platforms can override this function in order to impose stronger
> + * constraints onto the list of resources that a host bridge can use.
> + * The filtered list will then be used to create a root bus and associate
> + * it with the host bridge.
> + *
> + */
> +int __weak pcibios_fixup_bridge_ranges(struct list_head *resources)
> +{
> +	return 0;
> +}
> +
> +/**
> + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> + * @dev: device node of the host bridge having the range property
> + * @resources: list where the range of resources will be added after DT parsing
> + * @io_base: pointer to a variable that will contain the physical address for
> + * the start of the I/O range.
> + *
> + * It is the caller's job to free the @resources list if an error is returned.
> + *
> + * This function will parse the "ranges" property of a PCI host bridge device
> + * node and setup the resource mapping based on its content. It is expected
> + * that the property conforms with the Power ePAPR document.
> + *
> + * Each architecture is then offered the chance of applying their own
> + * filtering of pci_host_bridge_windows based on their own restrictions by
> + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> + * can then be used when creating a pci_host_bridge structure.
> + */
> +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> +		struct list_head *resources, resource_size_t *io_base)
> +{
> +	struct resource *res;
> +	struct of_pci_range range;
> +	struct of_pci_range_parser parser;
> +	int err;
> +
> +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> +
> +	/* Check for ranges property */
> +	err = of_pci_range_parser_init(&parser, dev);
> +	if (err)
> +		return err;
> +
> +	pr_debug("Parsing ranges property...\n");
> +	for_each_of_pci_range(&parser, &range) {
> +		/* Read next ranges element */
> +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> +				range.pci_space, range.pci_addr);
> +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> +					range.cpu_addr, range.size);
> +
> +		/*
> +		 * If we failed translation or got a zero-sized region
> +		 * then skip this range
> +		 */
> +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> +			continue;
> +
> +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> +		if (!res)
> +			return -ENOMEM;
> +
> +		of_pci_range_to_resource(&range, dev, res);
> +
> +		if (resource_type(res) == IORESOURCE_IO)
> +			*io_base = range.cpu_addr;
> +
> +		pci_add_resource_offset(resources, res,
> +				res->start - range.pci_addr);
> +	}
> +
> +	/* Apply architecture specific fixups for the ranges */
> +	return pcibios_fixup_bridge_ranges(resources);
> +}
> +
> +/**
> + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> + * information passed in the DT.
> + * @parent: device owning this host bridge
> + * @ops: pci_ops associated with the host controller
> + * @host_data: opaque data structure used by the host controller.
> + *
> + * returns a pointer to the newly created pci_host_bridge structure, or
> + * an ERR_PTR()-encoded error code if the call failed.
> + *
> + * This function will try to obtain the host bridge domain number by
> + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> + * fails, a local allocator will be used that will put each host bridge
> + * in a new domain.
> + */
> +struct pci_host_bridge *
> +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> +{
> +	int err, domain, busno;
> +	struct resource *bus_range;
> +	struct pci_bus *root_bus;
> +	struct pci_host_bridge *bridge;
> +	resource_size_t io_base;
> +	LIST_HEAD(res);
> +
> +	bus_range = kzalloc(sizeof(*bus_range), GFP_KERNEL);
> +	if (!bus_range)
> +		return ERR_PTR(-ENOMEM);
> +
> +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> +	if (domain == -ENODEV)
> +		domain = atomic_inc_return(&domain_nr);
> +
> +	err = of_pci_parse_bus_range(parent->of_node, bus_range);
> +	if (err) {
> +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> +			parent->of_node->full_name);
> +		bus_range->start = 0;
> +		bus_range->end = 255;
> +		bus_range->flags = IORESOURCE_BUS;
> +	}
> +	busno = bus_range->start;
> +	pci_add_resource(&res, bus_range);
> +
> +	/* now parse the rest of host bridge bus ranges */
> +	err = pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base);
> +	if (err)
> +		goto err_create;
> +
> +	/* then create the root bus */
> +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> +						ops, host_data, &res);
> +	if (IS_ERR(root_bus)) {
> +		err = PTR_ERR(root_bus);
> +		goto err_create;
> +	}
> +
> +	bridge = to_pci_host_bridge(root_bus->bridge);
> +	bridge->io_base = io_base;
> +
> +	return bridge;
> +
> +err_create:
> +	pci_free_resource_list(&res);
> +	return ERR_PTR(err);
> +}
> +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> +
> +#endif /* CONFIG_OF */
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 1eed009..40ddd3d 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -395,6 +395,7 @@ struct pci_host_bridge {
>  	struct device dev;
>  	struct pci_bus *bus;		/* root bus */
>  	int domain_nr;
> +	resource_size_t io_base;	/* physical address for the start of I/O area */
>  	struct list_head windows;	/* pci_host_bridge_windows */
>  	void (*release_fn)(struct pci_host_bridge *);
>  	void *release_data;
> @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
>  	return bus ? bus->dev.of_node : NULL;
>  }
>  
> +struct pci_host_bridge *
> +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> +			void *host_data);
> +
> +int pcibios_fixup_bridge_ranges(struct list_head *resources);
>  #else /* CONFIG_OF */
>  static inline void pci_set_of_node(struct pci_dev *dev) { }
>  static inline void pci_release_of_node(struct pci_dev *dev) { }
>  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
>  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> +
> +static inline struct pci_host_bridge *
> +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> +			void *host_data)
> +{
> +	return NULL;
> +}
>  #endif  /* CONFIG_OF */
>  
>  #ifdef CONFIG_EEH
> -- 
> 1.9.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau March 8, 2014, 10:29 a.m. UTC | #2
On Fri, Mar 07, 2014 at 09:14:27PM +0000, Grant Likely wrote:
> On Wed,  5 Mar 2014 11:48:57 +0000, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > Several platforms use a rather generic version of parsing
> > the device tree to find the host bridge ranges. Move the common code
> > into the generic PCI code and use it to create a pci_host_bridge
> > structure that can be used by arch code.
> > 
> > Based on early attempts by Andrew Murray to unify the code.
> > Used powerpc and microblaze PCI code as starting point.
> > 
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > Tested-by: Tanmay Inamdar <tinamdar@apm.com>
> 
> Tentative ack for the whole series conditional on Arnd or Ben

Thanks Grant!

Liviu

> 
> g.
> 
> > ---
> >  drivers/pci/host-bridge.c | 156 ++++++++++++++++++++++++++++++++++++
> >  include/linux/pci.h       |  13 +++
> >  2 files changed, 169 insertions(+)
> > 
> > diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> > index 8708b652..db9f51a 100644
> > --- a/drivers/pci/host-bridge.c
> > +++ b/drivers/pci/host-bridge.c
> > @@ -6,9 +6,14 @@
> >  #include <linux/init.h>
> >  #include <linux/pci.h>
> >  #include <linux/module.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_pci.h>
> > +#include <linux/slab.h>
> >  
> >  #include "pci.h"
> >  
> > +static atomic_t domain_nr = ATOMIC_INIT(-1);
> > +
> >  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
> >  {
> >  	while (bus->parent)
> > @@ -92,3 +97,154 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
> >  	res->end = region->end + offset;
> >  }
> >  EXPORT_SYMBOL(pcibios_bus_to_resource);
> > +
> > +#ifdef CONFIG_OF
> > +/**
> > + * Simple version of the platform specific code for filtering the list
> > + * of resources obtained from the ranges declaration in DT.
> > + *
> > + * Platforms can override this function in order to impose stronger
> > + * constraints onto the list of resources that a host bridge can use.
> > + * The filtered list will then be used to create a root bus and associate
> > + * it with the host bridge.
> > + *
> > + */
> > +int __weak pcibios_fixup_bridge_ranges(struct list_head *resources)
> > +{
> > +	return 0;
> > +}
> > +
> > +/**
> > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > + * @dev: device node of the host bridge having the range property
> > + * @resources: list where the range of resources will be added after DT parsing
> > + * @io_base: pointer to a variable that will contain the physical address for
> > + * the start of the I/O range.
> > + *
> > + * It is the caller's job to free the @resources list if an error is returned.
> > + *
> > + * This function will parse the "ranges" property of a PCI host bridge device
> > + * node and setup the resource mapping based on its content. It is expected
> > + * that the property conforms with the Power ePAPR document.
> > + *
> > + * Each architecture is then offered the chance of applying their own
> > + * filtering of pci_host_bridge_windows based on their own restrictions by
> > + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> > + * can then be used when creating a pci_host_bridge structure.
> > + */
> > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > +		struct list_head *resources, resource_size_t *io_base)
> > +{
> > +	struct resource *res;
> > +	struct of_pci_range range;
> > +	struct of_pci_range_parser parser;
> > +	int err;
> > +
> > +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > +
> > +	/* Check for ranges property */
> > +	err = of_pci_range_parser_init(&parser, dev);
> > +	if (err)
> > +		return err;
> > +
> > +	pr_debug("Parsing ranges property...\n");
> > +	for_each_of_pci_range(&parser, &range) {
> > +		/* Read next ranges element */
> > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +				range.pci_space, range.pci_addr);
> > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +					range.cpu_addr, range.size);
> > +
> > +		/*
> > +		 * If we failed translation or got a zero-sized region
> > +		 * then skip this range
> > +		 */
> > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +			continue;
> > +
> > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +		if (!res)
> > +			return -ENOMEM;
> > +
> > +		of_pci_range_to_resource(&range, dev, res);
> > +
> > +		if (resource_type(res) == IORESOURCE_IO)
> > +			*io_base = range.cpu_addr;
> > +
> > +		pci_add_resource_offset(resources, res,
> > +				res->start - range.pci_addr);
> > +	}
> > +
> > +	/* Apply architecture specific fixups for the ranges */
> > +	return pcibios_fixup_bridge_ranges(resources);
> > +}
> > +
> > +/**
> > + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> > + * information passed in the DT.
> > + * @parent: device owning this host bridge
> > + * @ops: pci_ops associated with the host controller
> > + * @host_data: opaque data structure used by the host controller.
> > + *
> > + * returns a pointer to the newly created pci_host_bridge structure, or
> > + * an ERR_PTR()-encoded error code if the call failed.
> > + *
> > + * This function will try to obtain the host bridge domain number by
> > + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> > + * fails, a local allocator will be used that will put each host bridge
> > + * in a new domain.
> > + */
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> > +{
> > +	int err, domain, busno;
> > +	struct resource *bus_range;
> > +	struct pci_bus *root_bus;
> > +	struct pci_host_bridge *bridge;
> > +	resource_size_t io_base;
> > +	LIST_HEAD(res);
> > +
> > +	bus_range = kzalloc(sizeof(*bus_range), GFP_KERNEL);
> > +	if (!bus_range)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> > +	if (domain == -ENODEV)
> > +		domain = atomic_inc_return(&domain_nr);
> > +
> > +	err = of_pci_parse_bus_range(parent->of_node, bus_range);
> > +	if (err) {
> > +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> > +			parent->of_node->full_name);
> > +		bus_range->start = 0;
> > +		bus_range->end = 255;
> > +		bus_range->flags = IORESOURCE_BUS;
> > +	}
> > +	busno = bus_range->start;
> > +	pci_add_resource(&res, bus_range);
> > +
> > +	/* now parse the rest of host bridge bus ranges */
> > +	err = pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base);
> > +	if (err)
> > +		goto err_create;
> > +
> > +	/* then create the root bus */
> > +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> > +						ops, host_data, &res);
> > +	if (IS_ERR(root_bus)) {
> > +		err = PTR_ERR(root_bus);
> > +		goto err_create;
> > +	}
> > +
> > +	bridge = to_pci_host_bridge(root_bus->bridge);
> > +	bridge->io_base = io_base;
> > +
> > +	return bridge;
> > +
> > +err_create:
> > +	pci_free_resource_list(&res);
> > +	return ERR_PTR(err);
> > +}
> > +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> > +
> > +#endif /* CONFIG_OF */
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 1eed009..40ddd3d 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -395,6 +395,7 @@ struct pci_host_bridge {
> >  	struct device dev;
> >  	struct pci_bus *bus;		/* root bus */
> >  	int domain_nr;
> > +	resource_size_t io_base;	/* physical address for the start of I/O area */
> >  	struct list_head windows;	/* pci_host_bridge_windows */
> >  	void (*release_fn)(struct pci_host_bridge *);
> >  	void *release_data;
> > @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
> >  	return bus ? bus->dev.of_node : NULL;
> >  }
> >  
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> > +			void *host_data);
> > +
> > +int pcibios_fixup_bridge_ranges(struct list_head *resources);
> >  #else /* CONFIG_OF */
> >  static inline void pci_set_of_node(struct pci_dev *dev) { }
> >  static inline void pci_release_of_node(struct pci_dev *dev) { }
> >  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
> >  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> > +
> > +static inline struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> > +			void *host_data)
> > +{
> > +	return NULL;
> > +}
> >  #endif  /* CONFIG_OF */
> >  
> >  #ifdef CONFIG_EEH
> > -- 
> > 1.9.0
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 8, 2014, 5:15 p.m. UTC | #3
On Wednesday 05 March 2014, Liviu Dudau wrote:
> +
> +	pr_debug("Parsing ranges property...\n");
> +	for_each_of_pci_range(&parser, &range) {
> +		/* Read next ranges element */
> +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> +				range.pci_space, range.pci_addr);
> +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> +					range.cpu_addr, range.size);
> +
> +		/*
> +		 * If we failed translation or got a zero-sized region
> +		 * then skip this range
> +		 */
> +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> +			continue;
> +
> +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> +		if (!res)
> +			return -ENOMEM;
> +
> +		of_pci_range_to_resource(&range, dev, res);
> +
> +		if (resource_type(res) == IORESOURCE_IO)
> +			*io_base = range.cpu_addr;
> +
> +		pci_add_resource_offset(resources, res,
> +				res->start - range.pci_addr);
> +	}

As mentioned regarding the pci_register_io_range() helper, x86
would not enter the 'resource_type(res) == IORESOURCE_IO' code path,
which on the one hand is fine so we can return an error from
pci_register_io_range() there, but I think it will leave
io_base uninitialized.

There could also be other reasons why pci_register_io_range() fails,
e.g. because the space is exhausted, and I think we should try to
catch that here and skip the pci_add_resource_offset() and io_base
assignment then.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
liviu.dudau@arm.com March 10, 2014, 2:44 p.m. UTC | #4
On Sat, Mar 08, 2014 at 05:15:08PM +0000, Arnd Bergmann wrote:
> On Wednesday 05 March 2014, Liviu Dudau wrote:
> > +
> > +	pr_debug("Parsing ranges property...\n");
> > +	for_each_of_pci_range(&parser, &range) {
> > +		/* Read next ranges element */
> > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +				range.pci_space, range.pci_addr);
> > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +					range.cpu_addr, range.size);
> > +
> > +		/*
> > +		 * If we failed translation or got a zero-sized region
> > +		 * then skip this range
> > +		 */
> > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +			continue;
> > +
> > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +		if (!res)
> > +			return -ENOMEM;
> > +
> > +		of_pci_range_to_resource(&range, dev, res);
> > +
> > +		if (resource_type(res) == IORESOURCE_IO)
> > +			*io_base = range.cpu_addr;
> > +
> > +		pci_add_resource_offset(resources, res,
> > +				res->start - range.pci_addr);
> > +	}
> 
> As mentioned regarding the pci_register_io_range() helper, x86
> would not enter the 'resource_type(res) == IORESOURCE_IO' code path,
> which on the one hand is fine so we can return an error from
> pci_register_io_range() there, but I think it will lead to
> io_base getting an uninitialized content.
> 
> There could also be other reasons why pci_register_io_range() fails,
> e.g. because the space is exhausted, and I think we should try to
> catch that here and skip the pci_add_resource_offset() and io_base
> assignment then.

Hi Arnd,

I will try to improve the error handling in the next patchset version.
However I am still confused about the earlier discussion on
pci_register_io_range(). Your suggestion initially was to return an
error in the default weak implementation, but in your last email you
are talking about returning 'port'. My idea when I introduced the
helper function was that it would return an error if it fails to
register the IO range and zero otherwise. I agree that we can treat
the default 'do nothing with the IO range' case as an error, with
the caveat that this will force architectures that use this code to
provide their own implementation of pci_register_io_range() in order
to avoid failure, even for the cases where the architecture has a 1:1
mapping between IO and CPU addresses.

I've just noticed that my home server has silently dropped my reply
to you from the 7th of March, so I'm going to resend it using ARM's
setup.

Best regards,
Liviu


> 
> 	Arnd
>
Arnd Bergmann March 10, 2014, 3:21 p.m. UTC | #5
On Monday 10 March 2014 14:44:14 Liviu Dudau wrote:
> I will try to improve the error handling in the next patchset version.
> However I am still confused about the earlier discussion on
> pci_register_io_range(). Your suggestion initially was to return an
> error in the default weak implementation, but in your last email you
> are talking about returning 'port'.

You can do either one: 'port' should be positive or zero, while the
error would always be negative. We do the same thing in many interfaces
in the kernel.

> My idea when I've introduced the
> helper function was that it would return an error if it fails to
> register the IO range and zero otherwise. I agree that we can treat
> the default 'do nothing with the IO range' case as an error, with
> the caveat that will force architectures that use this code to
> provide their own implementation of pci_register_io_range() in order
> to avoid failure, even for the cases where the architecture has a 1:1
> mapping between IO and CPU addresses.

Which architectures are you thinking of? The only one I know that
does this is ia64, and we won't ever have to support this helper
on that architecture.

I did not ask to treat 'do nothing with the IO range' as an error,
what I meant is that we should treat 'architecture cannot translate
from I/O space to memory space but DT lists a translation anyway'
as an error. On x86, you should never see an entry for the I/O space
in "ranges", so we will not call this function unless there is a
bug in DT.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
liviu.dudau@arm.com March 10, 2014, 4:33 p.m. UTC | #6
On Mon, Mar 10, 2014 at 03:21:01PM +0000, Arnd Bergmann wrote:
> On Monday 10 March 2014 14:44:14 Liviu Dudau wrote:
> > I will try to improve the error handling in the next patchset version.
> > However I am still confused about the earlier discussion on
> > pci_register_io_range(). Your suggestion initially was to return an
> > error in the default weak implementation, but in your last email you
> > are talking about returning 'port'.
> 
> You can do either one: 'port' should be positive or zero, while the
> error would always be negative. We do the same thing in many interfaces
> in the kernel.
> 
> > My idea when I've introduced the
> > helper function was that it would return an error if it fails to
> > register the IO range and zero otherwise. I agree that we can treat
> > the default 'do nothing with the IO range' case as an error, with
> > the caveat that will force architectures that use this code to
> > provide their own implementation of pci_register_io_range() in order
> > to avoid failure, even for the cases where the architecture has a 1:1
> > mapping between IO and CPU addresses.
> 
> Which architectures are you thinking of? The only one I know that
> does this is ia64, and we won't ever have to support this helper
> on that architecture.

I was thinking about architectures that have IO_SPACE_LIMIT >= 0xffffffff.
While not an absolute indicator, with the default pci_address_to_pio()
that means that they can use the CPU MMIO address as IO address directly.

$ git grep IO_SPACE_LIMIT | grep -i ffffffff
arch/arm/include/asm/io.h:#define IO_SPACE_LIMIT ((resource_size_t)0xffffffff)
arch/arm/mach-at91/include/mach/io.h:#define IO_SPACE_LIMIT		0xFFFFFFFF
arch/arm/mach-omap1/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
arch/arm/mach-pxa/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
arch/arm/mach-s3c24xx/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
arch/avr32/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff
arch/frv/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff
arch/ia64/include/asm/io.h:#define IO_SPACE_LIMIT		0xffffffffffffffffUL
arch/m32r/include/asm/io.h:#define IO_SPACE_LIMIT  0xFFFFFFFF
arch/m68k/include/asm/io_no.h:#define IO_SPACE_LIMIT 0xffffffff
arch/microblaze/include/asm/io.h:#define IO_SPACE_LIMIT (0xFFFFFFFF)
arch/mn10300/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff
arch/sh/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff
arch/sparc/include/asm/io_32.h:#define IO_SPACE_LIMIT 0xffffffff
arch/sparc/include/asm/io_64.h:#define IO_SPACE_LIMIT 0xffffffffffffffffUL
arch/tile/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff


> 
> I did not ask to treat 'do nothing with the IO range' as an error,
> what I meant is that we should treat 'architecture cannot translate
> from I/O space to memory space but DT lists a translation anyway'
> as an error. On x86, you should never see an entry for the I/O space
> in "ranges", so we will not call this function unless there is a
> bug in DT.

Ok, it's just that there is no "architecture cannot translate from I/O
space to memory space" indicator anywhere and I don't want to make x86
a special case.

So, my proposal is this: default weak implementation of pci_register_io_range()
returns an error (meaning I have no idea how to translate IO addresses
to memory space) and anyone that wants this function to return success will
have to provide their implementation.

I will send an updated series.

Best regards,
Liviu

> 
> 	Arnd
> 
>
Arnd Bergmann March 10, 2014, 6:59 p.m. UTC | #7
On Monday 10 March 2014 16:33:44 Liviu Dudau wrote:
> On Mon, Mar 10, 2014 at 03:21:01PM +0000, Arnd Bergmann wrote:
> > On Monday 10 March 2014 14:44:14 Liviu Dudau wrote:
> > > I will try to improve the error handling in the next patchset version.
> > > However I am still confused about the earlier discussion on
> > > pci_register_io_range(). Your suggestion initially was to return an
> > > error in the default weak implementation, but in your last email you
> > > are talking about returning 'port'.
> > 
> > You can do either one: 'port' should be positive or zero, while the
> > error would always be negative. We do the same thing in many interfaces
> > in the kernel.
> > 
> > > My idea when I've introduced the
> > > helper function was that it would return an error if it fails to
> > > register the IO range and zero otherwise. I agree that we can treat
> > > the default 'do nothing with the IO range' case as an error, with
> > > the caveat that will force architectures that use this code to
> > > provide their own implementation of pci_register_io_range() in order
> > > to avoid failure, even for the cases where the architecture has a 1:1
> > > mapping between IO and CPU addresses.
> > 
> > Which architectures are you thinking of? The only one I know that
> > does this is ia64, and we won't ever have to support this helper
> > on that architecture.
> 
> I was thinking about architectures that have IO_SPACE_LIMIT >= 0xffffffff.
> While not an absolute indicator, with the default pci_address_to_pio()
> that means that they can use the CPU MMIO address as IO address directly.

Not really, that would only work if they also have instructions to do
raw accesses to physical memory addresses rather than through the virtual
memory pointers that most architectures use.

> $ git grep IO_SPACE_LIMIT | grep -i ffffffff
> arch/arm/include/asm/io.h:#define IO_SPACE_LIMIT ((resource_size_t)0xffffffff)
> arch/arm/mach-at91/include/mach/io.h:#define IO_SPACE_LIMIT		0xFFFFFFFF
> arch/arm/mach-omap1/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
> arch/arm/mach-pxa/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
> arch/arm/mach-s3c24xx/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff

These use a special trick where an __iomem pointer is the same as
the port number. This works most of the time, but breaks anything
that assumes that port numbers are low, such as /dev/port or
broken devices. Moreover, it means your code won't work because
it depends on passing the virtual start address of the PIO mapping
window as io_offset.

> arch/avr32/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff
> arch/frv/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff

They have no MMU, and the code relies on the port number to match
both the virtual and the physical address. You could be right about
these, but I would guess that the code also needs some other
changes if we want to make it work on nommu kernels. It also depends
on whether the I/O bus address is the same as the CPU address, or
if it starts at bus address 0.

> arch/ia64/include/asm/io.h:#define IO_SPACE_LIMIT		0xffffffffffffffffUL

Here, the definition is special, the token is just used to encode
a space number and an offset within the I/O space.

> arch/m32r/include/asm/io.h:#define IO_SPACE_LIMIT  0xFFFFFFFF

no PCI here.

> arch/m68k/include/asm/io_no.h:#define IO_SPACE_LIMIT 0xffffffff

This looks like a mistake, it should be smaller

> arch/microblaze/include/asm/io.h:#define IO_SPACE_LIMIT (0xFFFFFFFF)

I suspect it doesn't actually work. microblaze copied large parts
of this from PowerPC, but the parts that differ apparently get
it wrong for the I/O space. 

> arch/mn10300/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff

Same category as frv. We should ask David Howells whether he
thinks I/O space actually works on these.

> arch/sh/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff

I think this should just be 0xffff.

> arch/sparc/include/asm/io_32.h:#define IO_SPACE_LIMIT 0xffffffff
> arch/sparc/include/asm/io_64.h:#define IO_SPACE_LIMIT 0xffffffffffffffffUL

Sparc actually accesses the physical addresses, so in theory
it could always work. In the 64-bit case it would however have
to check that the port number is smaller than 0xffffffff, otherwise
you couldn't set the BAR. This means you still need a custom
function.

> arch/tile/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff

tile seems to support only ioport_map() but not inb/outb, if I'm
reading this right.

> > I did not ask to treat 'do nothing with the IO range' as an error,
> > what I meant is that we should treat 'architecture cannot translate
> > from I/O space to memory space but DT lists a translation anyway'
> > as an error. On x86, you should never see an entry for the I/O space
> > in "ranges", so we will not call this function unless there is a
> > bug in DT.
> 
> Ok, it's just that there is no "architecture cannot translate from I/O
> space to memory space" indicator anywhere and I don't want to make x86
> a special case.

Right.

> So, my proposal is this: default weak implementation of pci_register_io_range()
> returns an error (meaning I have no idea how to translate IO addresses
> to memory space) and anyone that wants this function to return success will
> have to provide their implementation.

Another idea: make this conditional on the definition of PCI_IOBASE: If this
is defined, we can use the arm64 version that uses this number. Otherwise
we fall back to returning an error, which means that either on the
architecture we shouldn't be calling that function, or we need a custom
implementation.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Geert Uytterhoeven March 10, 2014, 7:16 p.m. UTC | #8
On Mon, Mar 10, 2014 at 7:59 PM, Arnd Bergmann <arnd@arndb.de> wrote:
>> arch/avr32/include/asm/io.h:#define IO_SPACE_LIMIT    0xffffffff
>> arch/frv/include/asm/io.h:#define IO_SPACE_LIMIT      0xffffffff
>
> They have no MMU, and the code relies on the port number to match
> both the virtual and the physical address. You could be right about
> these, but I would guess that the code also needs some other
> changes if we want to make it work on nommu kernels. It also depends
> on whether the I/O bus address is the same as the CPU address, or
> if it starts at bus address 0.

>> arch/m68k/include/asm/io_no.h:#define IO_SPACE_LIMIT 0xffffffff
>
> This looks like a mistake, it should be smaller

io_no.h is for nommu.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 10, 2014, 7:28 p.m. UTC | #9
On Monday 10 March 2014 20:16:25 Geert Uytterhoeven wrote:
> On Mon, Mar 10, 2014 at 7:59 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> >> arch/avr32/include/asm/io.h:#define IO_SPACE_LIMIT    0xffffffff
> >> arch/frv/include/asm/io.h:#define IO_SPACE_LIMIT      0xffffffff
> >
> > They have no MMU, and the code relies on the port number to match
> > both the virtual and the physical address. You could be right about
> > these, but I would guess that the code also needs some other
> > changes if we want to make it work on nommu kernels. It also depends
> > on whether the I/O bus address is the same as the CPU address, or
> > if it starts at bus address 0.
> 
> >> arch/m68k/include/asm/io_no.h:#define IO_SPACE_LIMIT 0xffffffff
> >
> > This looks like a mistake, it should be smaller
> 
> io_no.h is for nommu.

Ah, I missed that. In that case I assume it doesn't matter because
the only m68k with PCI is M54xx and that always has an MMU.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau March 10, 2014, 9:56 p.m. UTC | #10
On Mon, Mar 10, 2014 at 07:59:59PM +0100, Arnd Bergmann wrote:
> On Monday 10 March 2014 16:33:44 Liviu Dudau wrote:
> > On Mon, Mar 10, 2014 at 03:21:01PM +0000, Arnd Bergmann wrote:
> > > On Monday 10 March 2014 14:44:14 Liviu Dudau wrote:
> > > > I will try to improve the error handling in the next patchset version.
> > > > However I am still confused about the earlier discussion on
> > > > pci_register_io_range(). Your suggestion initially was to return an
> > > > error in the default weak implementation, but in your last email you
> > > > are talking about returning 'port'.
> > > 
> > > You can do either one: 'port' should be positive or zero, while the
> > > error would always be negative. We do the same thing in many interfaces
> > > in the kernel.
> > > 
> > > > My idea when I've introduced the
> > > > helper function was that it would return an error if it fails to
> > > > register the IO range and zero otherwise. I agree that we can treat
> > > > the default 'do nothing with the IO range' case as an error, with
> > > > the caveat that will force architectures that use this code to
> > > > provide their own implementation of pci_register_io_range() in order
> > > > to avoid failure, even for the cases where the architecture has a 1:1
> > > > mapping between IO and CPU addresses.
> > > 
> > > Which architectures are you thinking of? The only one I know that
> > > does this is ia64, and we won't ever have to support this helper
> > > on that architecture.
> > 
> > I was thinking about architectures that have IO_SPACE_LIMIT >= 0xffffffff.
> > While not an absolute indicator, with the default pci_address_to_pio()
> > that means that they can use the CPU MMIO address as IO address directly.
> 
> Not really, that would only work if they also have instructions to do
> raw accesses to physical memory addresses rather than virtual memory
> pointers that most architectures do.
> 
> > $ git grep IO_SPACE_LIMIT | grep -i ffffffff
> > arch/arm/include/asm/io.h:#define IO_SPACE_LIMIT ((resource_size_t)0xffffffff)
> > arch/arm/mach-at91/include/mach/io.h:#define IO_SPACE_LIMIT		0xFFFFFFFF
> > arch/arm/mach-omap1/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
> > arch/arm/mach-pxa/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
> > arch/arm/mach-s3c24xx/include/mach/io.h:#define IO_SPACE_LIMIT 0xffffffff
> 
> These use a special trick where an __iomem pointer is the same as
> the port number. This works most of the time, but breaks anything
> that assumes that port numbers are low, such as /dev/port or
> broken devices. Moreover, it means your code won't work because
> it depends on passing the virtual start address of the PIO mapping
> window as io_offset.
> 
> > arch/avr32/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff
> > arch/frv/include/asm/io.h:#define IO_SPACE_LIMIT	0xffffffff
> 
> They have no MMU, and the code relies on the port number to match
> both the virtual and the physical address. You could be right about
> these, but I would guess that the code also needs some other
> changes if we want to make it work on nommu kernels. It also depends
> on whether the I/O bus address is the same as the CPU address, or
> if it starts at bus address 0.
> 
> > arch/ia64/include/asm/io.h:#define IO_SPACE_LIMIT		0xffffffffffffffffUL
> 
> Here, the definition is special, the token is just used to encode
> a space number and an offset within the I/O space.
> 
> > arch/m32r/include/asm/io.h:#define IO_SPACE_LIMIT  0xFFFFFFFF
> 
> no PCI here.
> 
> > arch/m68k/include/asm/io_no.h:#define IO_SPACE_LIMIT 0xffffffff
> 
> This looks like a mistake, it should be smaller
> 
> > arch/microblaze/include/asm/io.h:#define IO_SPACE_LIMIT (0xFFFFFFFF)
> 
> I suspect it doesn't actually work. microblaze copied large parts
> of this from PowerPC, but the parts that differ apparently get
> it wrong for the I/O space. 
> 
> > arch/mn10300/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff
> 
> Same category as frv. We should ask David Howells whether he
> thinks I/O space actually works on these.
> 
> > arch/sh/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff
> 
> I think this should just be 0xffff.
> 
> > arch/sparc/include/asm/io_32.h:#define IO_SPACE_LIMIT 0xffffffff
> > arch/sparc/include/asm/io_64.h:#define IO_SPACE_LIMIT 0xffffffffffffffffUL
> 
> Sparc actually accesses the physical addresses, so in theory
> it could always work. In the 64-bit case it would however have
> to check that the port number is smaller than 0xffffffff, otherwise
> you couldn't set the BAR. This means you still need a custom
> function.
> 
> > arch/tile/include/asm/io.h:#define IO_SPACE_LIMIT 0xffffffff
> 
> tile seems to support only ioport_map() but not inb/outb, if I'm
> reading this right.
> 
> > > I did not ask to treat 'do nothing with the IO range' as an error,
> > > what I meant is that we should treat 'architecture cannot translate
> > > from I/O space to memory space but DT lists a translation anyway'
> > > as an error. On x86, you should never see an entry for the I/O space
> > > in "ranges", so we will not call this function unless there is a
> > > bug in DT.
> > 
> > Ok, it's just that there is no "architecture cannot translate from I/O
> > space to memory space" indicator anywhere and I don't want to make x86
> > a special case.
> 
> Right.
> 
> > So, my proposal is this: default weak implementation of pci_register_io_range()
> > returns an error (meaning I have no idea how to translate IO addresses
> > to memory space) and anyone that wants this function to return success will
> > have to provide their implementation.
> 
> Another idea: make this conditional on the definition of PCI_IOBASE: If this
> is defined, we can use the arm64 version that uses this number. Otherwise
> we fall back to returning an error, which means that either on the
> architecture we shouldn't be calling that function, or we need a custom
> implementation.

PCI_IOBASE is always defined. See the discussion with Russell on this subject.

include/asm-generic/io.h has at line 118:

#ifndef PCI_IOBASE
#define PCI_IOBASE ((void __iomem *) 0)
#endif

I will go with my idea tomorrow. arm64 overrides the implementation anyway, and I
find that cleaner than having to do #ifdefs and/or ifs.

Best regards,
Liviu

> 
> 	Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 11, 2014, 6:50 a.m. UTC | #11
On Monday 10 March 2014 21:56:00 Liviu Dudau wrote:
> 
> PCI_IOBASE is always defined. See the discussion with Russell on this subject.
> 
> include/asm-generic/io.h has at line 118:
> 
> #ifndef PCI_IOBASE
> #define PCI_IOBASE ((void __iomem *) 0)
> #endif

That is only defined for those that use asm-generic/pci.h, which most architectures
don't.
 
> I will go with my idea tomorrow. arm64 overwrite the implementation anyway, I
> find it cleaner rather than having to do #ifdefs and/or ifs.

I'd really hope we can get to a point where arm64 doesn't need any architecture
specific code for this. It doesn't do anything special.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
liviu.dudau@arm.com March 11, 2014, 9:46 a.m. UTC | #12
On Tue, Mar 11, 2014 at 06:50:24AM +0000, Arnd Bergmann wrote:
> On Monday 10 March 2014 21:56:00 Liviu Dudau wrote:
> > 
> > PCI_IOBASE is always defined. See the discussion with Russell on this subject.
> > 
> > include/asm-generic/io.h has at line 118:
> > 
> > #ifndef PCI_IOBASE
> > #define PCI_IOBASE ((void __iomem *) 0)
> > #endif
> 
> That is only defined for those that use asm-generic/pci.h, which most architectures
> don't.

I think it is defined for anyone that #includes <asm-generic/io.h>. There is no other
#ifdef around that.

>  
> > I will go with my idea tomorrow. arm64 overwrite the implementation anyway, I
> > find it cleaner rather than having to do #ifdefs and/or ifs.
> 
> I'd really hope we can get to a point where arm64 doesn't need any architecture
> specific code for this. It doesn't do anything special.

I agree.

Best regards,
Liviu

> 
> 	Arnd
> 
>
Arnd Bergmann March 11, 2014, 10:43 a.m. UTC | #13
On Tuesday 11 March 2014 09:46:41 Liviu Dudau wrote:
> On Tue, Mar 11, 2014 at 06:50:24AM +0000, Arnd Bergmann wrote:
> > On Monday 10 March 2014 21:56:00 Liviu Dudau wrote:
> > > 
> > > PCI_IOBASE is always defined. See the discussion with Russell on this subject.
> > > 
> > > include/asm-generic/io.h has at line 118:
> > > 
> > > #ifndef PCI_IOBASE
> > > #define PCI_IOBASE ((void __iomem *) 0)
> > > #endif
> > 
> > That is only defined for those that use asm-generic/pci.h, which most architectures
> > don't.
> 
> I think it is defined for anyone that #includes <asm-generic/io.h>. There is no other
> #ifdef around that.
> 

My mistake, I meant to write asm-generic/io.h.

On a related note, I would actually prefer to get rid of this PCI_IOBASE
default and move it into the architectures that really want it like this.
The default when PCI_IOBASE is not set IMHO should be to also not provide
inb/outb and ioport_map() helpers, but we need a little more infrastructure
to actually make the kernel build in all valid configurations when we remove them.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 8708b652..db9f51a 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -6,9 +6,14 @@ 
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/slab.h>
 
 #include "pci.h"
 
+static atomic_t domain_nr = ATOMIC_INIT(-1);
+
 static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
 {
 	while (bus->parent)
@@ -92,3 +97,154 @@  void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
 	res->end = region->end + offset;
 }
 EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+#ifdef CONFIG_OF
+/**
+ * pcibios_fixup_bridge_ranges - default (weak) filter for host bridge ranges
+ * @resources: list of resources obtained from the ranges declaration in DT
+ *
+ * Platforms can override this function in order to impose stronger
+ * constraints onto the list of resources that a host bridge can use.
+ * The filtered list will then be used to create a root bus and associate
+ * it with the host bridge. This default version does no filtering and
+ * returns 0.
+ */
+int __weak pcibios_fixup_bridge_ranges(struct list_head *resources)
+{
+	return 0;
+}
+
+/**
+ * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
+ * @dev: device node of the host bridge having the range property
+ * @resources: list where the range of resources will be added after DT parsing
+ * @io_base: pointer to a variable that will contain the physical address for
+ * the start of the I/O range; it is not written if no I/O range is found.
+ *
+ * The caller must free the @resources list if an error is returned.
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping based on its content. It is expected
+ * that the property conforms with the Power ePAPR document.
+ *
+ * Each architecture is then offered the chance of applying their own
+ * filtering of pci_host_bridge_windows based on their own restrictions by
+ * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
+ * can then be used when creating a pci_host_bridge structure.
+ */
+static int pci_host_bridge_of_get_ranges(struct device_node *dev,
+		struct list_head *resources, resource_size_t *io_base)
+{
+	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	int err;
+
+	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
+
+	/* Check for ranges property */
+	err = of_pci_range_parser_init(&parser, dev);
+	if (err)
+		return err;
+
+	pr_debug("Parsing ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/* Read next ranges element */
+		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
+				range.pci_space, range.pci_addr);
+		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
+					range.cpu_addr, range.size);
+
+		/*
+		 * If we failed translation or got a zero-sized region
+		 * then skip this range
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+		if (!res)
+			return -ENOMEM;
+
+		of_pci_range_to_resource(&range, dev, res);
+
+		if (resource_type(res) == IORESOURCE_IO)
+			*io_base = range.cpu_addr;
+
+		pci_add_resource_offset(resources, res,
+				res->start - range.pci_addr);
+	}
+
+	/* Apply architecture specific fixups for the ranges */
+	return pcibios_fixup_bridge_ranges(resources);
+}
+
+/**
+ * of_create_pci_host_bridge - Create a PCI host bridge structure using
+ * information passed in the DT.
+ * @parent: device owning this host bridge
+ * @ops: pci_ops associated with the host controller
+ * @host_data: opaque data structure used by the host controller.
+ *
+ * Returns a pointer to the newly created pci_host_bridge structure, or
+ * an ERR_PTR() encoded error code if the call failed.
+ *
+ * This function will try to obtain the host bridge domain number by
+ * using of_alias_get_id() call with "pci-domain" as a stem. If that
+ * fails, a local allocator will be used that will put each host bridge
+ * in a new domain.
+ */
+struct pci_host_bridge *
+of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
+{
+	int err, domain, busno;
+	struct resource *bus_range;
+	struct pci_bus *root_bus;
+	struct pci_host_bridge *bridge;
+	resource_size_t io_base = 0;	/* stays 0 if DT has no I/O range */
+	LIST_HEAD(res);
+
+	bus_range = kzalloc(sizeof(*bus_range), GFP_KERNEL);
+	if (!bus_range)
+		return ERR_PTR(-ENOMEM);
+
+	domain = of_alias_get_id(parent->of_node, "pci-domain");
+	if (domain == -ENODEV)
+		domain = atomic_inc_return(&domain_nr);
+
+	err = of_pci_parse_bus_range(parent->of_node, bus_range);
+	if (err) {
+		dev_info(parent, "No bus range for %s, using default [0-255]\n",
+			parent->of_node->full_name);
+		bus_range->start = 0;
+		bus_range->end = 255;
+		bus_range->flags = IORESOURCE_BUS;
+	}
+	busno = bus_range->start;
+	pci_add_resource(&res, bus_range);
+
+	/* now parse the rest of host bridge bus ranges */
+	err = pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base);
+	if (err)
+		goto err_create;
+
+	/* then create the root bus */
+	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
+						ops, host_data, &res);
+	if (IS_ERR(root_bus)) {
+		err = PTR_ERR(root_bus);
+		goto err_create;
+	}
+
+	bridge = to_pci_host_bridge(root_bus->bridge);
+	bridge->io_base = io_base;
+
+	return bridge;
+
+err_create:
+	pci_free_resource_list(&res); /* NOTE(review): confirm resources are freed too */
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
+
+#endif /* CONFIG_OF */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1eed009..40ddd3d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -395,6 +395,7 @@  struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
 	int domain_nr;
+	resource_size_t io_base;	/* physical address for the start of I/O area */
 	struct list_head windows;	/* pci_host_bridge_windows */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
@@ -1786,11 +1787,23 @@  static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
 	return bus ? bus->dev.of_node : NULL;
 }
 
+struct pci_host_bridge *
+of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
+			void *host_data);
+
+int pcibios_fixup_bridge_ranges(struct list_head *resources);
 #else /* CONFIG_OF */
 static inline void pci_set_of_node(struct pci_dev *dev) { }
 static inline void pci_release_of_node(struct pci_dev *dev) { }
 static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
 static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
+
+static inline struct pci_host_bridge *
+of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
+			void *host_data)
+{
+	return NULL;	/* NOTE(review): CONFIG_OF version fails with ERR_PTR(), not NULL */
+}
 #endif  /* CONFIG_OF */
 
 #ifdef CONFIG_EEH