diff mbox

[v2,4/4] pci: Add support for creating a generic host_bridge from device tree

Message ID 1393506402-11474-5-git-send-email-Liviu.Dudau@arm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Liviu Dudau Feb. 27, 2014, 1:06 p.m. UTC
Several platforms use a rather generic version of parsing
the device tree to find the host bridge ranges. Move the common code
into the generic PCI code and use it to create a pci_host_bridge
structure that can be used by arch code.

Based on early attempts by Andrew Murray to unify the code.
Used powerpc and microblaze PCI code as starting point.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>

Comments

Arnd Bergmann Feb. 27, 2014, 1:38 p.m. UTC | #1
On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> Several platforms use a rather generic version of parsing
> the device tree to find the host bridge ranges. Move the common code
> into the generic PCI code and use it to create a pci_host_bridge
> structure that can be used by arch code.
> 
> Based on early attempts by Andrew Murray to unify the code.
> Used powerpc and microblaze PCI code as starting point.
> 
> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>

Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
to get his input so we can make this work on powerpc as well.

> diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> index 06ace62..feb8436 100644
> --- a/drivers/pci/host-bridge.c
> +++ b/drivers/pci/host-bridge.c
> @@ -6,9 +6,13 @@
>  #include <linux/init.h>
>  #include <linux/pci.h>
>  #include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_pci.h>
>  
>  #include "pci.h"
>  
> +static int domain_nr;
> +

For correctness, I think you want an 'atomic_t' here and use
atomic_inc_return() to get a new value.

>  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
>  {
>  	while (bus->parent)
> @@ -91,3 +95,133 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
>  	res->end = region->end + offset;
>  }
>  EXPORT_SYMBOL(pcibios_bus_to_resource);
> +
> +/**
> + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> + * @dev: device node of the host bridge having the range property
> + * @resources: list where the range of resources will be added after DT parsing
> + * @io_base: pointer to a variable that will contain the physical address for
> + * the start of the I/O range.
> + *
> + * If this function returns an error then the @resources list will be freed.
> + *
> + * This function will parse the "ranges" property of a PCI host bridge device
> + * node and setup the resource mapping based on its content. It is expected
> + * that the property conforms with the Power ePAPR document.
> + *
> + * Each architecture is then offered the chance of applying their own
> + * filtering of pci_host_bridge_windows based on their own restrictions by
> + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> + * can then be used when creating a pci_host_bridge structure.
> + */
> +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> +		struct list_head *resources, resource_size_t *io_base)
> +{
> +	struct resource *res;
> +	struct of_pci_range range;
> +	struct of_pci_range_parser parser;
> +	int err;
> +
> +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> +
> +	/* Check for ranges property */
> +	err = of_pci_range_parser_init(&parser, dev);
> +	if (err)
> +		return err;
> +
> +	pr_debug("Parsing ranges property...\n");
> +	for_each_of_pci_range(&parser, &range) {
> +		/* Read next ranges element */
> +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> +				range.pci_space, range.pci_addr);
> +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> +					range.cpu_addr, range.size);
> +
> +		/*
> +		 * If we failed translation or got a zero-sized region
> +		 * then skip this range
> +		 */
> +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> +			continue;
> +
> +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> +		if (!res) {
> +			err = -ENOMEM;
> +			goto bridge_ranges_nomem;
> +		}
> +
> +		of_pci_range_to_resource(&range, dev, res);
> +
> +		if (resource_type(res) == IORESOURCE_IO)
> +			*io_base = range.cpu_addr;
> +
> +		pci_add_resource_offset(resources, res,
> +				res->start - range.pci_addr);
> +	}

This is not the correct resource for I/O space at all. Please talk
to Will, I've been over this with him in detail and he probably
understands it now. I assume you are both working in the same
building.

Since this is common PCI code, you could also decide to open-code
the pci_add_resource_offset() function. If you don't do that, I
think you have a memory leak for the resources that you can avoid
by allocating the resource and pci_host_bridge_window structures
together with a single kzalloc.

> +	/* Apply architecture specific fixups for the ranges */
> +	pcibios_fixup_bridge_ranges(resources);
> +
> +	return 0;
> +
> +bridge_ranges_nomem:
> +	pci_free_resource_list(resources);
> +	return err;
> +}
> +
> +/**
> + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> + * information passed in the DT.
> + * @parent: device owning this host bridge
> + * @ops: pci_ops associated with the host controller
> + * @host_data: opaque data structure used by the host controller.
> + *
> + * returns a pointer to the newly created pci_host_bridge structure, or
> + * NULL if the call failed.
> + *
> + * This function will try to obtain the host bridge domain number by
> + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> + * fails, a local allocator will be used that will put each host bridge
> + * in a new domain.
> + */
> +struct pci_host_bridge *
> +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> +{
> +	int err, domain, busno;
> +	struct resource bus_range;
> +	struct pci_bus *root_bus;
> +	struct pci_host_bridge *bridge;
> +	resource_size_t io_base;
> +	LIST_HEAD(res);
> +
> +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> +	if (domain == -ENODEV)
> +		domain = domain_nr++;
> +
> +	err = of_pci_parse_bus_range(parent->of_node, &bus_range);
> +	if (err) {
> +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> +			parent->of_node->full_name);
> +		bus_range.start = 0;
> +		bus_range.end = 255;
> +		bus_range.flags = IORESOURCE_BUS;
> +	}
> +	busno = bus_range.start;
> +	pci_add_resource(&res, &bus_range);
> +
> +	/* now parse the rest of host bridge bus ranges */
> +	if (pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base))
> +		return NULL;
> +
> +	/* then create the root bus */
> +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> +						ops, host_data, &res);
> +	if (!root_bus)
> +		return NULL;

Do we have any code that checks for conflicting domain/bus numbers here?
I guess pci_create_root_bus_in_domain() will fail if you have that.

Since pci_create_root_bus_in_domain() is a new function that you just
introduced, it would be helpful to change the calling conventions
so it returns an error pointer instead of NULL upon failing.
of_create_pci_host_bridge() can do the same, but pci_create_root_bus()
should keep returning NULL so we don't have to change all the
callers.

> +	bridge = to_pci_host_bridge(root_bus->bridge);
> +	bridge->io_base = io_base;
> +
> +	return bridge;
> +}
> +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 1eed009..0c5e269 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -395,6 +395,7 @@ struct pci_host_bridge {
>  	struct device dev;
>  	struct pci_bus *bus;		/* root bus */
>  	int domain_nr;
> +	resource_size_t io_base;	/* physical address for the start of I/O area */
>  	struct list_head windows;	/* pci_host_bridge_windows */
>  	void (*release_fn)(struct pci_host_bridge *);
>  	void *release_data;

What is the io_base used for here?

> @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
>  	return bus ? bus->dev.of_node : NULL;
>  }
>  
> +struct pci_host_bridge *
> +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> +			void *host_data);
> +
> +void pcibios_fixup_bridge_ranges(struct list_head *resources);
>  #else /* CONFIG_OF */
>  static inline void pci_set_of_node(struct pci_dev *dev) { }
>  static inline void pci_release_of_node(struct pci_dev *dev) { }
>  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
>  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> +
> +static inline struct pci_host_bridge *
> +pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
> +			void *host_data)
> +{
> +	return NULL;
> +}
>  #endif  /* CONFIG_OF */
>  
>  #ifdef CONFIG_EEH
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 27, 2014, 1:48 p.m. UTC | #2
On Thursday 27 February 2014 14:38:32 Arnd Bergmann wrote:
> > +     pr_debug("Parsing ranges property...\n");
> > +     for_each_of_pci_range(&parser, &range) {
> > +             /* Read next ranges element */
> > +             pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +                             range.pci_space, range.pci_addr);
> > +             pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +                                     range.cpu_addr, range.size);
> > +
> > +             /*
> > +              * If we failed translation or got a zero-sized region
> > +              * then skip this range
> > +              */
> > +             if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +                     continue;
> > +
> > +             res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +             if (!res) {
> > +                     err = -ENOMEM;
> > +                     goto bridge_ranges_nomem;
> > +             }
> > +
> > +             of_pci_range_to_resource(&range, dev, res);
> > +
> > +             if (resource_type(res) == IORESOURCE_IO)
> > +                     *io_base = range.cpu_addr;
> > +
> > +             pci_add_resource_offset(resources, res,
> > +                             res->start - range.pci_addr);
> > +     }
> 
> This is not the correct resource for I/O space at all. Please talk
> to Will, I've been over this with him in detail and he probably
> understands it now. I assume you are both working in the same
> building.
> 

Sorry, I initially missed part of your changes in patch 1.
I think it's actually correct now.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 2:13 p.m. UTC | #3
On Thu, Feb 27, 2014 at 01:38:32PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> > Several platforms use a rather generic version of parsing
> > the device tree to find the host bridge ranges. Move the common code
> > into the generic PCI code and use it to create a pci_host_bridge
> > structure that can be used by arch code.
> > 
> > Based on early attempts by Andrew Murray to unify the code.
> > Used powerpc and microblaze PCI code as starting point.
> > 
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> 
> Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
> to get his input so we can make this work on powerpc as well.

Sorry to Benjamin for omitting him, I will add him to future postings (if he finds
the direction interesting ;) ).

> 
> > diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> > index 06ace62..feb8436 100644
> > --- a/drivers/pci/host-bridge.c
> > +++ b/drivers/pci/host-bridge.c
> > @@ -6,9 +6,13 @@
> >  #include <linux/init.h>
> >  #include <linux/pci.h>
> >  #include <linux/module.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_pci.h>
> >  
> >  #include "pci.h"
> >  
> > +static int domain_nr;
> > +
> 
> For correctness, I think you want an 'atomic_t' here and use
> atomic_inc_return() to get a new value.

OK, will do.

> 
> >  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
> >  {
> >  	while (bus->parent)
> > @@ -91,3 +95,133 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
> >  	res->end = region->end + offset;
> >  }
> >  EXPORT_SYMBOL(pcibios_bus_to_resource);
> > +
> > +/**
> > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > + * @dev: device node of the host bridge having the range property
> > + * @resources: list where the range of resources will be added after DT parsing
> > + * @io_base: pointer to a variable that will contain the physical address for
> > + * the start of the I/O range.
> > + *
> > + * If this function returns an error then the @resources list will be freed.
> > + *
> > + * This function will parse the "ranges" property of a PCI host bridge device
> > + * node and setup the resource mapping based on its content. It is expected
> > + * that the property conforms with the Power ePAPR document.
> > + *
> > + * Each architecture is then offered the chance of applying their own
> > + * filtering of pci_host_bridge_windows based on their own restrictions by
> > + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> > + * can then be used when creating a pci_host_bridge structure.
> > + */
> > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > +		struct list_head *resources, resource_size_t *io_base)
> > +{
> > +	struct resource *res;
> > +	struct of_pci_range range;
> > +	struct of_pci_range_parser parser;
> > +	int err;
> > +
> > +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > +
> > +	/* Check for ranges property */
> > +	err = of_pci_range_parser_init(&parser, dev);
> > +	if (err)
> > +		return err;
> > +
> > +	pr_debug("Parsing ranges property...\n");
> > +	for_each_of_pci_range(&parser, &range) {
> > +		/* Read next ranges element */
> > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +				range.pci_space, range.pci_addr);
> > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +					range.cpu_addr, range.size);
> > +
> > +		/*
> > +		 * If we failed translation or got a zero-sized region
> > +		 * then skip this range
> > +		 */
> > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +			continue;
> > +
> > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +		if (!res) {
> > +			err = -ENOMEM;
> > +			goto bridge_ranges_nomem;
> > +		}
> > +
> > +		of_pci_range_to_resource(&range, dev, res);
> > +
> > +		if (resource_type(res) == IORESOURCE_IO)
> > +			*io_base = range.cpu_addr;
> > +
> > +		pci_add_resource_offset(resources, res,
> > +				res->start - range.pci_addr);
> > +	}
> 
> This is not the correct resource for I/O space at all. Please talk
> to Will, I've been over this with him in detail and he probably
> understands it now. I assume you are both working in the same
> building.

As you noticed later, it is now correct because res->start will
hold the start of the logical I/O port range. I'm talking with Will on a
regular basis and I think we are both on the same page.

> 
> Since this is common PCI code, you could also decide to open-code
> the pci_add_resource_offset() function. If you don't do that, I
> think you have a memory leak for the resources that you can avoid
> by allocating the resource and pci_host_bridge_window structures
> together with a single kzalloc.

This is a list of temporary resources that hasn't yet been added
to pci_host_bridge_window. As noted in the comment for the function, if
the call fails it will clear the resources list.

> 
> > +	/* Apply architecture specific fixups for the ranges */
> > +	pcibios_fixup_bridge_ranges(resources);
> > +
> > +	return 0;
> > +
> > +bridge_ranges_nomem:
> > +	pci_free_resource_list(resources);
> > +	return err;
> > +}
> > +
> > +/**
> > + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> > + * information passed in the DT.
> > + * @parent: device owning this host bridge
> > + * @ops: pci_ops associated with the host controller
> > + * @host_data: opaque data structure used by the host controller.
> > + *
> > + * returns a pointer to the newly created pci_host_bridge structure, or
> > + * NULL if the call failed.
> > + *
> > + * This function will try to obtain the host bridge domain number by
> > + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> > + * fails, a local allocator will be used that will put each host bridge
> > + * in a new domain.
> > + */
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> > +{
> > +	int err, domain, busno;
> > +	struct resource bus_range;
> > +	struct pci_bus *root_bus;
> > +	struct pci_host_bridge *bridge;
> > +	resource_size_t io_base;
> > +	LIST_HEAD(res);
> > +
> > +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> > +	if (domain == -ENODEV)
> > +		domain = domain_nr++;
> > +
> > +	err = of_pci_parse_bus_range(parent->of_node, &bus_range);
> > +	if (err) {
> > +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> > +			parent->of_node->full_name);
> > +		bus_range.start = 0;
> > +		bus_range.end = 255;
> > +		bus_range.flags = IORESOURCE_BUS;
> > +	}
> > +	busno = bus_range.start;
> > +	pci_add_resource(&res, &bus_range);
> > +
> > +	/* now parse the rest of host bridge bus ranges */
> > +	if (pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base))
> > +		return NULL;
> > +
> > +	/* then create the root bus */
> > +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> > +						ops, host_data, &res);
> > +	if (!root_bus)
> > +		return NULL;
> 
> Do we have any code that checks for conflicting domain/bus numbers here?
> I guess pci_create_root_bus_in_domain() will fail if you have that.

It does fail.

> 
> Since pci_create_root_bus_in_domain() is a new function that you just
> introduced, it would be helpful to change the calling conventions
> so it returns an error pointer instead of NULL upon failing.
> of_create_pci_host_bridge() can do the same, but pci_create_root_bus()
> should keep returning NULL so we don't have to change all the
> callers.

I need to look if any useful information is being captured in the function.
Please note that pci_create_root_bus_in_domain() is actually the old
pci_create_root_bus() function with an added parameter.

> 
> > +	bridge = to_pci_host_bridge(root_bus->bridge);
> > +	bridge->io_base = io_base;

To answer your later question, io_base is remembered here.

> > +
> > +	return bridge;
> > +}
> > +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 1eed009..0c5e269 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -395,6 +395,7 @@ struct pci_host_bridge {
> >  	struct device dev;
> >  	struct pci_bus *bus;		/* root bus */
> >  	int domain_nr;
> > +	resource_size_t io_base;	/* physical address for the start of I/O area */
> >  	struct list_head windows;	/* pci_host_bridge_windows */
> >  	void (*release_fn)(struct pci_host_bridge *);
> >  	void *release_data;
> 
> What is the io_base used for here?

It is useful for host bridge drivers as this is the only place where we store
the physical CPU address for the IO range. This is then needed when setting up the
translation registers. Also used when calling the pci_ioremap_io function that I'm
introducing in the AArch64 patches.

Whole patch is still under legal review, but the fragment for setting up the ATR looks
like this:

	list_for_each_entry(window, &bridge->windows, list) {
		res = window->res;
		offset = window->offset;
		wsize = ilog2(resource_size(res)) - 1;

		if (resource_type(res) == IORESOURCE_MEM)
			update_atr_entry(pp->base + ATR_REG_whatever,
				res->start, 	                        /* CPU address */
				res->start - offset,                    /* PCI address */
				0, wsize);
		else if (resource_type(res) == IORESOURCE_IO) {
			io_offset = pci_ioremap_io(res, bridge->io_base + offset);
			update_atr_entry(pp->base + ATR_REG_whatever,
				bridge->io_base + res->start + offset,  /* CPU address */
				res->start,                             /* PCI address */
				0x20000, wsize);
		}
	}


Best regards,
Liviu


> 
> > @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
> >  	return bus ? bus->dev.of_node : NULL;
> >  }
> >  
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> > +			void *host_data);
> > +
> > +void pcibios_fixup_bridge_ranges(struct list_head *resources);
> >  #else /* CONFIG_OF */
> >  static inline void pci_set_of_node(struct pci_dev *dev) { }
> >  static inline void pci_release_of_node(struct pci_dev *dev) { }
> >  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
> >  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> > +
> > +static inline struct pci_host_bridge *
> > +pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
> > +			void *host_data)
> > +{
> > +	return NULL;
> > +}
> >  #endif  /* CONFIG_OF */
> >  
> >  #ifdef CONFIG_EEH
> > 
> 
> 
>
Arnd Bergmann Feb. 27, 2014, 3:58 p.m. UTC | #4
On Thursday 27 February 2014 14:13:22 Liviu Dudau wrote:
> 
> It is useful for host bridge drivers as this is the only place where we store
> the physical CPU address for the IO range. This is then needed when setting up the
> translation registers. Also used when calling the pci_ioremap_io function that I'm
> introducing in the AArch64 patches.

I don't understand what translation windows you are talking about. Is this
about how the PCI spaces are mapped into the CPU address space? If so, I
would strongly recommend to have this handled by the boot loader before
calling into the kernel. For ARM32, we have a lot of embedded systems
that require the PCI host driver to set up those windows, but actually
it would be much better to just have the firmware tell us what the setup
is and then use that.

> Whole patch is still under legal review, but the fragment for setting up the ATR looks
> like this:
> 
>         list_for_each_entry(window, &bridge->windows, list) {
>                 res = window->res;
>                 offset = window->offset;
>                 wsize = ilog2(resource_size(res)) - 1;
> 
>                 if (resource_type(res) == IORESOURCE_MEM)
>                         update_atr_entry(pp->base + ATR_REG_whatever,
>                                 res->start,                             /* CPU address */
>                                 res->start - offset,                    /* PCI address */
>                                 0, wsize);
>                 else if (resource_type(res) == IORESOURCE_IO) {
>                         io_offset = pci_ioremap_io(res, bridge->io_base + offset);
>                         update_atr_entry(pp->base + ATR_REG_whatever,
>                                 bridge->io_base + res->start + offset,  /* CPU address */
>                                 res->start,                             /* PCI address */
>                                 0x20000, wsize);
>                 }
>         }

Hmm, I again don't see how 'bridge->io_base + res->start + offset' is
the correct address here. What is it you are trying to pass into
update_atr_entry()?

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 4:20 p.m. UTC | #5
On Thu, Feb 27, 2014 at 03:58:41PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 14:13:22 Liviu Dudau wrote:
> > 
> > It is useful for host bridge drivers as this is the only place where we store
> > the physical CPU address for the IO range. This is then needed when setting up the
> > translation registers. Also used when calling the pci_ioremap_io function that I'm
> > introducing in the AArch64 patches.
> 
> I don't understand what translation windows you are talking about. Is this
> about how the PCI spaces are mapped into the CPU address space? If so, I
> would strongly recommend to have this handled by the boot loader before
> calling into the kernel. For ARM32, we have a lot of embedded systems
> that require the PCI host driver to set up those windows, but actually
> it would be much better to just have the firmware tell us what the setup
> is and that use that.

The AXI to PCI bridge that I'm using has a set of registers for doing address translation.
When it sees an AXI transaction that matches the programmed translation window, it will
convert it into a PCI write using the PCI address base written in that translation window.
In other words you basically program the DT range into those address translation registers
and the bridge does the AXI to PCI conversion for you.

> 
> > Whole patch is still under legal review, but the fragment for setting up the ATR looks
> > like this:
> > 
> >         list_for_each_entry(window, &bridge->windows, list) {
> >                 res = window->res;
> >                 offset = window->offset;
> >                 wsize = ilog2(resource_size(res)) - 1;
> > 
> >                 if (resource_type(res) == IORESOURCE_MEM)
> >                         update_atr_entry(pp->base + ATR_REG_whatever,
> >                                 res->start,                             /* CPU address */
> >                                 res->start - offset,                    /* PCI address */
> >                                 0, wsize);
> >                 else if (resource_type(res) == IORESOURCE_IO) {
> >                         io_offset = pci_ioremap_io(res, bridge->io_base + offset);
> >                         update_atr_entry(pp->base + ATR_REG_whatever,
> >                                 bridge->io_base + res->start + offset,  /* CPU address */
> >                                 res->start,                             /* PCI address */
> >                                 0x20000, wsize);
> >                 }
> >         }
> 
> Hmm, I again don't see how 'bridge->io_base + res->start + offset' is
> the correct address here. What is it you are trying to pass into
> update_atr_entry()?

The physical CPU address where the IO range starts. 

For IO:
  res->start = beginning of logical I/O port block
  offset = window->offset = res->start - range.pci_addr
  bridge->io_base = range.cpu_addr

  bridge->io_base + res->start + offset = range.cpu_addr + res->start + res->start - range.pci_addr

Hmm, writing code without coffee .... :)

Lucky for me res->start = range.pci_addr = 0, but I can see now where I've got it wrong. Thanks!

Liviu

> 
> 	Arnd
> 
>
Arnd Bergmann Feb. 27, 2014, 4:45 p.m. UTC | #6
On Thursday 27 February 2014 16:20:03 Liviu Dudau wrote:
> On Thu, Feb 27, 2014 at 03:58:41PM +0000, Arnd Bergmann wrote:
> > On Thursday 27 February 2014 14:13:22 Liviu Dudau wrote:
> > > 
> > > It is useful for host bridge drivers as this is the only place where we store
> > > the physical CPU address for the IO range. This is then needed when setting up the
> > > translation registers. Also used when calling the pci_ioremap_io function that I'm
> > > introducing in the AArch64 patches.
> > 
> > I don't understand what translation windows you are talking about. Is this
> > about how the PCI spaces are mapped into the CPU address space? If so, I
> > would strongly recommend to have this handled by the boot loader before
> > calling into the kernel. For ARM32, we have a lot of embedded systems
> > that require the PCI host driver to set up those windows, but actually
> > it would be much better to just have the firmware tell us what the setup
> > is and that use that.
> 
> The AXI to PCI bridge that I'm using has a set of registers for doing address translation.
> When it sees an AXI translation that matches the programmed translation window will
> convert it into a PCI write using the PCI address base written in that translation window.
> In other words you basically program the DT range into those address translation registers
> and the bridge does the AXI to PCI conversion for you.

Right. That should definitely be done in the boot loader
before Linux is started.


	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 4:54 p.m. UTC | #7
On Thu, Feb 27, 2014 at 04:45:24PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 16:20:03 Liviu Dudau wrote:
> > On Thu, Feb 27, 2014 at 03:58:41PM +0000, Arnd Bergmann wrote:
> > > On Thursday 27 February 2014 14:13:22 Liviu Dudau wrote:
> > > > 
> > > > It is useful for host bridge drivers as this is the only place where we store
> > > > the physical CPU address for the IO range. This is then needed when setting up the
> > > > translation registers. Also used when calling the pci_ioremap_io function that I'm
> > > > introducing in the AArch64 patches.
> > > 
> > > I don't understand what translation windows you are talking about. Is this
> > > about how the PCI spaces are mapped into the CPU address space? If so, I
> > > would strongly recommend to have this handled by the boot loader before
> > > calling into the kernel. For ARM32, we have a lot of embedded systems
> > > that require the PCI host driver to set up those windows, but actually
> > > it would be much better to just have the firmware tell us what the setup
> > > is and that use that.
> > 
> > The AXI to PCI bridge that I'm using has a set of registers for doing address translation.
> > When it sees an AXI translation that matches the programmed translation window will
> > convert it into a PCI write using the PCI address base written in that translation window.
> > In other words you basically program the DT range into those address translation registers
> > and the bridge does the AXI to PCI conversion for you.
> 
> Right. That should definitely be done in the boot loader
> before Linux is started.

There are examples of other host controllers doing similar things. pci-tegra.c does it in
tegra_pcie_setup_translations(), pcie-designware.c has dw_pcie_prog_viewport_*() functions.

I'm afraid for the moment the bootloader on my platform is not going to be of too much use on
this, so it will have to be initially in Linux.

Best regards,
Liviu

> 
> 
> 	Arnd
> 
>
Arnd Bergmann Feb. 27, 2014, 6:42 p.m. UTC | #8
On Thursday 27 February 2014, Liviu Dudau wrote:
> There are examples of other host controllers doing similar things. pci-tegra.c does it in
> tegra_pcie_setup_translations(), pcie-designware.c has dw_pcie_prog_viewport_*() functions.
> 
> I'm afraid for the moment the bootloader on my platform is not going to be of too much use on
> this, so it will have to be initially in Linux.

Ah, too bad. I know that we always do it wrong on arm32, I was hoping for a fresh
start on arm64 to fix this particular issue.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Feb. 27, 2014, 11:32 p.m. UTC | #9
On Thu, 2014-02-27 at 14:38 +0100, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> > Several platforms use a rather generic version of parsing
> > the device tree to find the host bridge ranges. Move the common code
> > into the generic PCI code and use it to create a pci_host_bridge
> > structure that can be used by arch code.
> > 
> > Based on early attempts by Andrew Murray to unify the code.
> > Used powerpc and microblaze PCI code as starting point.

So that is only going to work for archs that don't have any special
twist. For example it won't work for powerpc, which is why Andrew's
original approach didn't fly.

The range walk helpers do help though. I need to review in more detail
and test Andrew's powerpc patch here and will merge it.

> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> 
> Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
> to get his input so we can make this work on powerpc as well.

Tricky... We would need hooks which would turn the whole thing into a
pile of spaghetti. I think we should stick to using the range helpers
(Andrew's latest patch), which makes the powerpc code a lot smaller,
and leave it at that.

> > diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> > index 06ace62..feb8436 100644
> > --- a/drivers/pci/host-bridge.c
> > +++ b/drivers/pci/host-bridge.c
> > @@ -6,9 +6,13 @@
> >  #include <linux/init.h>
> >  #include <linux/pci.h>
> >  #include <linux/module.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_pci.h>
> >  
> >  #include "pci.h"
> >  
> > +static int domain_nr;
> > +
> 
> For correctness, I think you want an 'atomic_t' here and use
> atomic_inc_return() to get a new value.
> 
> >  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
> >  {
> >  	while (bus->parent)
> > @@ -91,3 +95,133 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
> >  	res->end = region->end + offset;
> >  }
> >  EXPORT_SYMBOL(pcibios_bus_to_resource);
> > +
> > +/**
> > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > + * @dev: device node of the host bridge having the range property
> > + * @resources: list where the range of resources will be added after DT parsing
> > + * @io_base: pointer to a variable that will contain the physical address for
> > + * the start of the I/O range.
> > + *
> > + * If this function returns an error then the @resources list will be freed.
> > + *
> > + * This function will parse the "ranges" property of a PCI host bridge device
> > + * node and setup the resource mapping based on its content. It is expected
> > + * that the property conforms with the Power ePAPR document.
> > + *
> > + * Each architecture is then offered the chance of applying their own
> > + * filtering of pci_host_bridge_windows based on their own restrictions by
> > + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> > + * can then be used when creating a pci_host_bridge structure.
> > + */
> > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > +		struct list_head *resources, resource_size_t *io_base)
> > +{
> > +	struct resource *res;
> > +	struct of_pci_range range;
> > +	struct of_pci_range_parser parser;
> > +	int err;
> > +
> > +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > +
> > +	/* Check for ranges property */
> > +	err = of_pci_range_parser_init(&parser, dev);
> > +	if (err)
> > +		return err;
> > +
> > +	pr_debug("Parsing ranges property...\n");
> > +	for_each_of_pci_range(&parser, &range) {
> > +		/* Read next ranges element */
> > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +				range.pci_space, range.pci_addr);
> > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +					range.cpu_addr, range.size);
> > +
> > +		/*
> > +		 * If we failed translation or got a zero-sized region
> > +		 * then skip this range
> > +		 */
> > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +			continue;

Shouldn't that test move into the parsing helper ?

> > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +		if (!res) {
> > +			err = -ENOMEM;
> > +			goto bridge_ranges_nomem;
> > +		}
> > +
> > +		of_pci_range_to_resource(&range, dev, res);
> > +
> > +		if (resource_type(res) == IORESOURCE_IO)
> > +			*io_base = range.cpu_addr;

You don't care about the size of the IO space ?

> > +		pci_add_resource_offset(resources, res,
> > +				res->start - range.pci_addr);
> > +	}
> 
> This is not the correct resource for I/O space at all. Please talk
> to Will, I've been over this with him in detail and he probably
> understands it now. I assume you are both working in the same
> building.

Yes, the IO offsets work differently on powerpc as well

> Since this is common PCI code, you could also decide to open-code
> the pci_add_resource_offset() function. If you don't do that, I
> think you have a memory leak for the resources that you can avoid
> by allocating the resource and pci_host_bridge_window structures
> together with a single kzalloc.
> 
> > +	/* Apply architecture specific fixups for the ranges */
> > +	pcibios_fixup_bridge_ranges(resources);
> > +
> > +	return 0;
> > +
> > +bridge_ranges_nomem:
> > +	pci_free_resource_list(resources);
> > +	return err;
> > +}
> > +
> > +/**
> > + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> > + * information passed in the DT.
> > + * @parent: device owning this host bridge
> > + * @ops: pci_ops associated with the host controller
> > + * @host_data: opaque data structure used by the host controller.
> > + *
> > + * returns a pointer to the newly created pci_host_bridge structure, or
> > + * NULL if the call failed.
> > + *
> > + * This function will try to obtain the host bridge domain number by
> > + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> > + * fails, a local allocator will be used that will put each host bridge
> > + * in a new domain.
> > + */
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> > +{
> > +	int err, domain, busno;
> > +	struct resource bus_range;
> > +	struct pci_bus *root_bus;
> > +	struct pci_host_bridge *bridge;
> > +	resource_size_t io_base;
> > +	LIST_HEAD(res);
> > +
> > +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> > +	if (domain == -ENODEV)
> > +		domain = domain_nr++;
> >
We probably want some uniqueness testing here.

> > +	err = of_pci_parse_bus_range(parent->of_node, &bus_range);
> > +	if (err) {
> > +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> > +			parent->of_node->full_name);
> > +		bus_range.start = 0;
> > +		bus_range.end = 255;
> > +		bus_range.flags = IORESOURCE_BUS;
> > +	}
> > +	busno = bus_range.start;
> > +	pci_add_resource(&res, &bus_range);
> > +
> > +	/* now parse the rest of host bridge bus ranges */
> > +	if (pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base))
> > +		return NULL;
> > +
> > +	/* then create the root bus */
> > +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> > +						ops, host_data, &res);
> > +	if (!root_bus)
> > +		return NULL;
> 
> Do we have any code that checks for conflicting domain/bus numbers here?
> I guess pci_create_root_bus_in_domain() will fail if you have that.
> 
> Since pci_create_root_bus_in_domain() is a new function that you just
> introduced, it would be helpful to change the calling conventions
> so it returns an error pointer instead of NULL upon failing.
> of_create_pci_host_bridge() can do the same, but pci_create_root_bus()
> should keep returning NULL so we don't have to change all the
> callers.
> 
> > +	bridge = to_pci_host_bridge(root_bus->bridge);
> > +	bridge->io_base = io_base;
> > +
> > +	return bridge;
> > +}
> > +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 1eed009..0c5e269 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -395,6 +395,7 @@ struct pci_host_bridge {
> >  	struct device dev;
> >  	struct pci_bus *bus;		/* root bus */
> >  	int domain_nr;
> > +	resource_size_t io_base;	/* physical address for the start of I/O area */
> >  	struct list_head windows;	/* pci_host_bridge_windows */
> >  	void (*release_fn)(struct pci_host_bridge *);
> >  	void *release_data;
> 
> What is the io_base used for here?
> 
> > @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
> >  	return bus ? bus->dev.of_node : NULL;
> >  }
> >  
> > +struct pci_host_bridge *
> > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> > +			void *host_data);
> > +
> > +void pcibios_fixup_bridge_ranges(struct list_head *resources);
> >  #else /* CONFIG_OF */
> >  static inline void pci_set_of_node(struct pci_dev *dev) { }
> >  static inline void pci_release_of_node(struct pci_dev *dev) { }
> >  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
> >  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> > +
> > +static inline struct pci_host_bridge *
> > +pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
> > +			void *host_data)
> > +{
> > +	return NULL;
> > +}
> >  #endif  /* CONFIG_OF */
> >  
> >  #ifdef CONFIG_EEH
> > 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 28, 2014, 8:46 a.m. UTC | #10
On Friday 28 February 2014 10:32:04 Benjamin Herrenschmidt wrote:
> On Thu, 2014-02-27 at 14:38 +0100, Arnd Bergmann wrote:
> > On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> > > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > 
> > Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
> > to get his input so we can make this work on powerpc as well.
> 
> Tricky... We would need hooks which would turn the whole thing into a
> pile of spaghetti. I think we should stick to using the range helpers
> (Andrew latest patch), which makes the powerpc code a lot smaller,
> and leave it at that.

We can certainly do both: small helpers that let you shrink the powerpc
code, and a generic implementation that can be shared by some of the
other architectures that you don't use. The PCI core already uses
a number of 'weak' functions here, and we can expand on that.

> > > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > > +		if (!res) {
> > > +			err = -ENOMEM;
> > > +			goto bridge_ranges_nomem;
> > > +		}
> > > +
> > > +		of_pci_range_to_resource(&range, dev, res);
> > > +
> > > +		if (resource_type(res) == IORESOURCE_IO)
> > > +			*io_base = range.cpu_addr;
> 
> You don't care about the size of the IO space ?

We probably should. The ARM code currently assumes that each I/O
space is 64KB, but for a generic implementation we probably want
to handle both smaller and larger windows. I suggested not supporting
more than 1MB though, which is the maximum that I can see a reason
for, i.e. the pci-mvebu fake host bridge that has to combine
multiple per-port HW I/O spaces into one logical space.

> > > +		pci_add_resource_offset(resources, res,
> > > +				res->start - range.pci_addr);
> > > +	}
> > 
> > This is not the correct resource for I/O space at all. Please talk
> > to Will, I've been over this with him in detail and he probably
> > understands it now. I assume you are both working in the same
> > building.
> 
> Yes, the IO offsets work differently on powerpc as well

As I noticed later, the first patch in the series actually changes
the range_to_resource parser to return the logical start here, which
would make the offset correct even on powerpc, but unfortunately
I think that cannot work.

> > > +struct pci_host_bridge *
> > > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> > > +{
> > > +	int err, domain, busno;
> > > +	struct resource bus_range;
> > > +	struct pci_bus *root_bus;
> > > +	struct pci_host_bridge *bridge;
> > > +	resource_size_t io_base;
> > > +	LIST_HEAD(res);
> > > +
> > > +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> > > +	if (domain == -ENODEV)
> > > +		domain = domain_nr++;
> > >
> We probably want some uniqueness testing here.

I thought this at first too, but as Liviu mentioned, this does
get caught later when trying to create the root bus with
a conflicting number.

What the above code cannot do is to put multiple host bridges
into the same domain, using distinct bus ranges. This is an
intentional simplification over what some architectures currently
do, and we could not see a reason why you would still need
to put multiple host bridges into one domain in 2014.

x86 with ACPI does it, but they won't call of_create_pci_host_bridge.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 28, 2014, 9:55 a.m. UTC | #11
On Thu, Feb 27, 2014 at 11:32:04PM +0000, Benjamin Herrenschmidt wrote:
> On Thu, 2014-02-27 at 14:38 +0100, Arnd Bergmann wrote:
> > On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> > > Several platforms use a rather generic version of parsing
> > > the device tree to find the host bridge ranges. Move the common code
> > > into the generic PCI code and use it to create a pci_host_bridge
> > > structure that can be used by arch code.
> > > 
> > > Based on early attempts by Andrew Murray to unify the code.
> > > Used powerpc and microblaze PCI code as starting point.
> 
> So that is only going to work for archs that don't have any special
> twist. For example it won't work for powerpc which is why Andrew
> original approach didn't fly.
> 
> The range walk helpers do help though, I need to review in more details
> and test Andrew powerpc patch here and will merge it.
> 
> > > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > 
> > Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
> > to get his input so we can make this work on powerpc as well.
> 
> Tricky... We would need hooks which would turn the whole thing into a
> pile of spaghetti. I think we should stick to using the range helpers
> (Andrew latest patch), which makes the powerpc code a lot smaller,
> and leave it at that.

Benjamin,

Thanks for looking at the patches.

I did look at powerpc and microblaze. I've actually started modifying microblaze
as it seems to have fewer corner cases and that's how I came up with the current
design. Unfortunately my QEMU environment crashes when trying to feed it a custom
dtb file that has a pci host bridge node.

I know that in the range parsing powerpc does a lot more work and that is why
I've split my code into the generic part and the arch specific callback that can
do validation of the ranges and also a bit of housekeeping.

The next phase will be to see how much of the pci_controller structure can move
into the generic code and reduce its size. I do suffer from not having a PowerPC
platform where I can test the changes, but I can produce some compiled code that
someone can test.

> 
> > > diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> > > index 06ace62..feb8436 100644
> > > --- a/drivers/pci/host-bridge.c
> > > +++ b/drivers/pci/host-bridge.c
> > > @@ -6,9 +6,13 @@
> > >  #include <linux/init.h>
> > >  #include <linux/pci.h>
> > >  #include <linux/module.h>
> > > +#include <linux/of_address.h>
> > > +#include <linux/of_pci.h>
> > >  
> > >  #include "pci.h"
> > >  
> > > +static int domain_nr;
> > > +
> > 
> > For correctness, I think you want an 'atomic_t' here and use
> > atomic_inc_return() to get a new value.
> > 
> > >  static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
> > >  {
> > >  	while (bus->parent)
> > > @@ -91,3 +95,133 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
> > >  	res->end = region->end + offset;
> > >  }
> > >  EXPORT_SYMBOL(pcibios_bus_to_resource);
> > > +
> > > +/**
> > > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > > + * @dev: device node of the host bridge having the range property
> > > + * @resources: list where the range of resources will be added after DT parsing
> > > + * @io_base: pointer to a variable that will contain the physical address for
> > > + * the start of the I/O range.
> > > + *
> > > + * If this function returns an error then the @resources list will be freed.
> > > + *
> > > + * This function will parse the "ranges" property of a PCI host bridge device
> > > + * node and setup the resource mapping based on its content. It is expected
> > > + * that the property conforms with the Power ePAPR document.
> > > + *
> > > + * Each architecture is then offered the chance of applying their own
> > > + * filtering of pci_host_bridge_windows based on their own restrictions by
> > > + * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
> > > + * can then be used when creating a pci_host_bridge structure.
> > > + */
> > > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > > +		struct list_head *resources, resource_size_t *io_base)
> > > +{
> > > +	struct resource *res;
> > > +	struct of_pci_range range;
> > > +	struct of_pci_range_parser parser;
> > > +	int err;
> > > +
> > > +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > > +
> > > +	/* Check for ranges property */
> > > +	err = of_pci_range_parser_init(&parser, dev);
> > > +	if (err)
> > > +		return err;
> > > +
> > > +	pr_debug("Parsing ranges property...\n");
> > > +	for_each_of_pci_range(&parser, &range) {
> > > +		/* Read next ranges element */
> > > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > > +				range.pci_space, range.pci_addr);
> > > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > > +					range.cpu_addr, range.size);
> > > +
> > > +		/*
> > > +		 * If we failed translation or got a zero-sized region
> > > +		 * then skip this range
> > > +		 */
> > > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > > +			continue;
> 
> Shouldn't that test move into the parsing helper ?

Good question. Should we then automatically skip the ranges that fail
translation?

> 
> > > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > > +		if (!res) {
> > > +			err = -ENOMEM;
> > > +			goto bridge_ranges_nomem;
> > > +		}
> > > +
> > > +		of_pci_range_to_resource(&range, dev, res);
> > > +
> > > +		if (resource_type(res) == IORESOURCE_IO)
> > > +			*io_base = range.cpu_addr;
> 
> You don't care about the size of the IO space ?

Not here. I will leave it to the platform specific hook to do the validation.

> 
> > > +		pci_add_resource_offset(resources, res,
> > > +				res->start - range.pci_addr);
> > > +	}
> > 
> > This is not the correct resource for I/O space at all. Please talk
> > to Will, I've been over this with him in detail and he probably
> > understands it now. I assume you are both working in the same
> > building.
> 
> Yes, the IO offsets work differently on powerpc as well

Can you check for powerpc in light of the changes I've made in the
of_pci_range_to_resource() function? (And please read range.cpu_addr instead
of range.pci_addr in the pci_address_to_pio() call; I'll post v3 shortly.)

> 
> > Since this is common PCI code, you could also decide to open-code
> > the pci_add_resource_offset() function. If you don't do that, I
> > think you have a memory leak for the resources that you can avoid
> > by allocating the resource and pci_host_bridge_window structures
> > together with a single kzalloc.
> > 
> > > +	/* Apply architecture specific fixups for the ranges */
> > > +	pcibios_fixup_bridge_ranges(resources);
> > > +
> > > +	return 0;
> > > +
> > > +bridge_ranges_nomem:
> > > +	pci_free_resource_list(resources);
> > > +	return err;
> > > +}
> > > +
> > > +/**
> > > + * of_create_pci_host_bridge - Create a PCI host bridge structure using
> > > + * information passed in the DT.
> > > + * @parent: device owning this host bridge
> > > + * @ops: pci_ops associated with the host controller
> > > + * @host_data: opaque data structure used by the host controller.
> > > + *
> > > + * returns a pointer to the newly created pci_host_bridge structure, or
> > > + * NULL if the call failed.
> > > + *
> > > + * This function will try to obtain the host bridge domain number by
> > > + * using of_alias_get_id() call with "pci-domain" as a stem. If that
> > > + * fails, a local allocator will be used that will put each host bridge
> > > + * in a new domain.
> > > + */
> > > +struct pci_host_bridge *
> > > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
> > > +{
> > > +	int err, domain, busno;
> > > +	struct resource bus_range;
> > > +	struct pci_bus *root_bus;
> > > +	struct pci_host_bridge *bridge;
> > > +	resource_size_t io_base;
> > > +	LIST_HEAD(res);
> > > +
> > > +	domain = of_alias_get_id(parent->of_node, "pci-domain");
> > > +	if (domain == -ENODEV)
> > > +		domain = domain_nr++;
> > >
> We probably want some uniqueness testing here.

You mean you want one host bridge per domain? It's not entirely clear if there is
general consensus here on this, but I'm in favour of enforcing that.

Best regards,
Liviu

> 
> > > +	err = of_pci_parse_bus_range(parent->of_node, &bus_range);
> > > +	if (err) {
> > > +		dev_info(parent, "No bus range for %s, using default [0-255]\n",
> > > +			parent->of_node->full_name);
> > > +		bus_range.start = 0;
> > > +		bus_range.end = 255;
> > > +		bus_range.flags = IORESOURCE_BUS;
> > > +	}
> > > +	busno = bus_range.start;
> > > +	pci_add_resource(&res, &bus_range);
> > > +
> > > +	/* now parse the rest of host bridge bus ranges */
> > > +	if (pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base))
> > > +		return NULL;
> > > +
> > > +	/* then create the root bus */
> > > +	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
> > > +						ops, host_data, &res);
> > > +	if (!root_bus)
> > > +		return NULL;
> > 
> > Do we have any code that checks for conflicting domain/bus numbers here?
> > I guess pci_create_root_bus_in_domain() will fail if you have that.
> > 
> > Since pci_create_root_bus_in_domain() is a new function that you just
> > introduced, it would be helpful to change the calling conventions
> > so it returns an error pointer instead of NULL upon failing.
> > of_create_pci_host_bridge() can do the same, but pci_create_root_bus()
> > should keep returning NULL so we don't have to change all the
> > callers.
> > 
> > > +	bridge = to_pci_host_bridge(root_bus->bridge);
> > > +	bridge->io_base = io_base;
> > > +
> > > +	return bridge;
> > > +}
> > > +EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
> > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > index 1eed009..0c5e269 100644
> > > --- a/include/linux/pci.h
> > > +++ b/include/linux/pci.h
> > > @@ -395,6 +395,7 @@ struct pci_host_bridge {
> > >  	struct device dev;
> > >  	struct pci_bus *bus;		/* root bus */
> > >  	int domain_nr;
> > > +	resource_size_t io_base;	/* physical address for the start of I/O area */
> > >  	struct list_head windows;	/* pci_host_bridge_windows */
> > >  	void (*release_fn)(struct pci_host_bridge *);
> > >  	void *release_data;
> > 
> > What is the io_base used for here?
> > 
> > > @@ -1786,11 +1787,23 @@ static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
> > >  	return bus ? bus->dev.of_node : NULL;
> > >  }
> > >  
> > > +struct pci_host_bridge *
> > > +of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
> > > +			void *host_data);
> > > +
> > > +void pcibios_fixup_bridge_ranges(struct list_head *resources);
> > >  #else /* CONFIG_OF */
> > >  static inline void pci_set_of_node(struct pci_dev *dev) { }
> > >  static inline void pci_release_of_node(struct pci_dev *dev) { }
> > >  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
> > >  static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
> > > +
> > > +static inline struct pci_host_bridge *
> > > +pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
> > > +			void *host_data)
> > > +{
> > > +	return NULL;
> > > +}
> > >  #endif  /* CONFIG_OF */
> > >  
> > >  #ifdef CONFIG_EEH
> > > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
>
Benjamin Herrenschmidt March 2, 2014, 1:23 a.m. UTC | #12
On Fri, 2014-02-28 at 09:55 +0000, Liviu Dudau wrote:
> The next phase will be to see how much of the pci_controller structure can move
> into the generic code and reduce its size. I do suffer from not having a PowerPC
> platform where I can test the changes, but I can produce some compiled code that
> someone can test.

Ok, we can probably start from there. I think the right approach is to actually
kill pci_controller by merging it with the new host bridge structure.

Most of the resources are there now, we need to move the rest. We probably will
need a way to attach platform specific bits to it though, and a few hooks to
populate them but that isn't terribly hard.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt March 2, 2014, 1:25 a.m. UTC | #13
On Sun, 2014-03-02 at 12:23 +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2014-02-28 at 09:55 +0000, Liviu Dudau wrote:
> > The next phase will be to see how much of the pci_controller structure can move
> > into the generic code and reduce its size. I do suffer from not having a PowerPC
> > platform where I can test the changes, but I can produce some compiled code that
> > someone can test.
> 
> Ok, we can probably start from there. I think the right approach is to actually
> kill pci_controller by merging it with the new host bridge structure.
> 
> Most of the resources are there now, we need to move the rest. We probably will
> need a way to attach platform specific bits to it though, and a few hooks to
> populate them but that isn't terribly hard.

Oh and regarding testing...

qemu should help. For qemu ppc64, try -M pseries, that's fairly well
maintained since it's the basis for all our new KVM based products,
and there's still some (hopefully) working 32-bit mac stuff in there.

With pseries, I would expect that you can install fedora, debian or
ubuntu (in fact you can also install SLES11/RHEL6 I believe).

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grant Likely March 7, 2014, 6:58 p.m. UTC | #14
On Thu, 27 Feb 2014 14:38:32 +0100, Arnd Bergmann <arnd@arndb.de> wrote:
> On Thursday 27 February 2014 13:06:42 Liviu Dudau wrote:
> > Several platforms use a rather generic version of parsing
> > the device tree to find the host bridge ranges. Move the common code
> > into the generic PCI code and use it to create a pci_host_bridge
> > structure that can be used by arch code.
> > 
> > Based on early attempts by Andrew Murray to unify the code.
> > Used powerpc and microblaze PCI code as starting point.
> > 
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> 
> Please add Benjamin Herrenschmidt to Cc here, I think it would be helpful
> to get his input so we can make this work on powerpc as well.

s/helpful/required/

:-)

g.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 06ace62..feb8436 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -6,9 +6,13 @@ 
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
 
 #include "pci.h"
 
+static int domain_nr;
+
 static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
 {
 	while (bus->parent)
@@ -91,3 +95,133 @@  void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
 	res->end = region->end + offset;
 }
 EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+/**
+ * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
+ * @dev: device node of the host bridge having the range property
+ * @resources: list where the range of resources will be added after DT parsing
+ * @io_base: pointer to a variable that will contain the physical address for
+ * the start of the I/O range.
+ *
+ * If this function returns an error then the @resources list will be freed.
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping based on its content. It is expected
+ * that the property conforms with the Power ePAPR document.
+ *
+ * Each architecture is then offered the chance of applying their own
+ * filtering of pci_host_bridge_windows based on their own restrictions by
+ * calling pcibios_fixup_bridge_ranges(). The filtered list of windows
+ * can then be used when creating a pci_host_bridge structure.
+ */
+static int pci_host_bridge_of_get_ranges(struct device_node *dev,
+		struct list_head *resources, resource_size_t *io_base)
+{
+	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	int err;
+
+	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
+
+	/* Check for ranges property */
+	err = of_pci_range_parser_init(&parser, dev);
+	if (err)
+		return err;
+
+	pr_debug("Parsing ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/* Read next ranges element */
+		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
+				range.pci_space, range.pci_addr);
+		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
+					range.cpu_addr, range.size);
+
+		/*
+		 * If we failed translation or got a zero-sized region
+		 * then skip this range
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto bridge_ranges_nomem;
+		}
+
+		of_pci_range_to_resource(&range, dev, res);
+
+		if (resource_type(res) == IORESOURCE_IO)
+			*io_base = range.cpu_addr;
+
+		pci_add_resource_offset(resources, res,
+				res->start - range.pci_addr);
+	}
+
+	/* Apply architecture specific fixups for the ranges */
+	pcibios_fixup_bridge_ranges(resources);
+
+	return 0;
+
+bridge_ranges_nomem:
+	pci_free_resource_list(resources);
+	return err;
+}
+
+/**
+ * of_create_pci_host_bridge - Create a PCI host bridge structure using
+ * information passed in the DT.
+ * @parent: device owning this host bridge
+ * @ops: pci_ops associated with the host controller
+ * @host_data: opaque data structure used by the host controller.
+ *
+ * returns a pointer to the newly created pci_host_bridge structure, or
+ * NULL if the call failed.
+ *
+ * This function will try to obtain the host bridge domain number by
+ * using of_alias_get_id() call with "pci-domain" as a stem. If that
+ * fails, a local allocator will be used that will put each host bridge
+ * in a new domain.
+ */
+struct pci_host_bridge *
+of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops, void *host_data)
+{
+	int err, domain, busno;
+	struct resource bus_range;
+	struct pci_bus *root_bus;
+	struct pci_host_bridge *bridge;
+	resource_size_t io_base;
+	LIST_HEAD(res);
+
+	domain = of_alias_get_id(parent->of_node, "pci-domain");
+	if (domain == -ENODEV)
+		domain = domain_nr++;
+
+	err = of_pci_parse_bus_range(parent->of_node, &bus_range);
+	if (err) {
+		dev_info(parent, "No bus range for %s, using default [0-255]\n",
+			parent->of_node->full_name);
+		bus_range.start = 0;
+		bus_range.end = 255;
+		bus_range.flags = IORESOURCE_BUS;
+	}
+	busno = bus_range.start;
+	pci_add_resource(&res, &bus_range);
+
+	/* now parse the rest of host bridge bus ranges */
+	if (pci_host_bridge_of_get_ranges(parent->of_node, &res, &io_base))
+		return NULL;
+
+	/* then create the root bus */
+	root_bus = pci_create_root_bus_in_domain(parent, domain, busno,
+						ops, host_data, &res);
+	if (!root_bus)
+		return NULL;
+
+	bridge = to_pci_host_bridge(root_bus->bridge);
+	bridge->io_base = io_base;
+
+	return bridge;
+}
+EXPORT_SYMBOL_GPL(of_create_pci_host_bridge);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1eed009..0c5e269 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -395,6 +395,7 @@  struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
 	int domain_nr;
+	resource_size_t io_base;	/* physical address for the start of I/O area */
 	struct list_head windows;	/* pci_host_bridge_windows */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
@@ -1786,11 +1787,23 @@  static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
 	return bus ? bus->dev.of_node : NULL;
 }
 
+struct pci_host_bridge *
+of_create_pci_host_bridge(struct device *parent, struct pci_ops *ops,
+			void *host_data);
+
+void pcibios_fixup_bridge_ranges(struct list_head *resources);
 #else /* CONFIG_OF */
 static inline void pci_set_of_node(struct pci_dev *dev) { }
 static inline void pci_release_of_node(struct pci_dev *dev) { }
 static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
 static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
+
+static inline struct pci_host_bridge *
+pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
+			void *host_data)
+{
+	return NULL;
+}
 #endif  /* CONFIG_OF */
 
 #ifdef CONFIG_EEH