diff mbox

[v7,4/6] pci: Introduce a domain number for pci_host_bridge.

Message ID 1394811272-1547-5-git-send-email-Liviu.Dudau@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Liviu Dudau March 14, 2014, 3:34 p.m. UTC
Make it easier to discover the domain number of a bus by storing
the number in pci_host_bridge for the root bus. Several architectures
have their own way of storing this information, so it makes sense
to try to unify the code. While at this, add a new function that
creates a root bus in a given domain and make pci_create_root_bus()
a wrapper around this function.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Tested-by: Tanmay Inamdar <tinamdar@apm.com>
---
 drivers/pci/probe.c | 41 +++++++++++++++++++++++++++++++++--------
 include/linux/pci.h |  4 ++++
 2 files changed, 37 insertions(+), 8 deletions(-)

Comments

Bjorn Helgaas April 5, 2014, midnight UTC | #1
On Fri, Mar 14, 2014 at 03:34:30PM +0000, Liviu Dudau wrote:
> Make it easier to discover the domain number of a bus by storing
> the number in pci_host_bridge for the root bus. Several architectures
> have their own way of storing this information, so it makes sense
> to try to unify the code. 

I like the idea of unifying the way we handle the domain number.  But 
I'd like to see more of the strategy before committing to this approach.

This patch adds struct pci_host_bridge.domain_nr, but of course
pci_domain_nr() doesn't use it.  It can't today, because
pci_create_root_bus() relies on pci_domain_nr() to fill in
pci_host_bridge.domain_nr.

But I suppose the intent is that someday we can make pci_domain_nr()
arch-independent somehow.  I'd just like to see more of the path to
that happening.

> While at this, add a new function that
> creates a root bus in a given domain and make pci_create_root_bus()
> a wrapper around this function.

I'm a little concerned about adding a new "create root bus" interface,
partly because we have quite a few already, and I'd like to reduce the
number of them instead of adding more.  And I think there might be other
similar opportunities for unification, so I could easily see us adding new
functions in the future to unify NUMA node info, ECAM info, etc.

I wonder if we need some sort of descriptor structure that the arch could
fill in and pass to the PCI core.  Then we could add new members without
having to create new interfaces each time.

> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> Tested-by: Tanmay Inamdar <tinamdar@apm.com>
> ---
>  drivers/pci/probe.c | 41 +++++++++++++++++++++++++++++++++--------
>  include/linux/pci.h |  4 ++++
>  2 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index fd11c12..172c615 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1714,8 +1714,9 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
>  {
>  }
>  
> -struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> -		struct pci_ops *ops, void *sysdata, struct list_head *resources)
> +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> +		int domain, int bus, struct pci_ops *ops, void *sysdata,
> +		struct list_head *resources)
>  {
>  	int error;
>  	struct pci_host_bridge *bridge;
> @@ -1728,30 +1729,34 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>  
>  	bridge = pci_alloc_host_bridge();
>  	if (!bridge)
> -		return NULL;
> +		return ERR_PTR(-ENOMEM);
>  
>  	bridge->dev.parent = parent;
>  	bridge->dev.release = pci_release_host_bridge_dev;
> +	bridge->domain_nr = domain;
>  	error = pcibios_root_bridge_prepare(bridge);
>  	if (error)
>  		goto err_out;
>  
>  	b = pci_alloc_bus();
> -	if (!b)
> +	if (!b) {
> +		error = -ENOMEM;
>  		goto err_out;
> +	}
>  
>  	b->sysdata = sysdata;
>  	b->ops = ops;
>  	b->number = b->busn_res.start = bus;
> -	b2 = pci_find_bus(pci_domain_nr(b), bus);
> +	b2 = pci_find_bus(bridge->domain_nr, bus);
>  	if (b2) {
>  		/* If we already got to this bus through a different bridge, ignore it */
>  		dev_dbg(&b2->dev, "bus already known\n");
> +		error = -EEXIST;
>  		goto err_bus_out;
>  	}
>  
>  	bridge->bus = b;
> -	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
> +	dev_set_name(&bridge->dev, "pci%04x:%02x", bridge->domain_nr, bus);
>  	error = device_register(&bridge->dev);
>  	if (error) {
>  		put_device(&bridge->dev);
> @@ -1766,7 +1771,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>  
>  	b->dev.class = &pcibus_class;
>  	b->dev.parent = b->bridge;
> -	dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
> +	dev_set_name(&b->dev, "%04x:%02x", bridge->domain_nr, bus);
>  	error = device_register(&b->dev);
>  	if (error)
>  		goto class_dev_reg_err;
> @@ -1816,7 +1821,27 @@ err_bus_out:
>  	kfree(b);
>  err_out:
>  	kfree(bridge);
> -	return NULL;
> +	return ERR_PTR(error);
> +}
> +
> +struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> +		struct pci_ops *ops, void *sysdata, struct list_head *resources)
> +{
> +	int domain_nr;
> +	struct pci_bus *b = pci_alloc_bus();
> +	if (!b)
> +		return NULL;
> +
> +	b->sysdata = sysdata;
> +	domain_nr = pci_domain_nr(b);
> +	kfree(b);
> +
> +	b = pci_create_root_bus_in_domain(parent, domain_nr, bus,
> +				ops, sysdata, resources);
> +	if (IS_ERR(b))
> +		return NULL;
> +
> +	return b;
>  }
>  
>  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 33aa2ca..1eed009 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -394,6 +394,7 @@ struct pci_host_bridge_window {
>  struct pci_host_bridge {
>  	struct device dev;
>  	struct pci_bus *bus;		/* root bus */
> +	int domain_nr;
>  	struct list_head windows;	/* pci_host_bridge_windows */
>  	void (*release_fn)(struct pci_host_bridge *);
>  	void *release_data;
> @@ -747,6 +748,9 @@ struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
>  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>  				    struct pci_ops *ops, void *sysdata,
>  				    struct list_head *resources);
> +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> +			int domain, int bus, struct pci_ops *ops,
> +			void *sysdata, struct list_head *resources);
>  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
>  int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
>  void pci_bus_release_busn_res(struct pci_bus *b);
> -- 
> 1.9.0
>
Liviu Dudau April 7, 2014, 8:46 a.m. UTC | #2
On Sat, Apr 05, 2014 at 01:00:07AM +0100, Bjorn Helgaas wrote:
> On Fri, Mar 14, 2014 at 03:34:30PM +0000, Liviu Dudau wrote:
> > Make it easier to discover the domain number of a bus by storing
> > the number in pci_host_bridge for the root bus. Several architectures
> > have their own way of storing this information, so it makes sense
> > to try to unify the code. 
> 
> I like the idea of unifying the way we handle the domain number.  But 
> I'd like to see more of the strategy before committing to this approach.

*My* strategy is to get rid of pci_domain_nr(). I don't see why we need
to have arch specific way of providing the number, specially after looking
at the existing implementations that return a value from a variable that
is never touched or incremented. My guess is that pci_domain_nr() was
created to work around the fact that there was no domain_nr maintainance in
the generic code.

> 
> This patch adds struct pci_host_bridge.domain_nr, but of course
> pci_domain_nr() doesn't use it.  It can't today, because
> pci_create_root_bus() relies on pci_domain_nr() to fill in
> pci_host_bridge.domain_nr.
> 
> But I suppose the intent is that someday we can make pci_domain_nr()
> arch-independent somehow.  I'd just like to see more of the path to
> that happening.

The path would be to send a patch that removes all existing pci_domain_nr()
macros/inline functions and rely on the generic function.

> 
> > While at this, add a new function that
> > creates a root bus in a given domain and make pci_create_root_bus()
> > a wrapper around this function.
> 
> I'm a little concerned about adding a new "create root bus" interface,
> partly because we have quite a few already, and I'd like to reduce the
> number of them instead of adding more.  And I think there might be other
> similar opportunities for unification, so I could easily see us adding new
> functions in the future to unify NUMA node info, ECAM info, etc.

The reason for creating the wrapper function was to allow for explicit passing
of domain_nr. If we find architectures where generic allocation of domain_nr
doesn't work for them, we can make them use this wrapper to pass the domain_nr
into the host bridge when creating the root bus.

> 
> I wonder if we need some sort of descriptor structure that the arch could
> fill in and pass to the PCI core.  Then we could add new members without
> having to create new interfaces each time.

I'm trying to reduce the number of variables being passed between architectures
and generic code. host_bridge (with the associated root bus), domain_nr those
are needed. Is there anything else that you have in your mind that needs to
be shared?

My approach would be in sharing of the data: PCI is a standard, and the core
framework implements it. What is so broken in your architecture that you need
to work around the core code? And I'm not talking about drivers and quirks,
but architectural level support.

Best regards,
Liviu

> 
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> > Tested-by: Tanmay Inamdar <tinamdar@apm.com>
> > ---
> >  drivers/pci/probe.c | 41 +++++++++++++++++++++++++++++++++--------
> >  include/linux/pci.h |  4 ++++
> >  2 files changed, 37 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index fd11c12..172c615 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -1714,8 +1714,9 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
> >  {
> >  }
> >  
> > -struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> > -		struct pci_ops *ops, void *sysdata, struct list_head *resources)
> > +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> > +		int domain, int bus, struct pci_ops *ops, void *sysdata,
> > +		struct list_head *resources)
> >  {
> >  	int error;
> >  	struct pci_host_bridge *bridge;
> > @@ -1728,30 +1729,34 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> >  
> >  	bridge = pci_alloc_host_bridge();
> >  	if (!bridge)
> > -		return NULL;
> > +		return ERR_PTR(-ENOMEM);
> >  
> >  	bridge->dev.parent = parent;
> >  	bridge->dev.release = pci_release_host_bridge_dev;
> > +	bridge->domain_nr = domain;
> >  	error = pcibios_root_bridge_prepare(bridge);
> >  	if (error)
> >  		goto err_out;
> >  
> >  	b = pci_alloc_bus();
> > -	if (!b)
> > +	if (!b) {
> > +		error = -ENOMEM;
> >  		goto err_out;
> > +	}
> >  
> >  	b->sysdata = sysdata;
> >  	b->ops = ops;
> >  	b->number = b->busn_res.start = bus;
> > -	b2 = pci_find_bus(pci_domain_nr(b), bus);
> > +	b2 = pci_find_bus(bridge->domain_nr, bus);
> >  	if (b2) {
> >  		/* If we already got to this bus through a different bridge, ignore it */
> >  		dev_dbg(&b2->dev, "bus already known\n");
> > +		error = -EEXIST;
> >  		goto err_bus_out;
> >  	}
> >  
> >  	bridge->bus = b;
> > -	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
> > +	dev_set_name(&bridge->dev, "pci%04x:%02x", bridge->domain_nr, bus);
> >  	error = device_register(&bridge->dev);
> >  	if (error) {
> >  		put_device(&bridge->dev);
> > @@ -1766,7 +1771,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> >  
> >  	b->dev.class = &pcibus_class;
> >  	b->dev.parent = b->bridge;
> > -	dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
> > +	dev_set_name(&b->dev, "%04x:%02x", bridge->domain_nr, bus);
> >  	error = device_register(&b->dev);
> >  	if (error)
> >  		goto class_dev_reg_err;
> > @@ -1816,7 +1821,27 @@ err_bus_out:
> >  	kfree(b);
> >  err_out:
> >  	kfree(bridge);
> > -	return NULL;
> > +	return ERR_PTR(error);
> > +}
> > +
> > +struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> > +		struct pci_ops *ops, void *sysdata, struct list_head *resources)
> > +{
> > +	int domain_nr;
> > +	struct pci_bus *b = pci_alloc_bus();
> > +	if (!b)
> > +		return NULL;
> > +
> > +	b->sysdata = sysdata;
> > +	domain_nr = pci_domain_nr(b);
> > +	kfree(b);
> > +
> > +	b = pci_create_root_bus_in_domain(parent, domain_nr, bus,
> > +				ops, sysdata, resources);
> > +	if (IS_ERR(b))
> > +		return NULL;
> > +
> > +	return b;
> >  }
> >  
> >  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 33aa2ca..1eed009 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -394,6 +394,7 @@ struct pci_host_bridge_window {
> >  struct pci_host_bridge {
> >  	struct device dev;
> >  	struct pci_bus *bus;		/* root bus */
> > +	int domain_nr;
> >  	struct list_head windows;	/* pci_host_bridge_windows */
> >  	void (*release_fn)(struct pci_host_bridge *);
> >  	void *release_data;
> > @@ -747,6 +748,9 @@ struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
> >  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> >  				    struct pci_ops *ops, void *sysdata,
> >  				    struct list_head *resources);
> > +struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
> > +			int domain, int bus, struct pci_ops *ops,
> > +			void *sysdata, struct list_head *resources);
> >  int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
> >  int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
> >  void pci_bus_release_busn_res(struct pci_bus *b);
> > -- 
> > 1.9.0
> > 
>
Benjamin Herrenschmidt April 7, 2014, 9:14 a.m. UTC | #3
On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> 
> *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> to have arch specific way of providing the number, specially after looking
> at the existing implementations that return a value from a variable that
> is never touched or incremented. My guess is that pci_domain_nr() was
> created to work around the fact that there was no domain_nr maintainance in
> the generic code.

Well, there was no generic host bridge structure. There is one now, it should
go there.

Cheers,
Ben.
Liviu Dudau April 7, 2014, 10:07 a.m. UTC | #4
On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> > 
> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> > to have arch specific way of providing the number, specially after looking
> > at the existing implementations that return a value from a variable that
> > is never touched or incremented. My guess is that pci_domain_nr() was
> > created to work around the fact that there was no domain_nr maintainance in
> > the generic code.
> 
> Well, there was no generic host bridge structure. There is one now, it should
> go there.

Exactly! Hence my patch. After it gets accepted I will go through architectures
and remove their version of pci_domain_nr().

Best regards,
Liviu

> 
> Cheers,
> Ben.
>  
> 
>
Bjorn Helgaas April 7, 2014, 10:44 p.m. UTC | #5
On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
>> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
>> >
>> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
>> > to have arch specific way of providing the number, specially after looking
>> > at the existing implementations that return a value from a variable that
>> > is never touched or incremented. My guess is that pci_domain_nr() was
>> > created to work around the fact that there was no domain_nr maintainance in
>> > the generic code.
>>
>> Well, there was no generic host bridge structure. There is one now, it should
>> go there.
>
> Exactly! Hence my patch. After it gets accepted I will go through architectures
> and remove their version of pci_domain_nr().

Currently the arch has to supply pci_domain_nr() because that's the
only way for the generic code to learn the domain.  After you add
pci_create_root_bus_in_domain(), the arch can supply the domain that
way, and we won't need the arch-specific pci_domain_nr().  Right?
That makes more sense to me; thanks for the explanation.

Let me try to explain my concern about the
pci_create_root_bus_in_domain() interface.  We currently have these
interfaces:

  pci_scan_root_bus()
  pci_scan_bus()
  pci_scan_bus_parented()
  pci_create_root_bus()

pci_scan_root_bus() is a higher-level interface than
pci_create_root_bus(), so I'm trying to migrate toward it because it
lets us remove a little code from the arch, e.g., pci_scan_child_bus()
and pci_bus_add_devices().

I think we can only remove the arch-specific pci_domain_nr() if that
arch uses pci_create_root_bus_in_domain().  When we convert an arch
from using scan_bus interfaces to using
pci_create_root_bus_in_domain(), we will have to move the rest of the
scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
the arch code.

One alternative is to add an _in_domain() variant of each of these
interfaces, but that doesn't seem very convenient either.  My idea of
passing in a structure would also require adding variants, so there's
not really an advantage there, but I am thinking of the next
unification effort, e.g., for NUMA node info.  I don't really want to
have to change all the _in_domain() interfaces to also take yet
another parameter for the node number.

Bjorn
Liviu Dudau April 8, 2014, 10:20 a.m. UTC | #6
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >
> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> > to have arch specific way of providing the number, specially after looking
> >> > at the existing implementations that return a value from a variable that
> >> > is never touched or incremented. My guess is that pci_domain_nr() was
> >> > created to work around the fact that there was no domain_nr maintainance in
> >> > the generic code.
> >>
> >> Well, there was no generic host bridge structure. There is one now, it should
> >> go there.
> >
> > Exactly! Hence my patch. After it gets accepted I will go through architectures
> > and remove their version of pci_domain_nr().
> 
> Currently the arch has to supply pci_domain_nr() because that's the
> only way for the generic code to learn the domain.  After you add
> pci_create_root_bus_in_domain(), the arch can supply the domain that
> way, and we won't need the arch-specific pci_domain_nr().  Right?
> That makes more sense to me; thanks for the explanation.

Right.

> 
> Let me try to explain my concern about the
> pci_create_root_bus_in_domain() interface.  We currently have these
> interfaces:
> 
>   pci_scan_root_bus()
>   pci_scan_bus()
>   pci_scan_bus_parented()
>   pci_create_root_bus()
> 
> pci_scan_root_bus() is a higher-level interface than
> pci_create_root_bus(), so I'm trying to migrate toward it because it
> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> and pci_bus_add_devices().
> 
> I think we can only remove the arch-specific pci_domain_nr() if that
> arch uses pci_create_root_bus_in_domain().  When we convert an arch
> from using scan_bus interfaces to using
> pci_create_root_bus_in_domain(), we will have to move the rest of the
> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> the arch code.
> 
> One alternative is to add an _in_domain() variant of each of these
> interfaces, but that doesn't seem very convenient either.  My idea of
> passing in a structure would also require adding variants, so there's
> not really an advantage there, but I am thinking of the next
> unification effort, e.g., for NUMA node info.  I don't really want to
> have to change all the _in_domain() interfaces to also take yet
> another parameter for the node number.

OK, what about this: all the functions that you have mentioned take a
void *sysdata parameter. Should we convert this opaque pointer into a
specific structure that holds the domain_nr and (in future) the NUMA
node info?

Converting all the architectures is going to be a long winded job as
everyone loved to add their own stuff in the structure pointed by sysdata,
breaking that will lead to frustrations, but maybe the framework should
take back ownership of it.

Best regards,
Liviu

> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Bjorn Helgaas April 8, 2014, 4:28 p.m. UTC | #7
On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:

>> Let me try to explain my concern about the
>> pci_create_root_bus_in_domain() interface.  We currently have these
>> interfaces:
>>
>>   pci_scan_root_bus()
>>   pci_scan_bus()
>>   pci_scan_bus_parented()
>>   pci_create_root_bus()
>> ...

>> One alternative is to add an _in_domain() variant of each of these
>> interfaces, but that doesn't seem very convenient either.  My idea of
>> passing in a structure would also require adding variants, so there's
>> not really an advantage there, but I am thinking of the next
>> unification effort, e.g., for NUMA node info.  I don't really want to
>> have to change all the _in_domain() interfaces to also take yet
>> another parameter for the node number.
>
> OK, what about this: all the functions that you have mentioned take a
> void *sysdata parameter. Should we convert this opaque pointer into a
> specific structure that holds the domain_nr and (in future) the NUMA
> node info?

I doubt if we can make sysdata itself generic because I suspect we
need a way to have *some* arch-specific data.  But maybe the arch
could supply a structure containing a struct device *, domain, struct
pci_ops *, list of resources, aperture info, etc.  I wonder if struct
pci_host_bridge would be a reasonable place to put this stuff, e.g.,
something like this:

  struct pci_host_bridge {
    int domain;
    int node;
    struct device *dev;
    struct pci_ops *ops;
    struct list_head resources;
    void *sysdata;
    struct pci_bus *bus;  /* filled in by core, not by arch */
    ... /* other existing contents managed by core */
  };

  struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
Liviu Dudau April 9, 2014, 12:07 p.m. UTC | #8
On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote:
> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> 
> >> Let me try to explain my concern about the
> >> pci_create_root_bus_in_domain() interface.  We currently have these
> >> interfaces:
> >>
> >>   pci_scan_root_bus()
> >>   pci_scan_bus()
> >>   pci_scan_bus_parented()
> >>   pci_create_root_bus()
> >> ...
> 
> >> One alternative is to add an _in_domain() variant of each of these
> >> interfaces, but that doesn't seem very convenient either.  My idea of
> >> passing in a structure would also require adding variants, so there's
> >> not really an advantage there, but I am thinking of the next
> >> unification effort, e.g., for NUMA node info.  I don't really want to
> >> have to change all the _in_domain() interfaces to also take yet
> >> another parameter for the node number.
> >
> > OK, what about this: all the functions that you have mentioned take a
> > void *sysdata parameter. Should we convert this opaque pointer into a
> > specific structure that holds the domain_nr and (in future) the NUMA
> > node info?
> 
> I doubt if we can make sysdata itself generic because I suspect we
> need a way to have *some* arch-specific data.  But maybe the arch
> could supply a structure containing a struct device *, domain, struct
> pci_ops *, list of resources, aperture info, etc.  I wonder if struct
> pci_host_bridge would be a reasonable place to put this stuff, e.g.,
> something like this:
> 
>   struct pci_host_bridge {
>     int domain;
>     int node;
>     struct device *dev;
>     struct pci_ops *ops;
>     struct list_head resources;
>     void *sysdata;
>     struct pci_bus *bus;  /* filled in by core, not by arch */
>     ... /* other existing contents managed by core */
>   };
> 
>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);

I'm really reluctant to give the arches more rope to hang themselves. I
really dislike the use of xxxx_initcall() to do PCI initialisation ordering
that is currently in widespread use through the arch code. As I hope to
have proven with my arm64 code, you can have PCI support for an architecture
without having to provide any arch specific code. We have enough correct
code in the PCI framework, what would the architectures provide to the generic
code that we cannot get by following the standard?

Of course, there are always arch specific corners and they need their data
structures to make sense of those, but rather than having architectures
fill in a structure *before* we can setup host bridges I think we need
to reverse the order. Using your example structure, I don't think is
the arch's job to provide the list of resources or the domain number
before we can scan the host bridge. We should be able to get those from
somewhere else (like adding by default the ioport_resource and
iomem_resource and managing domain numbers inside the core framework).

Best regards,
Liviu

> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Bjorn Helgaas April 9, 2014, 2:02 p.m. UTC | #9
On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote:
>> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
>>
>> >> Let me try to explain my concern about the
>> >> pci_create_root_bus_in_domain() interface.  We currently have these
>> >> interfaces:
>> >>
>> >>   pci_scan_root_bus()
>> >>   pci_scan_bus()
>> >>   pci_scan_bus_parented()
>> >>   pci_create_root_bus()
>> >> ...
>>
>> >> One alternative is to add an _in_domain() variant of each of these
>> >> interfaces, but that doesn't seem very convenient either.  My idea of
>> >> passing in a structure would also require adding variants, so there's
>> >> not really an advantage there, but I am thinking of the next
>> >> unification effort, e.g., for NUMA node info.  I don't really want to
>> >> have to change all the _in_domain() interfaces to also take yet
>> >> another parameter for the node number.
>> >
>> > OK, what about this: all the functions that you have mentioned take a
>> > void *sysdata parameter. Should we convert this opaque pointer into a
>> > specific structure that holds the domain_nr and (in future) the NUMA
>> > node info?
>>
>> I doubt if we can make sysdata itself generic because I suspect we
>> need a way to have *some* arch-specific data.  But maybe the arch
>> could supply a structure containing a struct device *, domain, struct
>> pci_ops *, list of resources, aperture info, etc.  I wonder if struct
>> pci_host_bridge would be a reasonable place to put this stuff, e.g.,
>> something like this:
>>
>>   struct pci_host_bridge {
>>     int domain;
>>     int node;
>>     struct device *dev;
>>     struct pci_ops *ops;
>>     struct list_head resources;
>>     void *sysdata;
>>     struct pci_bus *bus;  /* filled in by core, not by arch */
>>     ... /* other existing contents managed by core */
>>   };
>>
>>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
>
> I'm really reluctant to give the arches more rope to hang themselves.

If you mean the sysdata pointer is rope to hang themselves, I think it
would be great it we didn't need sysdata at all.  But I think it would
be a huge amount of work to get rid of it completely, and keeping it
would let us work at that incrementally.

> I
> really dislike the use of xxxx_initcall() to do PCI initialisation ordering
> that is currently in widespread use through the arch code.

I certainly agree that initcall ordering is fragile and to be avoided
when possible.

> As I hope to
> have proven with my arm64 code, you can have PCI support for an architecture
> without having to provide any arch specific code. We have enough correct
> code in the PCI framework, what would the architectures provide to the generic
> code that we cannot get by following the standard?

PCI host bridges are not architected, i.e., the PCI/PCIe specs do not
say anything about how to discover them or how to program them.  So
the arch or a driver for another bus (ACPI, OF, etc.) must enumerate
them and discover the bridge apertures (those are in the resource
list).  And obviously the arch has to provide the root bus number and
PCI config accessors.

> Of course, there are always arch specific corners and they need their data
> structures to make sense of those, but rather than having architectures
> fill in a structure *before* we can setup host bridges I think we need
> to reverse the order. Using your example structure, I don't think is
> the arch's job to provide the list of resources or the domain number
> before we can scan the host bridge. We should be able to get those from
> somewhere else (like adding by default the ioport_resource and
> iomem_resource and managing domain numbers inside the core framework).

It's possible we could manage domain numbers in the core.  On ACPI
systems, we currently we use the ACPI _SEG value as the domain.  In
some cases, e.g., on ia64, config space access is done via firmware
interfaces, and those interfaces expect the _SEG values.  We could
conceivably maintain a mapping between _SEG and domain, but I'm not
sure there's an advantage there.

I probably don't understand what you intend by reversing the order.
Are you suggesting something like new pcibios_*() interfaces the arch
can use to get the host bridge apertures and domain number?

Bjorn
Arnd Bergmann April 9, 2014, 2:08 p.m. UTC | #10
On Wednesday 09 April 2014 08:02:41 Bjorn Helgaas wrote:
> 
> It's possible we could manage domain numbers in the core.  On ACPI
> systems, we currently we use the ACPI _SEG value as the domain.  In
> some cases, e.g., on ia64, config space access is done via firmware
> interfaces, and those interfaces expect the _SEG values.  We could
> conceivably maintain a mapping between _SEG and domain, but I'm not
> sure there's an advantage there.

I think it's a safe assumption that we will never have more than
one firmware trying to enumerate the domains, so it would be
safe to let ACPI keep doing its own thing for domain numbers,
have the DT code pick domain number using some method we come up
with, and for everything else let the architecture code deal with
it. There are probably very few systems that have multiple domains
but use neither ACPI nor DT.

	Arnd
Benjamin Herrenschmidt April 9, 2014, 11:49 p.m. UTC | #11
On Wed, 2014-04-09 at 08:02 -0600, Bjorn Helgaas wrote:
> It's possible we could manage domain numbers in the core.  On ACPI
> systems, we currently we use the ACPI _SEG value as the domain.  In
> some cases, e.g., on ia64, config space access is done via firmware
> interfaces, and those interfaces expect the _SEG values.  We could
> conceivably maintain a mapping between _SEG and domain, but I'm not
> sure there's an advantage there.

I'd rather keep the ability for the architecture to assign domain
numbers.

I'm working on making them relate to the physical slot numbers
on our new systems so we get predictable PCI IDs which helps with
some stuff like the new network device naming scheme etc...

Predictability is a good thing :-)

> I probably don't understand what you intend by reversing the order.
> Are you suggesting something like new pcibios_*() interfaces the arch
> can use to get the host bridge apertures and domain number?

Cheers,
Ben.
Liviu Dudau April 10, 2014, 1:27 a.m. UTC | #12
On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
> On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote:
> >> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> >>
> >> >> Let me try to explain my concern about the
> >> >> pci_create_root_bus_in_domain() interface.  We currently have these
> >> >> interfaces:
> >> >>
> >> >>   pci_scan_root_bus()
> >> >>   pci_scan_bus()
> >> >>   pci_scan_bus_parented()
> >> >>   pci_create_root_bus()
> >> >> ...
> >>
> >> >> One alternative is to add an _in_domain() variant of each of these
> >> >> interfaces, but that doesn't seem very convenient either.  My idea of
> >> >> passing in a structure would also require adding variants, so there's
> >> >> not really an advantage there, but I am thinking of the next
> >> >> unification effort, e.g., for NUMA node info.  I don't really want to
> >> >> have to change all the _in_domain() interfaces to also take yet
> >> >> another parameter for the node number.
> >> >
> >> > OK, what about this: all the functions that you have mentioned take a
> >> > void *sysdata parameter. Should we convert this opaque pointer into a
> >> > specific structure that holds the domain_nr and (in future) the NUMA
> >> > node info?
> >>
> >> I doubt if we can make sysdata itself generic because I suspect we
> >> need a way to have *some* arch-specific data.  But maybe the arch
> >> could supply a structure containing a struct device *, domain, struct
> >> pci_ops *, list of resources, aperture info, etc.  I wonder if struct
> >> pci_host_bridge would be a reasonable place to put this stuff, e.g.,
> >> something like this:
> >>
> >>   struct pci_host_bridge {
> >>     int domain;
> >>     int node;
> >>     struct device *dev;
> >>     struct pci_ops *ops;
> >>     struct list_head resources;
> >>     void *sysdata;
> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
> >>     ... /* other existing contents managed by core */
> >>   };
> >>
> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
> >
> > I'm really reluctant to give the arches more rope to hang themselves.
> 
> If you mean the sysdata pointer is rope to hang themselves, I think it
> would be great it we didn't need sysdata at all.  But I think it would
> be a huge amount of work to get rid of it completely, and keeping it
> would let us work at that incrementally.

Agree. But then your suggestion was to wrap sysdata inside another structure,
which to me constitutes additional rope.

> 
> > I
> > really dislike the use of xxxx_initcall() to do PCI initialisation ordering
> > that is currently in widespread use through the arch code.
> 
> I certainly agree that initcall ordering is fragile and to be avoided
> when possible.
> 
> > As I hope to
> > have proven with my arm64 code, you can have PCI support for an architecture
> > without having to provide any arch specific code. We have enough correct
> > code in the PCI framework, what would the architectures provide to the generic
> > code that we cannot get by following the standard?
> 
> PCI host bridges are not architected, i.e., the PCI/PCIe specs do not
> say anything about how to discover them or how to program them.  So
> the arch or a driver for another bus (ACPI, OF, etc.) must enumerate
> them and discover the bridge apertures (those are in the resource
> list).  And obviously the arch has to provide the root bus number and
> PCI config accessors.

The "what" for PCI host bridges is defined in the spec. The "how" is implementation
defined. What I'm trying to get with the cleanup is the ordering of pci_host_bridge
creation: core creates the structure first ("what"), arch then has the chance
of adding specific data to it (ops, resources, etc) ("how").

At the moment arm and powerpc do some horrible dances in trying to create their
local idea of a host bridge before passing it to the core code.

As for the root bus number, maybe we can offer some sensible default strategy
for numbering them and the arches that don't care too much can use that.

> 
> > Of course, there are always arch specific corners and they need their data
> > structures to make sense of those, but rather than having architectures
> > fill in a structure *before* we can setup host bridges I think we need
> > to reverse the order. Using your example structure, I don't think is
> > the arch's job to provide the list of resources or the domain number
> > before we can scan the host bridge. We should be able to get those from
> > somewhere else (like adding by default the ioport_resource and
> > iomem_resource and managing domain numbers inside the core framework).
> 
> It's possible we could manage domain numbers in the core.  On ACPI
> systems, we currently we use the ACPI _SEG value as the domain.  In
> some cases, e.g., on ia64, config space access is done via firmware
> interfaces, and those interfaces expect the _SEG values.  We could
> conceivably maintain a mapping between _SEG and domain, but I'm not
> sure there's an advantage there.
> 
> I probably don't understand what you intend by reversing the order.
> Are you suggesting something like new pcibios_*() interfaces the arch
> can use to get the host bridge apertures and domain number?

Yes. Lets stop having the architectures do early inits so that they can
prepare their host bridge structures to be ready for when the PCI framework
calls them. Lets create the host bridge in the PCI core and use pcibios_*()
to add to it where necessary without having to race for position.

Best regards,
Liviu

> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Bjorn Helgaas April 10, 2014, 3:48 a.m. UTC | #13
On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote:
> On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
>> On Wed, Apr 9, 2014 at 6:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> > On Tue, Apr 08, 2014 at 05:28:39PM +0100, Bjorn Helgaas wrote:
>> >> On Tue, Apr 8, 2014 at 4:20 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> >> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
>> >>
>> >> >> Let me try to explain my concern about the
>> >> >> pci_create_root_bus_in_domain() interface.  We currently have these
>> >> >> interfaces:
>> >> >>
>> >> >>   pci_scan_root_bus()
>> >> >>   pci_scan_bus()
>> >> >>   pci_scan_bus_parented()
>> >> >>   pci_create_root_bus()
>> >> >> ...
>> >>
>> >> >> One alternative is to add an _in_domain() variant of each of these
>> >> >> interfaces, but that doesn't seem very convenient either.  My idea of
>> >> >> passing in a structure would also require adding variants, so there's
>> >> >> not really an advantage there, but I am thinking of the next
>> >> >> unification effort, e.g., for NUMA node info.  I don't really want to
>> >> >> have to change all the _in_domain() interfaces to also take yet
>> >> >> another parameter for the node number.
>> >> >
>> >> > OK, what about this: all the functions that you have mentioned take a
>> >> > void *sysdata parameter. Should we convert this opaque pointer into a
>> >> > specific structure that holds the domain_nr and (in future) the NUMA
>> >> > node info?
>> >>
>> >> I doubt if we can make sysdata itself generic because I suspect we
>> >> need a way to have *some* arch-specific data.  But maybe the arch
>> >> could supply a structure containing a struct device *, domain, struct
>> >> pci_ops *, list of resources, aperture info, etc.  I wonder if struct
>> >> pci_host_bridge would be a reasonable place to put this stuff, e.g.,
>> >> something like this:
>> >>
>> >>   struct pci_host_bridge {
>> >>     int domain;
>> >>     int node;
>> >>     struct device *dev;
>> >>     struct pci_ops *ops;
>> >>     struct list_head resources;
>> >>     void *sysdata;
>> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
>> >>     ... /* other existing contents managed by core */
>> >>   };
>> >>
>> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
>> >
>> > I'm really reluctant to give the arches more rope to hang themselves.
>>
>> If you mean the sysdata pointer is rope to hang themselves, I think it
>> would be great it we didn't need sysdata at all.  But I think it would
>> be a huge amount of work to get rid of it completely, and keeping it
>> would let us work at that incrementally.
>
> Agree. But then your suggestion was to wrap sysdata inside another structure,
> which to me constitutes additional rope.

I'll ponder this more, but I don't see your point here yet.  The arch
already supplies a sysdata pointer to pci_scan_root_bus(), and we
stash it in every struct pci_bus already.  My idea was just to pass it
in differently, as a structure member rather than a separate argument.
 (And I'm not completely attached to my proposal; it was only to
illustrate my concern about the explosion of interfaces if we have to
add *_domain(), *_node(), etc.)

> The "what" for PCI host bridges is defined in the spec. The "how" is implementation
> defined. What I'm trying to get with the cleanup is the ordering of pci_host_bridge
> creation: core creates the structure first ("what"), arch then has the chance
> of adding specific data to it (ops, resources, etc) ("how").
>
> At the moment arm and powerpc do some horrible dances in trying to create their
> local idea of a host bridge before passing it to the core code.
>
> As for the root bus number, maybe we can offer some sensible default strategy
> for numbering them and the arches that don't care too much can use that.

I think we're at the limit of what can be accomplished with the
abstractness of English.

My opinion is that it's reasonable for the arch to discover the host
bridge properties first and pass them to the core, and it doesn't
require unreasonable things of the arch.  I know the arm PCI setup is
complicated, partly because it deals with a huge number of machines
that don't have a consistent firmware interface.  The x86/ACPI setup
is relatively simple because it deals with a simple firmware
interface.  But that's just my opinion, and maybe your code will show
otherwise.

Bjorn
Arnd Bergmann April 10, 2014, 8 a.m. UTC | #14
On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote:
> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote:
> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
> >> >>   struct pci_host_bridge {
> >> >>     int domain;
> >> >>     int node;
> >> >>     struct device *dev;
> >> >>     struct pci_ops *ops;
> >> >>     struct list_head resources;
> >> >>     void *sysdata;
> >> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
> >> >>     ... /* other existing contents managed by core */
> >> >>   };
> >> >>
> >> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
> >> >
> >> > I'm really reluctant to give the arches more rope to hang themselves.
> >>
> >> If you mean the sysdata pointer is rope to hang themselves, I think it
> >> would be great it we didn't need sysdata at all.  But I think it would
> >> be a huge amount of work to get rid of it completely, and keeping it
> >> would let us work at that incrementally.
> >
> > Agree. But then your suggestion was to wrap sysdata inside another structure,
> > which to me constitutes additional rope.
> 
> I'll ponder this more, but I don't see your point here yet.  The arch
> already supplies a sysdata pointer to pci_scan_root_bus(), and we
> stash it in every struct pci_bus already.  My idea was just to pass it
> in differently, as a structure member rather than a separate argument.
>  (And I'm not completely attached to my proposal; it was only to
> illustrate my concern about the explosion of interfaces if we have to
> add *_domain(), *_node(), etc.)

As a minor variation of your suggestion, how about passing in a pointer
to struct pci_host_bridge, and embed that within its own private
structure? I think this is closer to how a lot of other subsystems
do the abstraction.

> > The "what" for PCI host bridges is defined in the spec. The "how" is implementation
> > defined. What I'm trying to get with the cleanup is the ordering of pci_host_bridge
> > creation: core creates the structure first ("what"), arch then has the chance
> > of adding specific data to it (ops, resources, etc) ("how").
> >
> > At the moment arm and powerpc do some horrible dances in trying to create their
> > local idea of a host bridge before passing it to the core code.
> >
> > As for the root bus number, maybe we can offer some sensible default strategy
> > for numbering them and the arches that don't care too much can use that.
> 
> I think we're at the limit of what can be accomplished with the
> abstractness of English.
> 
> My opinion is that it's reasonable for the arch to discover the host
> bridge properties first and pass them to the core, and it doesn't
> require unreasonable things of the arch.  I know the arm PCI setup is
> complicated, partly because it deals with a huge number of machines
> that don't have a consistent firmware interface.  The x86/ACPI setup
> is relatively simple because it deals with a simple firmware
> interface.  But that's just my opinion, and maybe your code will show
> otherwise.

Makes sense. One of the areas where the PCI code shows its age is the
method how the various parts link together: there are function calls
going back and forth between architecture specific files and generic
files, rather a hierarchy of files with generic code being code by more
specific code.

To do the probing properly, I think it's totally fine to have the
core code expect stuff like the resources and domain number
to be filled out already by whoever calls it, but then have wrappers
around it that get this information from a firmware interface, or
from hardwired architecture specific code where necessary.

	Arnd
Bjorn Helgaas April 10, 2014, 1:50 p.m. UTC | #15
On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote:
>> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote:
>> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
>> >> >>   struct pci_host_bridge {
>> >> >>     int domain;
>> >> >>     int node;
>> >> >>     struct device *dev;
>> >> >>     struct pci_ops *ops;
>> >> >>     struct list_head resources;
>> >> >>     void *sysdata;
>> >> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
>> >> >>     ... /* other existing contents managed by core */
>> >> >>   };
>> >> >>
>> >> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
>> >> >
>> >> > I'm really reluctant to give the arches more rope to hang themselves.
>> >>
>> >> If you mean the sysdata pointer is rope to hang themselves, I think it
>> >> would be great it we didn't need sysdata at all.  But I think it would
>> >> be a huge amount of work to get rid of it completely, and keeping it
>> >> would let us work at that incrementally.
>> >
>> > Agree. But then your suggestion was to wrap sysdata inside another structure,
>> > which to me constitutes additional rope.
>>
>> I'll ponder this more, but I don't see your point here yet.  The arch
>> already supplies a sysdata pointer to pci_scan_root_bus(), and we
>> stash it in every struct pci_bus already.  My idea was just to pass it
>> in differently, as a structure member rather than a separate argument.
>>  (And I'm not completely attached to my proposal; it was only to
>> illustrate my concern about the explosion of interfaces if we have to
>> add *_domain(), *_node(), etc.)
>
> As a minor variation of your suggestion, how about passing in a pointer
> to struct pci_host_bridge, and embed that within its own private
> structure? I think this is closer to how a lot of other subsystems
> do the abstraction.

I'm not sure I'm following you; you mean the arch-specific sysdata
structure would contain a pointer to struct pci_host_bridge?

I have to admit that I'm not up on how other subsystems handle this
sort of abstraction.  Do you have any pointers to good examples that I
can study?

Bjorn
Arnd Bergmann April 10, 2014, 2:07 p.m. UTC | #16
On Thursday 10 April 2014 07:50:52 Bjorn Helgaas wrote:
> On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote:
> >> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote:
> >> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
> >> >> >>   struct pci_host_bridge {
> >> >> >>     int domain;
> >> >> >>     int node;
> >> >> >>     struct device *dev;
> >> >> >>     struct pci_ops *ops;
> >> >> >>     struct list_head resources;
> >> >> >>     void *sysdata;
> >> >> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
> >> >> >>     ... /* other existing contents managed by core */
> >> >> >>   };
> >> >> >>
> >> >> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
> >> >> >
> 
> I'm not sure I'm following you; you mean the arch-specific sysdata
> structure would contain a pointer to struct pci_host_bridge?
> 
> I have to admit that I'm not up on how other subsystems handle this
> sort of abstraction.  Do you have any pointers to good examples that I
> can study?

What I mean is like this:

/* generic structure */
struct pci_host_bridge {
        int domain;
        int node;
        struct device *dev;
        struct pci_ops *ops; 
        struct list_head resources;
        struct pci_bus *bus;  /* filled in by core, not by arch */
        ... /* other existing contents managed by core */
};

/* arm specific structure */
struct pci_sys_data {
        char            io_res_name[12];
                                        /* Bridge swizzling                     */
        u8              (*swizzle)(struct pci_dev *, u8 *);
                                        /* IRQ mapping                          */
        int             (*map_irq)(const struct pci_dev *, u8, u8);
                                        /* Resource alignement requirements     */
        void            (*add_bus)(struct pci_bus *bus);
        void            (*remove_bus)(struct pci_bus *bus);
        void            *private_data;  /* platform controller private data     */

	/* not a pointer: */
	struct pci_host_bridge bridge;
};
static inline struct pci_sys_data *to_pci_sys_data(struct pci_host_bridge *bridge)
{
	return container_of(bridge, struct pci_sys_data, bridge);
}

/* arm specific, driver specific structure */
struct tegra_pcie {
        void __iomem *pads;
        void __iomem *afi;

        struct clk *pex_clk;
        struct clk *afi_clk;
        struct clk *pll_e;
        struct clk *cml_clk;

        struct tegra_msi msi;

        struct list_head ports;
        unsigned int num_ports;

	struct pci_sys_data sysdata;
};
static inline struct tegra_pcie *to_tegra_pcie(struct pci_sys_data *sysdata)
{
	return container_of(sysdata, struct tegra_pcie, sysdata);
}

This mirrors how we treat devices: a pci_device has an embedded device,
and so on, in other subsystems we can have multiple layers.

In this example, the tegra pcie driver then allocates its own tegra_pcie
structure, fills out the fields it needs, and registers it with the
ARM architecture code, passing just the pci_sys_data pointer. That function
in turn passes a pointer to the embedded pci_host_bridge down to the
generic code. Ideally we should try to eliminate the architecture specific
portion here, but that is a later step.

	Arnd
Liviu Dudau April 10, 2014, 2:53 p.m. UTC | #17
On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote:
> On Thursday 10 April 2014 07:50:52 Bjorn Helgaas wrote:
> > On Thu, Apr 10, 2014 at 2:00 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > > On Wednesday 09 April 2014 21:48:14 Bjorn Helgaas wrote:
> > >> On Wed, Apr 9, 2014 at 7:27 PM, Liviu Dudau <liviu@dudau.co.uk> wrote:
> > >> > On Wed, Apr 09, 2014 at 08:02:41AM -0600, Bjorn Helgaas wrote:
> > >> >> >>   struct pci_host_bridge {
> > >> >> >>     int domain;
> > >> >> >>     int node;
> > >> >> >>     struct device *dev;
> > >> >> >>     struct pci_ops *ops;
> > >> >> >>     struct list_head resources;
> > >> >> >>     void *sysdata;
> > >> >> >>     struct pci_bus *bus;  /* filled in by core, not by arch */
> > >> >> >>     ... /* other existing contents managed by core */
> > >> >> >>   };
> > >> >> >>
> > >> >> >>   struct pci_bus *pci_scan_host_bridge(struct pci_host_bridge *bridge);
> > >> >> >
> > 
> > I'm not sure I'm following you; you mean the arch-specific sysdata
> > structure would contain a pointer to struct pci_host_bridge?
> > 
> > I have to admit that I'm not up on how other subsystems handle this
> > sort of abstraction.  Do you have any pointers to good examples that I
> > can study?
> 
> What I mean is like this:
> 
> /* generic structure */
> struct pci_host_bridge {
>         int domain;
>         int node;
>         struct device *dev;
>         struct pci_ops *ops; 
>         struct list_head resources;
>         struct pci_bus *bus;  /* filled in by core, not by arch */
>         ... /* other existing contents managed by core */
> };
> 
> /* arm specific structure */
> struct pci_sys_data {
>         char            io_res_name[12];
>                                         /* Bridge swizzling                     */
>         u8              (*swizzle)(struct pci_dev *, u8 *);
>                                         /* IRQ mapping                          */
>         int             (*map_irq)(const struct pci_dev *, u8, u8);
>                                         /* Resource alignement requirements     */
>         void            (*add_bus)(struct pci_bus *bus);
>         void            (*remove_bus)(struct pci_bus *bus);
>         void            *private_data;  /* platform controller private data     */
> 
> 	/* not a pointer: */
> 	struct pci_host_bridge bridge;
> };
> static inline struct pci_sys_data *to_pci_sys_data(struct pci_host_bridge *bridge)
> {
> 	return container_of(bridge, struct pci_sys_data, bridge);
> }
> 
> /* arm specific, driver specific structure */
> struct tegra_pcie {
>         void __iomem *pads;
>         void __iomem *afi;
> 
>         struct clk *pex_clk;
>         struct clk *afi_clk;
>         struct clk *pll_e;
>         struct clk *cml_clk;
> 
>         struct tegra_msi msi;
> 
>         struct list_head ports;
>         unsigned int num_ports;
> 
> 	struct pci_sys_data sysdata;
> };
> static inline struct tegra_pcie *to_tegra_pcie(struct pci_sys_data *sysdata)
> {
> 	return container_of(sysdata, struct tegra_pcie, sysdata);
> }
> 
> This mirrors how we treat devices: a pci_device has an embedded device,
> and so on, in other subsystems we can have multiple layers.
> 
> In this example, the tegra pcie driver then allocates its own tegra_pcie
> structure, fills out the fields it needs, and registers it with the
> ARM architecture code, passing just the pci_sys_data pointer. That function
> in turn passes a pointer to the embedded pci_host_bridge down to the
> generic code. Ideally we should try to eliminate the architecture specific
> portion here, but that is a later step.

So Arnd seems to agree with me: we should try to get out of architecture specific
pci_sys_data and link the host bridge driver straight into the PCI core. The
core then can call into arch code via pcibios_*() functions.

Arnd, am I reading correctly into what you are saying?

Liviu

> 
> 	Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
>
Arnd Bergmann April 10, 2014, 8:46 p.m. UTC | #18
On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote:
> On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote:

> > This mirrors how we treat devices: a pci_device has an embedded device,
> > and so on, in other subsystems we can have multiple layers.
> > 
> > In this example, the tegra pcie driver then allocates its own tegra_pcie
> > structure, fills out the fields it needs, and registers it with the
> > ARM architecture code, passing just the pci_sys_data pointer. That function
> > in turn passes a pointer to the embedded pci_host_bridge down to the
> > generic code. Ideally we should try to eliminate the architecture specific
> > portion here, but that is a later step.
> 
> So Arnd seems to agree with me: we should try to get out of architecture specific
> pci_sys_data and link the host bridge driver straight into the PCI core. The
> core then can call into arch code via pcibios_*() functions.
> 
> Arnd, am I reading correctly into what you are saying?

Half of it ;-)

I think it would be better to not have an architecture specific data
structure, just like it would be better not to have architecture specific
pcibios_* functions that get called by the PCI core. Note that the
architecture specific functions are the ones that rely on the architecture
specific data structures as well. If they only use the common fields,
it should also be possible to share the code.

I also don't realistically think we can get there on a lot of architectures
any time soon. Note that most architectures only have one PCI host
implementation, so the architecture structure is the same as the host
driver structure anyway.

For architectures like powerpc and arm that have people actively working
on them, we have a chance to clean up that code in the way we want it
(if we can agree on the direction), but it's still not trivial to do.

Speaking of arm32 in particular, I think we will end up with a split
approach: modern platforms (multiplatform, possibly all DT based) using
PCI core infrastructure directly and no architecture specific PCI
code on the one side, and a variation of today's code for the legacy
platforms on the other.

	Arnd
Benjamin Herrenschmidt April 11, 2014, 5:01 a.m. UTC | #19
On Thu, 2014-04-10 at 22:46 +0200, Arnd Bergmann wrote:

> Half of it ;-)
> 
> I think it would be better to not have an architecture specific data
> structure, just like it would be better not to have architecture specific
> pcibios_* functions that get called by the PCI core. Note that the
> architecture specific functions are the ones that rely on the architecture
> specific data structures as well. If they only use the common fields,
> it should also be possible to share the code.

I don't understand... we'll never get rid of architecture specific hooks
in one form or another.

We'll always need to some things in an architecture or host-bridge
specific way. Now if you don't want to call them arch hooks, then call
them host bridge ops, but they are needed and thus they will need some
kind of architecture specific extension to the base host bridge 
structure.

EEH is one big nasty example on powerpc.

Another random one that happens to be hot in my brain right now because
we just finished debugging it: On powernv, we are just fixing a series
of bugs caused by the generic code trying to do hot resets on PCIe "by
hand" by directly toggling the secondary reset register in bridges.

Well, on our root complexes, this triggers a link loss which triggers
a fatal EEH "ER_all" interrupt which we escalate into a fence and all
hell breaks loose.

We need to mask some error traps in the hardware before doing something
that can cause an intentional link loss... and unmask them when done.
(Among other things, there are other issues on P7 with hot reset).

So hot reset must be an architecture hook.

PERST (fundamental reset) can *only* be a hook. The way to generate a
PERST is not specified. In fact, on our machines, we have special GPIOs
we can use to generate PERST on individual slots below a PLX bridge
and a different methods for slots directly on a PHB.

Eventually most of those hooks land into firmware, and as such it's akin
to ACPI which also keeps a separate state structure and a pile of hooks.

> I also don't realistically think we can get there on a lot of architectures
> any time soon. Note that most architectures only have one PCI host
> implementation, so the architecture structure is the same as the host
> driver structure anyway.
> 
> For architectures like powerpc and arm that have people actively working
> on them, we have a chance to clean up that code in the way we want it
> (if we can agree on the direction), but it's still not trivial to do.
> 
> Speaking of arm32 in particular, I think we will end up with a split
> approach: modern platforms (multiplatform, possibly all DT based) using
> PCI core infrastructure directly and no architecture specific PCI
> code on the one side, and a variation of today's code for the legacy
> platforms on the other.
> 
> 	Arnd
Arnd Bergmann April 11, 2014, 8:36 a.m. UTC | #20
On Friday 11 April 2014 15:01:09 Benjamin Herrenschmidt wrote:
> On Thu, 2014-04-10 at 22:46 +0200, Arnd Bergmann wrote:
> 
> > Half of it ;-)
> > 
> > I think it would be better to not have an architecture specific data
> > structure, just like it would be better not to have architecture specific
> > pcibios_* functions that get called by the PCI core. Note that the
> > architecture specific functions are the ones that rely on the architecture
> > specific data structures as well. If they only use the common fields,
> > it should also be possible to share the code.
> 
> I don't understand... we'll never get rid of architecture specific hooks
> in one form or another.
> 
> We'll always need to some things in an architecture or host-bridge
> specific way. Now if you don't want to call them arch hooks, then call
> them host bridge ops, but they are needed and thus they will need some
> kind of architecture specific extension to the base host bridge 
> structure.

Absolutely right. The thing I'd like to get rid of in the long run
is global functions defined in the architecture code that are called
by core code for host bridge specific functionality. In a lot of
cases, they should not be needed if we can express the same things
in a generic way. In other cases, we can use function pointers that
are set at the time that the host bridge is registered.

> EEH is one big nasty example on powerpc.
> 
> Another random one that happens to be hot in my brain right now because
> we just finished debugging it: On powernv, we are just fixing a series
> of bugs caused by the generic code trying to do hot resets on PCIe "by
> hand" by directly toggling the secondary reset register in bridges.
> 
> Well, on our root complexes, this triggers a link loss which triggers
> a fatal EEH "ER_all" interrupt which we escalate into a fence and all
> hell breaks loose.
> 
> We need to mask some error traps in the hardware before doing something
> that can cause an intentional link loss... and unmask them when done.
> (Among other things, there are other issues on P7 with hot reset).
> 
> So hot reset must be an architecture hook.

This sounds to me very much host bridge specific, not architecture specific.
If you have the same host bridge in an ARM system, you'd want the same
things to happen, and if you have another host bridge on PowerPC, you
probably don't want that code to be called.

> PERST (fundamental reset) can *only* be a hook. The way to generate a
> PERST is not specified. In fact, on our machines, we have special GPIOs
> we can use to generate PERST on individual slots below a PLX bridge
> and a different methods for slots directly on a PHB.
> 
> Eventually most of those hooks land into firmware, and as such it's akin
> to ACPI which also keeps a separate state structure and a pile of hooks.

On PowerPC, there are currently a bunch of platform specific callbacks
in the ppc_md: pcibios_after_init, pci_exclude_device,
pcibios_fixup_resources, pcibios_fixup_bus, pcibios_enable_device_hook,
pcibios_fixup_phb, pcibios_window_alignment, and possibly some more.
There is some architecture specific code that gets called by the
PCI core, with the main purpose of calling into these.

On ARM32, we have a similar set of callbacks in the architecture
private pci_sys_data: swizzle, map_irq, align_resource, add_bus,
remove_bus, and some more callbacks for setup in the hw_pci
structure that is used at initialization time: setup, scan,
preinit, postinit. Again, these are called from architecture
specific code that gets called by the PCI core.

I'm sure some of the other architectures have similar things, most
of them probably less verbose because there is fewer variability
between subarchitectures.

I think a nice way to deal with these in the long run would be
to have a generic 'struct pci_host_bridge_ops' that can be defined
by the architecture or the platform, or a particular host bridge
driver. We'd have to define exactly which function pointers would
go in there, but a good start would be the set of functions that
are today provided by each architecture. The reset method you describe
above would also fit well into this.

A host bridge driver can fill out the pointers with its own functions,
or put platform or architecture specific function pointers in there,
that get called by the PCI core. There are multiple ways to deal with
default implementations here, one way would be that the core just
falls back to a generic implementation (which is currently the __weak
function) if it sees a NULL pointer. Another way would be to require
each driver to either fill out all pointers or none of them, in which
case we would use a default pci_host_bridge_ops struct that contains
the pointers to the global pcibios_*() functions.

	Arnd
Benjamin Herrenschmidt April 11, 2014, 9:16 a.m. UTC | #21
On Fri, 2014-04-11 at 10:36 +0200, Arnd Bergmann wrote:

> > EEH is one big nasty example on powerpc.
> > 
> > Another random one that happens to be hot in my brain right now because
> > we just finished debugging it: On powernv, we are just fixing a series
> > of bugs caused by the generic code trying to do hot resets on PCIe "by
> > hand" by directly toggling the secondary reset register in bridges.
> > 
> > Well, on our root complexes, this triggers a link loss which triggers
> > a fatal EEH "ER_all" interrupt which we escalate into a fence and all
> > hell breaks loose.
> > 
> > We need to mask some error traps in the hardware before doing something
> > that can cause an intentional link loss... and unmask them when done.
> > (Among other things, there are other issues on P7 with hot reset).
> > 
> > So hot reset must be an architecture hook.
> 
> This sounds to me very much host bridge specific, not architecture specific.
> If you have the same host bridge in an ARM system, you'd want the same
> things to happen, and if you have another host bridge on PowerPC, you
> probably don't want that code to be called.

Yes, it is partially host bridge specific, partially firmware related
(OPAL vs. ACPI, vs....) so it's a mixture here.

So I do agree, host bridge ops to replace most of these pcibios_* hooks
does make sense.
> 
> > PERST (fundamental reset) can *only* be a hook. The way to generate a
> > PERST is not specified. In fact, on our machines, we have special GPIOs
> > we can use to generate PERST on individual slots below a PLX bridge
> > and a different methods for slots directly on a PHB.
> > 
> > Eventually most of those hooks land into firmware, and as such it's akin
> > to ACPI which also keeps a separate state structure and a pile of hooks.
> 
> On PowerPC, there are currently a bunch of platform specific callbacks
> in the ppc_md: pcibios_after_init, pci_exclude_device,
> pcibios_fixup_resources, pcibios_fixup_bus, pcibios_enable_device_hook,
> pcibios_fixup_phb, pcibios_window_alignment, and possibly some more.
> There is some architecture specific code that gets called by the
> PCI core, with the main purpose of calling into these.

Yes. Most of them could be made into host bridge hooks fairly easily.

The remaining ones are going to need somebody (probably me) to
untangle :-)

> On ARM32, we have a similar set of callbacks in the architecture
> private pci_sys_data: swizzle, map_irq, align_resource, add_bus,
> remove_bus, and some more callbacks for setup in the hw_pci
> structure that is used at initialization time: setup, scan,
> preinit, postinit. Again, these are called from architecture
> specific code that gets called by the PCI core.
> 
> I'm sure some of the other architectures have similar things, most
> of them probably less verbose because there is fewer variability
> between subarchitectures.
> 
> I think a nice way to deal with these in the long run would be
> to have a generic 'struct pci_host_bridge_ops' that can be defined
> by the architecture or the platform, or a particular host bridge
> driver.

Well, the host bridge needs either a "driver" or be subclassed or
both...

>  We'd have to define exactly which function pointers would
> go in there, but a good start would be the set of functions that
> are today provided by each architecture. The reset method you describe
> above would also fit well into this.
> 
> A host bridge driver can fill out the pointers with its own functions,
> or put platform or architecture specific function pointers in there,
> that get called by the PCI core. There are multiple ways to deal with
> default implementations here, one way would be that the core just
> falls back to a generic implementation (which is currently the __weak
> function) if it sees a NULL pointer. Another way would be to require
> each driver to either fill out all pointers or none of them, in which
> case we would use a default pci_host_bridge_ops struct that contains
> the pointers to the global pcibios_*() functions.

Either works, we can start with the easy ones like window alignment, and
move from there.

Ben.
Liviu Dudau April 11, 2014, 9:22 a.m. UTC | #22
On Thu, Apr 10, 2014 at 09:46:36PM +0100, Arnd Bergmann wrote:
> On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote:
> > On Thu, Apr 10, 2014 at 03:07:44PM +0100, Arnd Bergmann wrote:
> 
> > > This mirrors how we treat devices: a pci_device has an embedded device,
> > > and so on, in other subsystems we can have multiple layers.
> > > 
> > > In this example, the tegra pcie driver then allocates its own tegra_pcie
> > > structure, fills out the fields it needs, and registers it with the
> > > ARM architecture code, passing just the pci_sys_data pointer. That function
> > > in turn passes a pointer to the embedded pci_host_bridge down to the
> > > generic code. Ideally we should try to eliminate the architecture specific
> > > portion here, but that is a later step.
> > 
> > So Arnd seems to agree with me: we should try to get out of architecture specific
> > pci_sys_data and link the host bridge driver straight into the PCI core. The
> > core then can call into arch code via pcibios_*() functions.
> > 
> > Arnd, am I reading correctly into what you are saying?
> 
> Half of it ;-)
> 
> I think it would be better to not have an architecture specific data
> structure, just like it would be better not to have architecture specific
> pcibios_* functions that get called by the PCI core. Note that the
> architecture specific functions are the ones that rely on the architecture
> specific data structures as well. If they only use the common fields,
> it should also be possible to share the code.

While I've come to like the pcibios_*() interface (and yes, it could be
formalised and abstracted into a pci_xxxx_ops structure) I don't like the fact
that those functions use architectural data in order to function. I know it
might sound strange, as they *are* supposed to be implemented by the arches,
but in my mind the link between generic code and arch code for PCI should be
done by the host bridge driver. That's how PCI spec describes it, and I see no
reason why we should not be able to adopt the same view.

To be more precise, what I would like to happen in the case of some functions
would be for the PCI core code to call a pci_host_bridge_ops method which
in turn will call the arch specific code if it needs to. Why I think that would
be better? Because otherwise you put in the architectural side code to cope
with a certain host bridge, then another host bridge comes in and you add
more architectural code, but then when you port host bridge X to arch B you
discover that you need to add code there as well for X. And it all ends up in
the mess we currently have where the drivers in drivers/pci/host are not capable
of being ported to a different architecture because they rely on infrastructure
only present in arm32 that is not properly documented.

> 
> I also don't realistically think we can get there on a lot of architectures
> any time soon. Note that most architectures only have one PCI host
> implementation, so the architecture structure is the same as the host
> driver structure anyway.
> 
> For architectures like powerpc and arm that have people actively working
> on them, we have a chance to clean up that code in the way we want it
> (if we can agree on the direction), but it's still not trivial to do.
> 
> Speaking of arm32 in particular, I think we will end up with a split
> approach: modern platforms (multiplatform, possibly all DT based) using
> PCI core infrastructure directly and no architecture specific PCI
> code on the one side, and a variation of today's code for the legacy
> platforms on the other.

Actually, if we could come up with a compromise for the pci_fixup_*() functions
(are they still used by functional hardware?) then I think we could convert
most of the arm32 arch code to re-direct the calls to the infrastructure code.
But yes, there might be a lot of resistance to change due to lack of resources
when changing old platforms.

Best regards,
Liviu

> 
> 	Arnd
> 
>
Arnd Bergmann April 11, 2014, 1:51 p.m. UTC | #23
On Friday 11 April 2014 10:22:25 Liviu Dudau wrote:
> On Thu, Apr 10, 2014 at 09:46:36PM +0100, Arnd Bergmann wrote:
> > On Thursday 10 April 2014 15:53:04 Liviu Dudau wrote:
> > > So Arnd seems to agree with me: we should try to get out of architecture specific
> > > pci_sys_data and link the host bridge driver straight into the PCI core. The
> > > core then can call into arch code via pcibios_*() functions.
> > > 
> > > Arnd, am I reading correctly into what you are saying?
> > 
> > Half of it ;-)
> > 
> > I think it would be better to not have an architecture specific data
> > structure, just like it would be better not to have architecture specific
> > pcibios_* functions that get called by the PCI core. Note that the
> > architecture specific functions are the ones that rely on the architecture
> > specific data structures as well. If they only use the common fields,
> > it should also be possible to share the code.
> 
> While I've come to like the pcibios_*() interface (and yes, it could be
> formalised and abstracted into a pci_xxxx_ops structure) I don't like the fact
> that those functions use architectural data in order to function. I know it
> might sound strange, as they *are* supposed to be implemented by the arches,
> but in my mind the link between generic code and arch code for PCI should be
> done by the host bridge driver. That's how PCI spec describes it, and I see no
> reason why we should not be able to adopt the same view.

Yes, that's a good goal for the architectures that need the complexity.
I would also like to have a way to change as little as possible for
the architectures that don't care about this because they only have
one possible host controller implementation, which isn't necessarily
a conflict.

> To be more precise, what I would like to happen in the case of some functions
> would be for the PCI core code to call a pci_host_bridge_ops method which
> in turn will call the arch specific code if it needs to. Why I think that would
> be better? Because otherwise you put in the architectural side code to cope
> with a certain host bridge, then another host bridge comes in and you add
> more architectural code, but then when you port host bridge X to arch B you
> discover that you need to add code there as well for X. And it all ends up in
> the mess we currently have where the drivers in drivers/pci/host are not capable
> of being ported to a different architecture because they rely on infrastructure
> only present in arm32 that is not properly documented.

Right. Now it was intentional that we started putting the host drivers
into drivers/pci/host before cleaning it all up. We just had to start
somewhere.

> > I also don't realistically think we can get there on a lot of architectures
> > any time soon. Note that most architectures only have one PCI host
> > implementation, so the architecture structure is the same as the host
> > driver structure anyway.
> > 
> > For architectures like powerpc and arm that have people actively working
> > on them, we have a chance to clean up that code in the way we want it
> > (if we can agree on the direction), but it's still not trivial to do.
> > 
> > Speaking of arm32 in particular, I think we will end up with a split
> > approach: modern platforms (multiplatform, possibly all DT based) using
> > PCI core infrastructure directly and no architecture specific PCI
> > code on the one side, and a variation of today's code for the legacy
> > platforms on the other.
> 
> Actually, if we could come up with a compromise for the pci_fixup_*() functions
> (are they still used by functional hardware?)  then I think we could convert
> most of the arm32 arch code to re-direct the calls to the infrastructure code.

The fixups are used by hardware that we want to keep supporting, but I don't
see a problem there. None of them rely on the architecture specific PCI
implementation, and we could easily move the fixup code into a separate
file. Also, I suspect they are all used only on platforms that won't be
using CONFIG_ARCH_MULTIPLATFORM.

> But yes, there might be a lot of resistance to change due to lack of resources
> when changing old platforms.

Well, it should be trivial to just create a pci_host_bridge_ops structure
containing the currently global functions, and use that for everything
registered through pci_common_init_dev(). We definitely have to support
this method for things like iop/ixp/pxa/sa1100/footbridge, especially those
that have their own concept of PCI domains.

For the more modern multiplatform stuff that uses DT for probing and
has a driver in drivers/pci/host, we should be able to use completely
distinct pci_host_bridge_ops structure that can be shared with arm64.

	Arnd
Liviu Dudau July 1, 2014, 4:37 p.m. UTC | #24
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >
> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> > to have arch specific way of providing the number, specially after looking
> >> > at the existing implementations that return a value from a variable that
> >> > is never touched or incremented. My guess is that pci_domain_nr() was
> >> > created to work around the fact that there was no domain_nr maintainance in
> >> > the generic code.
> >>
> >> Well, there was no generic host bridge structure. There is one now, it should
> >> go there.
> >
> > Exactly! Hence my patch. After it gets accepted I will go through architectures
> > and remove their version of pci_domain_nr().
> 
> Currently the arch has to supply pci_domain_nr() because that's the
> only way for the generic code to learn the domain.  After you add
> pci_create_root_bus_in_domain(), the arch can supply the domain that
> way, and we won't need the arch-specific pci_domain_nr().  Right?
> That makes more sense to me; thanks for the explanation.
> 
> Let me try to explain my concern about the
> pci_create_root_bus_in_domain() interface.  We currently have these
> interfaces:
> 
>   pci_scan_root_bus()
>   pci_scan_bus()
>   pci_scan_bus_parented()
>   pci_create_root_bus()
> 
> pci_scan_root_bus() is a higher-level interface than
> pci_create_root_bus(), so I'm trying to migrate toward it because it
> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> and pci_bus_add_devices().
> 
> I think we can only remove the arch-specific pci_domain_nr() if that
> arch uses pci_create_root_bus_in_domain().  When we convert an arch
> from using scan_bus interfaces to using
> pci_create_root_bus_in_domain(), we will have to move the rest of the
> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> the arch code.
> 
> One alternative is to add an _in_domain() variant of each of these
> interfaces, but that doesn't seem very convenient either.  My idea of
> passing in a structure would also require adding variants, so there's
> not really an advantage there, but I am thinking of the next
> unification effort, e.g., for NUMA node info.  I don't really want to
> have to change all the _in_domain() interfaces to also take yet
> another parameter for the node number.

Resurecting this thread as I'm about to send an updated patch:

TL;DR: Bjorn is concerned that my introduction of an _in_domain() version
of pci_create_root_bus() as a way to pass a domain number from the arch
code down (or up?) into the generic PCI code is incomplete, as other
APIs that he listed make use of the non-domain aware version of
pci_create_root_bus() and as he plans to remove the use of the function
and use higher level APIs like pci_scan_root_bus() we will have to
introduce an _in_domain() version for those higher level functions.

After a bit of thinking I think the change I'm proposing is fine exactly
because it is a low level API. My intention is to automate the management
of the PCI domain numbers and any architecture that wants to go against
that should probably use the lower abstraction API to better control the
flow. So, in my updated v8 version of the patch I'm going to keep the
suggestion *as is* and hope we can have a(nother) discussion and come up
with a conclusion.

Best regards,
Liviu

> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Liviu Dudau July 4, 2014, 2:57 p.m. UTC | #25
On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >
> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> > to have arch specific way of providing the number, specially after looking
> >> > at the existing implementations that return a value from a variable that
> >> > is never touched or incremented. My guess is that pci_domain_nr() was
> >> > created to work around the fact that there was no domain_nr maintainance in
> >> > the generic code.
> >>
> >> Well, there was no generic host bridge structure. There is one now, it should
> >> go there.
> >
> > Exactly! Hence my patch. After it gets accepted I will go through architectures
> > and remove their version of pci_domain_nr().
> 
> Currently the arch has to supply pci_domain_nr() because that's the
> only way for the generic code to learn the domain.  After you add
> pci_create_root_bus_in_domain(), the arch can supply the domain that
> way, and we won't need the arch-specific pci_domain_nr().  Right?
> That makes more sense to me; thanks for the explanation.
> 
> Let me try to explain my concern about the
> pci_create_root_bus_in_domain() interface.  We currently have these
> interfaces:
> 
>   pci_scan_root_bus()
>   pci_scan_bus()
>   pci_scan_bus_parented()
>   pci_create_root_bus()
> 
> pci_scan_root_bus() is a higher-level interface than
> pci_create_root_bus(), so I'm trying to migrate toward it because it
> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> and pci_bus_add_devices().
> 
> I think we can only remove the arch-specific pci_domain_nr() if that
> arch uses pci_create_root_bus_in_domain().  When we convert an arch
> from using scan_bus interfaces to using
> pci_create_root_bus_in_domain(), we will have to move the rest of the
> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> the arch code.
> 
> One alternative is to add an _in_domain() variant of each of these
> interfaces, but that doesn't seem very convenient either.  My idea of
> passing in a structure would also require adding variants, so there's
> not really an advantage there, but I am thinking of the next
> unification effort, e.g., for NUMA node info.  I don't really want to
> have to change all the _in_domain() interfaces to also take yet
> another parameter for the node number.

Bjorn,

I'm coming around to your way of thinking and I want to suggest a strategy
for adding the domain number into the PCI framework.

My understanding is that when pci_host_bridge structure was introduced
you were trying to keep the APIs unchanged and hence the creation of a
bridge was hidden inside the pci_create_root_bus() function.

If we want to store the domain_nr information in the host bridge structure,
together with a pointer to sysdata, then we need to break up the creation
of the pci_host_bridge from the creation of a root bus. At that moment,
pci_scan_root_bus() will need to be changed to accept a pci_host_bridge
pointer, while pci_scan_bus() and pci_scan_bus_parented() will create
the host bridge in the body of their function.

Did I understood correctly this time your intentions? Do you agree with
this plan?

Best regards,
Liviu


> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Bjorn Helgaas July 8, 2014, 1:11 a.m. UTC | #26
On Fri, Jul 4, 2014 at 8:57 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
>> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
>> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
>> >> >
>> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
>> >> > to have arch specific way of providing the number, specially after looking
>> >> > at the existing implementations that return a value from a variable that
>> >> > is never touched or incremented. My guess is that pci_domain_nr() was
>> >> > created to work around the fact that there was no domain_nr maintainance in
>> >> > the generic code.
>> >>
>> >> Well, there was no generic host bridge structure. There is one now, it should
>> >> go there.
>> >
>> > Exactly! Hence my patch. After it gets accepted I will go through architectures
>> > and remove their version of pci_domain_nr().
>>
>> Currently the arch has to supply pci_domain_nr() because that's the
>> only way for the generic code to learn the domain.  After you add
>> pci_create_root_bus_in_domain(), the arch can supply the domain that
>> way, and we won't need the arch-specific pci_domain_nr().  Right?
>> That makes more sense to me; thanks for the explanation.
>>
>> Let me try to explain my concern about the
>> pci_create_root_bus_in_domain() interface.  We currently have these
>> interfaces:
>>
>>   pci_scan_root_bus()
>>   pci_scan_bus()
>>   pci_scan_bus_parented()
>>   pci_create_root_bus()
>>
>> pci_scan_root_bus() is a higher-level interface than
>> pci_create_root_bus(), so I'm trying to migrate toward it because it
>> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
>> and pci_bus_add_devices().
>>
>> I think we can only remove the arch-specific pci_domain_nr() if that
>> arch uses pci_create_root_bus_in_domain().  When we convert an arch
>> from using scan_bus interfaces to using
>> pci_create_root_bus_in_domain(), we will have to move the rest of the
>> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
>> the arch code.
>>
>> One alternative is to add an _in_domain() variant of each of these
>> interfaces, but that doesn't seem very convenient either.  My idea of
>> passing in a structure would also require adding variants, so there's
>> not really an advantage there, but I am thinking of the next
>> unification effort, e.g., for NUMA node info.  I don't really want to
>> have to change all the _in_domain() interfaces to also take yet
>> another parameter for the node number.
>
> ...
> My understanding is that when pci_host_bridge structure was introduced
> you were trying to keep the APIs unchanged and hence the creation of a
> bridge was hidden inside the pci_create_root_bus() function.

You mean pci_alloc_host_bridge()?  Right; ideally I would have used
pci_scan_root_bus() everywhere and gotten rid of pci_create_root_bus().
The outline of pci_scan_root_bus() is:

    pci_create_root_bus()
    pci_scan_child_bus()
    pci_bus_add_devices()

The problem was that several arches do interesting things scattered among
that core.  The ACPI host bridge driver used on x86 and ia64 does resource
allocation before pci_bus_add_devices(), as does parisc.  Probably all
arches should do this, but they don't.

And powerpc and sparc use of_scan_bus() or something similar instead of
pci_scan_child_bus().  They probably *could* provide config space accessors
that talk to OF and would allow pci_scan_child_bus() to work.  But that
seemed like too much work at the time.

> If we want to store the domain_nr information in the host bridge structure,
> together with a pointer to sysdata, then we need to break up the creation
> of the pci_host_bridge from the creation of a root bus. At that moment,
> pci_scan_root_bus() will need to be changed to accept a pci_host_bridge
> pointer, while pci_scan_bus() and pci_scan_bus_parented() will create
> the host bridge in the body of their function.

It's hard to change an existing interface like pci_scan_root_bus() because
it's called from so many places and you have to change them all at once.
Then if something goes wrong, the revert makes a mess for everybody.  But
I think it makes sense to add a new interface that does what you want.

Bjorn
Liviu Dudau July 8, 2014, 10:21 a.m. UTC | #27
On Tue, Jul 08, 2014 at 02:11:36AM +0100, Bjorn Helgaas wrote:
> On Fri, Jul 4, 2014 at 8:57 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Apr 07, 2014 at 11:44:51PM +0100, Bjorn Helgaas wrote:
> >> On Mon, Apr 7, 2014 at 4:07 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >> > On Mon, Apr 07, 2014 at 10:14:18AM +0100, Benjamin Herrenschmidt wrote:
> >> >> On Mon, 2014-04-07 at 09:46 +0100, Liviu Dudau wrote:
> >> >> >
> >> >> > *My* strategy is to get rid of pci_domain_nr(). I don't see why we need
> >> >> > to have arch specific way of providing the number, specially after looking
> >> >> > at the existing implementations that return a value from a variable that
> >> >> > is never touched or incremented. My guess is that pci_domain_nr() was
> >> >> > created to work around the fact that there was no domain_nr maintainance in
> >> >> > the generic code.
> >> >>
> >> >> Well, there was no generic host bridge structure. There is one now, it should
> >> >> go there.
> >> >
> >> > Exactly! Hence my patch. After it gets accepted I will go through architectures
> >> > and remove their version of pci_domain_nr().
> >>
> >> Currently the arch has to supply pci_domain_nr() because that's the
> >> only way for the generic code to learn the domain.  After you add
> >> pci_create_root_bus_in_domain(), the arch can supply the domain that
> >> way, and we won't need the arch-specific pci_domain_nr().  Right?
> >> That makes more sense to me; thanks for the explanation.
> >>
> >> Let me try to explain my concern about the
> >> pci_create_root_bus_in_domain() interface.  We currently have these
> >> interfaces:
> >>
> >>   pci_scan_root_bus()
> >>   pci_scan_bus()
> >>   pci_scan_bus_parented()
> >>   pci_create_root_bus()
> >>
> >> pci_scan_root_bus() is a higher-level interface than
> >> pci_create_root_bus(), so I'm trying to migrate toward it because it
> >> lets us remove a little code from the arch, e.g., pci_scan_child_bus()
> >> and pci_bus_add_devices().
> >>
> >> I think we can only remove the arch-specific pci_domain_nr() if that
> >> arch uses pci_create_root_bus_in_domain().  When we convert an arch
> >> from using scan_bus interfaces to using
> >> pci_create_root_bus_in_domain(), we will have to move the rest of the
> >> scan_bus code (pci_scan_child_bus(), pci_bus_add_devices()) back into
> >> the arch code.
> >>
> >> One alternative is to add an _in_domain() variant of each of these
> >> interfaces, but that doesn't seem very convenient either.  My idea of
> >> passing in a structure would also require adding variants, so there's
> >> not really an advantage there, but I am thinking of the next
> >> unification effort, e.g., for NUMA node info.  I don't really want to
> >> have to change all the _in_domain() interfaces to also take yet
> >> another parameter for the node number.
> >
> > ...
> > My understanding is that when pci_host_bridge structure was introduced
> > you were trying to keep the APIs unchanged and hence the creation of a
> > bridge was hidden inside the pci_create_root_bus() function.
> 
> You mean pci_alloc_host_bridge()?  Right; ideally I would have used
> pci_scan_root_bus() everywhere and gotten rid of pci_create_root_bus().
> The outline of pci_scan_root_bus() is:
> 
>     pci_create_root_bus()
>     pci_scan_child_bus()
>     pci_bus_add_devices()
> 
> The problem was that several arches do interesting things scattered among
> that core.  The ACPI host bridge driver used on x86 and ia64 does resource
> allocation before pci_bus_add_devices(), as does parisc.  Probably all
> arches should do this, but they don't.
> 
> And powerpc and sparc use of_scan_bus() or something similar instead of
> pci_scan_child_bus().  They probably *could* provide config space accessors
> that talk to OF and would allow pci_scan_child_bus() to work.  But that
> seemed like too much work at the time.
> 
> > If we want to store the domain_nr information in the host bridge structure,
> > together with a pointer to sysdata, then we need to break up the creation
> > of the pci_host_bridge from the creation of a root bus. At that moment,
> > pci_scan_root_bus() will need to be changed to accept a pci_host_bridge
> > pointer, while pci_scan_bus() and pci_scan_bus_parented() will create
> > the host bridge in the body of their function.
> 
> It's hard to change an existing interface like pci_scan_root_bus() because
> it's called from so many places and you have to change them all at once.
> Then if something goes wrong, the revert makes a mess for everybody.  But
> I think it makes sense to add a new interface that does what you want.

OK, I understand your concern. It does sort of return us back to the initial
discussion, where you were arguing against adding a new set of functions
for every existing function, but it makes sense from transition point of view.

Best regards,
Liviu

> 
> Bjorn
>
diff mbox

Patch

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index fd11c12..172c615 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1714,8 +1714,9 @@  void __weak pcibios_remove_bus(struct pci_bus *bus)
 {
 }
 
-struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
+		int domain, int bus, struct pci_ops *ops, void *sysdata,
+		struct list_head *resources)
 {
 	int error;
 	struct pci_host_bridge *bridge;
@@ -1728,30 +1729,34 @@  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 
 	bridge = pci_alloc_host_bridge();
 	if (!bridge)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	bridge->dev.parent = parent;
 	bridge->dev.release = pci_release_host_bridge_dev;
+	bridge->domain_nr = domain;
 	error = pcibios_root_bridge_prepare(bridge);
 	if (error)
 		goto err_out;
 
 	b = pci_alloc_bus();
-	if (!b)
+	if (!b) {
+		error = -ENOMEM;
 		goto err_out;
+	}
 
 	b->sysdata = sysdata;
 	b->ops = ops;
 	b->number = b->busn_res.start = bus;
-	b2 = pci_find_bus(pci_domain_nr(b), bus);
+	b2 = pci_find_bus(bridge->domain_nr, bus);
 	if (b2) {
 		/* If we already got to this bus through a different bridge, ignore it */
 		dev_dbg(&b2->dev, "bus already known\n");
+		error = -EEXIST;
 		goto err_bus_out;
 	}
 
 	bridge->bus = b;
-	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
+	dev_set_name(&bridge->dev, "pci%04x:%02x", bridge->domain_nr, bus);
 	error = device_register(&bridge->dev);
 	if (error) {
 		put_device(&bridge->dev);
@@ -1766,7 +1771,7 @@  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 
 	b->dev.class = &pcibus_class;
 	b->dev.parent = b->bridge;
-	dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus);
+	dev_set_name(&b->dev, "%04x:%02x", bridge->domain_nr, bus);
 	error = device_register(&b->dev);
 	if (error)
 		goto class_dev_reg_err;
@@ -1816,7 +1821,27 @@  err_bus_out:
 	kfree(b);
 err_out:
 	kfree(bridge);
-	return NULL;
+	return ERR_PTR(error);
+}
+
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	int domain_nr;
+	struct pci_bus *b = pci_alloc_bus();
+	if (!b)
+		return NULL;
+
+	b->sysdata = sysdata;
+	domain_nr = pci_domain_nr(b);
+	kfree(b);
+
+	b = pci_create_root_bus_in_domain(parent, domain_nr, bus,
+				ops, sysdata, resources);
+	if (IS_ERR(b))
+		return NULL;
+
+	return b;
 }
 
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33aa2ca..1eed009 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -394,6 +394,7 @@  struct pci_host_bridge_window {
 struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
+	int domain_nr;
 	struct list_head windows;	/* pci_host_bridge_windows */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
@@ -747,6 +748,9 @@  struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
+struct pci_bus *pci_create_root_bus_in_domain(struct device *parent,
+			int domain, int bus, struct pci_ops *ops,
+			void *sysdata, struct list_head *resources);
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
 int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
 void pci_bus_release_busn_res(struct pci_bus *b);